fix: otel not working without USER
This commit is contained in:
parent
ad566ee9ef
commit
c5ed376d5f
127 changed files with 347 additions and 305 deletions
16
pkg/observability/logs/aws.go
Normal file
16
pkg/observability/logs/aws.go
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"github.com/aws/smithy-go/logging"
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
func NewAWSWrapper(logger zerolog.Logger) logging.LoggerFunc {
|
||||
if logger.GetLevel() > zerolog.DebugLevel {
|
||||
return func(classification logging.Classification, format string, args ...interface{}) {}
|
||||
}
|
||||
|
||||
return func(classification logging.Classification, format string, args ...interface{}) {
|
||||
logger.Debug().CallerSkipFrame(2).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
}
|
||||
25
pkg/observability/logs/aws_test.go
Normal file
25
pkg/observability/logs/aws_test.go
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/aws/smithy-go/logging"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewAWSWrapper(t *testing.T) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cwb := zerolog.ConsoleWriter{Out: buf, TimeFormat: time.RFC3339, NoColor: true}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}, cwb)
|
||||
|
||||
logger := NewAWSWrapper(zerolog.New(out).With().Caller().Logger())
|
||||
|
||||
logger.Logf(logging.Debug, "%s", "foo")
|
||||
|
||||
assert.Equal(t, "<nil> DBG aws_test.go:22 > foo\n", buf.String())
|
||||
}
|
||||
15
pkg/observability/logs/datadog.go
Normal file
15
pkg/observability/logs/datadog.go
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
package logs
|
||||
|
||||
import "github.com/rs/zerolog"
|
||||
|
||||
type DatadogLogger struct {
|
||||
logger zerolog.Logger
|
||||
}
|
||||
|
||||
func NewDatadogLogger(logger zerolog.Logger) *DatadogLogger {
|
||||
return &DatadogLogger{logger: logger}
|
||||
}
|
||||
|
||||
func (d DatadogLogger) Log(msg string) {
|
||||
d.logger.Debug().CallerSkipFrame(1).Msg(msg)
|
||||
}
|
||||
24
pkg/observability/logs/datadog_test.go
Normal file
24
pkg/observability/logs/datadog_test.go
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewDatadogLogger(t *testing.T) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cwb := zerolog.ConsoleWriter{Out: buf, TimeFormat: time.RFC3339, NoColor: true}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}, cwb)
|
||||
|
||||
logger := NewDatadogLogger(zerolog.New(out).With().Caller().Logger())
|
||||
|
||||
logger.Log("foo")
|
||||
|
||||
assert.Equal(t, "<nil> DBG datadog_test.go:21 > foo\n", buf.String())
|
||||
}
|
||||
19
pkg/observability/logs/elastic.go
Normal file
19
pkg/observability/logs/elastic.go
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
package logs
|
||||
|
||||
import "github.com/rs/zerolog"
|
||||
|
||||
type ElasticLogger struct {
|
||||
logger zerolog.Logger
|
||||
}
|
||||
|
||||
func NewElasticLogger(logger zerolog.Logger) *ElasticLogger {
|
||||
return &ElasticLogger{logger: logger}
|
||||
}
|
||||
|
||||
func (l ElasticLogger) Debugf(format string, args ...interface{}) {
|
||||
l.logger.Debug().CallerSkipFrame(1).Msgf(format, args...)
|
||||
}
|
||||
|
||||
func (l ElasticLogger) Errorf(format string, args ...interface{}) {
|
||||
l.logger.Error().CallerSkipFrame(1).Msgf(format, args...)
|
||||
}
|
||||
24
pkg/observability/logs/elastic_test.go
Normal file
24
pkg/observability/logs/elastic_test.go
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewElasticLogger(t *testing.T) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cwb := zerolog.ConsoleWriter{Out: buf, TimeFormat: time.RFC3339, NoColor: true}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}, cwb)
|
||||
|
||||
logger := NewElasticLogger(zerolog.New(out).With().Caller().Logger())
|
||||
|
||||
logger.Errorf("foo")
|
||||
|
||||
assert.Equal(t, "<nil> ERR elastic_test.go:21 > foo\n", buf.String())
|
||||
}
|
||||
17
pkg/observability/logs/fields.go
Normal file
17
pkg/observability/logs/fields.go
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
package logs
|
||||
|
||||
// Names of the fields attached to log entries.
const (
	// EntryPointName is the field name for an entry point name.
	EntryPointName = "entryPointName"
	// RouterName is the field name for a router name.
	RouterName = "routerName"
	// Rule is the field name for a rule.
	Rule = "rule"
	// MiddlewareName is the field name for a middleware name.
	MiddlewareName = "middlewareName"
	// MiddlewareType is the field name for a middleware type.
	MiddlewareType = "middlewareType"
	// ProviderName is the field name for a provider name.
	ProviderName = "providerName"
	// ServiceName is the field name for a service name.
	ServiceName = "serviceName"
	// MetricsProviderName is the field name for a metrics provider name.
	MetricsProviderName = "metricsProviderName"
	// TracingProviderName is the field name for a tracing provider name.
	TracingProviderName = "tracingProviderName"
	// ServerIndex is the field name for a server index.
	ServerIndex = "serverIndex"
	// TLSStoreName is the field name for a TLS store name.
	TLSStoreName = "tlsStoreName"
	// ServersTransportName is the field name for a servers transport name.
	ServersTransportName = "serversTransport"
)
|
||||
17
pkg/observability/logs/gokit.go
Normal file
17
pkg/observability/logs/gokit.go
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
kitlog "github.com/go-kit/log"
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
func NewGoKitWrapper(logger zerolog.Logger) kitlog.LoggerFunc {
|
||||
if logger.GetLevel() > zerolog.DebugLevel {
|
||||
return func(args ...interface{}) error { return nil }
|
||||
}
|
||||
|
||||
return func(args ...interface{}) error {
|
||||
logger.Debug().CallerSkipFrame(2).MsgFunc(msgFunc(args...))
|
||||
return nil
|
||||
}
|
||||
}
|
||||
24
pkg/observability/logs/gokit_test.go
Normal file
24
pkg/observability/logs/gokit_test.go
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewGoKitWrapper(t *testing.T) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cwb := zerolog.ConsoleWriter{Out: buf, TimeFormat: time.RFC3339, NoColor: true}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}, cwb)
|
||||
|
||||
logger := NewGoKitWrapper(zerolog.New(out).With().Caller().Logger())
|
||||
|
||||
_ = logger.Log("foo")
|
||||
|
||||
assert.Equal(t, "<nil> DBG gokit_test.go:21 > foo\n", buf.String())
|
||||
}
|
||||
75
pkg/observability/logs/hclog.go
Normal file
75
pkg/observability/logs/hclog.go
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
// RetryableHTTPLogger wraps our logger and implements retryablehttp.LeveledLogger.
|
||||
// The retry library sends fields as pairs of keys and values as structured logging,
|
||||
// so we need to adapt them to our logger.
|
||||
type RetryableHTTPLogger struct {
|
||||
logger zerolog.Logger
|
||||
}
|
||||
|
||||
// NewRetryableHTTPLogger creates an implementation of the retryablehttp.LeveledLogger.
|
||||
func NewRetryableHTTPLogger(logger zerolog.Logger) *RetryableHTTPLogger {
|
||||
return &RetryableHTTPLogger{logger: logger}
|
||||
}
|
||||
|
||||
// Error starts a new message with error level.
|
||||
func (l RetryableHTTPLogger) Error(msg string, keysAndValues ...interface{}) {
|
||||
logWithLevel(l.logger.Error().CallerSkipFrame(2), msg, keysAndValues...)
|
||||
}
|
||||
|
||||
// Info starts a new message with info level.
|
||||
func (l RetryableHTTPLogger) Info(msg string, keysAndValues ...interface{}) {
|
||||
logWithLevel(l.logger.Info().CallerSkipFrame(2), msg, keysAndValues...)
|
||||
}
|
||||
|
||||
// Debug starts a new message with debug level.
|
||||
func (l RetryableHTTPLogger) Debug(msg string, keysAndValues ...interface{}) {
|
||||
logWithLevel(l.logger.Debug().CallerSkipFrame(2), msg, keysAndValues...)
|
||||
}
|
||||
|
||||
// Warn starts a new message with warn level.
|
||||
func (l RetryableHTTPLogger) Warn(msg string, keysAndValues ...interface{}) {
|
||||
logWithLevel(l.logger.Warn().CallerSkipFrame(2), msg, keysAndValues...)
|
||||
}
|
||||
|
||||
func logWithLevel(ev *zerolog.Event, msg string, kvs ...interface{}) {
|
||||
if len(kvs)%2 == 0 {
|
||||
for i := 0; i < len(kvs)-1; i += 2 {
|
||||
// The first item of the pair (the key) is supposed to be a string.
|
||||
key, ok := kvs[i].(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
val := kvs[i+1]
|
||||
|
||||
var s fmt.Stringer
|
||||
if s, ok = val.(fmt.Stringer); ok {
|
||||
ev.Str(key, s.String())
|
||||
} else {
|
||||
ev.Interface(key, val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Capitalize first character.
|
||||
first := true
|
||||
msg = strings.Map(func(r rune) rune {
|
||||
if first {
|
||||
first = false
|
||||
return unicode.ToTitle(r)
|
||||
}
|
||||
|
||||
return r
|
||||
}, msg)
|
||||
|
||||
ev.Msg(msg)
|
||||
}
|
||||
24
pkg/observability/logs/hclog_test.go
Normal file
24
pkg/observability/logs/hclog_test.go
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewRetryableHTTPLogger(t *testing.T) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cwb := zerolog.ConsoleWriter{Out: buf, TimeFormat: time.RFC3339, NoColor: true}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}, cwb)
|
||||
|
||||
logger := NewRetryableHTTPLogger(zerolog.New(out).With().Caller().Logger())
|
||||
|
||||
logger.Info("foo")
|
||||
|
||||
assert.Equal(t, "<nil> INF hclog_test.go:21 > Foo\n", buf.String())
|
||||
}
|
||||
29
pkg/observability/logs/instana.go
Normal file
29
pkg/observability/logs/instana.go
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
type InstanaLogger struct {
|
||||
logger zerolog.Logger
|
||||
}
|
||||
|
||||
func NewInstanaLogger(logger zerolog.Logger) *InstanaLogger {
|
||||
return &InstanaLogger{logger: logger}
|
||||
}
|
||||
|
||||
func (l InstanaLogger) Debug(args ...interface{}) {
|
||||
l.logger.Debug().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
|
||||
func (l InstanaLogger) Info(args ...interface{}) {
|
||||
l.logger.Info().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
|
||||
func (l InstanaLogger) Warn(args ...interface{}) {
|
||||
l.logger.Warn().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
|
||||
func (l InstanaLogger) Error(args ...interface{}) {
|
||||
l.logger.Error().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
24
pkg/observability/logs/instana_test.go
Normal file
24
pkg/observability/logs/instana_test.go
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewInstanaLogger(t *testing.T) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cwb := zerolog.ConsoleWriter{Out: buf, TimeFormat: time.RFC3339, NoColor: true}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}, cwb)
|
||||
|
||||
logger := NewInstanaLogger(zerolog.New(out).With().Caller().Logger())
|
||||
|
||||
logger.Info("foo")
|
||||
|
||||
assert.Equal(t, "<nil> INF instana_test.go:21 > foo\n", buf.String())
|
||||
}
|
||||
35
pkg/observability/logs/log.go
Normal file
35
pkg/observability/logs/log.go
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
func NoLevel(logger zerolog.Logger, level zerolog.Level) zerolog.Logger {
|
||||
return logger.Hook(NewNoLevelHook(logger.GetLevel(), level))
|
||||
}
|
||||
|
||||
type NoLevelHook struct {
|
||||
minLevel zerolog.Level
|
||||
level zerolog.Level
|
||||
}
|
||||
|
||||
func NewNoLevelHook(minLevel zerolog.Level, level zerolog.Level) *NoLevelHook {
|
||||
return &NoLevelHook{minLevel: minLevel, level: level}
|
||||
}
|
||||
|
||||
func (n NoLevelHook) Run(e *zerolog.Event, level zerolog.Level, _ string) {
|
||||
if n.minLevel > n.level {
|
||||
e.Discard()
|
||||
return
|
||||
}
|
||||
|
||||
if level == zerolog.NoLevel {
|
||||
e.Str("level", n.level.String())
|
||||
}
|
||||
}
|
||||
|
||||
// msgFunc returns a function that renders the given values with fmt.Sprint.
// Rendering is deferred until the returned function is called.
func msgFunc(i ...any) func() string {
	render := func() string {
		return fmt.Sprint(i...)
	}

	return render
}
|
||||
24
pkg/observability/logs/log_test.go
Normal file
24
pkg/observability/logs/log_test.go
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNoLevel(t *testing.T) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cwb := zerolog.ConsoleWriter{Out: buf, TimeFormat: time.RFC3339, NoColor: true}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}, cwb)
|
||||
|
||||
logger := NoLevel(zerolog.New(out).With().Caller().Logger(), zerolog.DebugLevel)
|
||||
|
||||
logger.Info().Msg("foo")
|
||||
|
||||
assert.Equal(t, "<nil> INF log_test.go:21 > foo\n", buf.String())
|
||||
}
|
||||
49
pkg/observability/logs/logrus.go
Normal file
49
pkg/observability/logs/logrus.go
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
type LogrusStdWrapper struct {
|
||||
logger zerolog.Logger
|
||||
}
|
||||
|
||||
func NewLogrusWrapper(logger zerolog.Logger) *LogrusStdWrapper {
|
||||
return &LogrusStdWrapper{logger: logger}
|
||||
}
|
||||
|
||||
func (l LogrusStdWrapper) Print(args ...interface{}) {
|
||||
l.logger.Debug().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
|
||||
func (l LogrusStdWrapper) Printf(s string, args ...interface{}) {
|
||||
l.logger.Debug().CallerSkipFrame(1).Msgf(s, args...)
|
||||
}
|
||||
|
||||
func (l LogrusStdWrapper) Println(args ...interface{}) {
|
||||
l.logger.Debug().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
|
||||
func (l LogrusStdWrapper) Fatal(args ...interface{}) {
|
||||
l.logger.Fatal().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
|
||||
func (l LogrusStdWrapper) Fatalf(s string, args ...interface{}) {
|
||||
l.logger.Fatal().CallerSkipFrame(1).Msgf(s, args...)
|
||||
}
|
||||
|
||||
func (l LogrusStdWrapper) Fatalln(args ...interface{}) {
|
||||
l.logger.Fatal().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
|
||||
func (l LogrusStdWrapper) Panic(args ...interface{}) {
|
||||
l.logger.Panic().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
|
||||
func (l LogrusStdWrapper) Panicf(s string, args ...interface{}) {
|
||||
l.logger.Panic().CallerSkipFrame(1).Msgf(s, args...)
|
||||
}
|
||||
|
||||
func (l LogrusStdWrapper) Panicln(args ...interface{}) {
|
||||
l.logger.Panic().CallerSkipFrame(1).MsgFunc(msgFunc(args...))
|
||||
}
|
||||
24
pkg/observability/logs/logrus_test.go
Normal file
24
pkg/observability/logs/logrus_test.go
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewLogrusStdWrapper(t *testing.T) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cwb := zerolog.ConsoleWriter{Out: buf, TimeFormat: time.RFC3339, NoColor: true}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}, cwb)
|
||||
|
||||
logger := NewLogrusWrapper(zerolog.New(out).With().Caller().Logger())
|
||||
|
||||
logger.Println("foo")
|
||||
|
||||
assert.Equal(t, "<nil> DBG logrus_test.go:21 > foo\n", buf.String())
|
||||
}
|
||||
125
pkg/observability/logs/otel.go
Normal file
125
pkg/observability/logs/otel.go
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/traefik/traefik/v3/pkg/observability"
|
||||
"github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
otellog "go.opentelemetry.io/otel/log"
|
||||
)
|
||||
|
||||
// SetupOTelLogger sets up the OpenTelemetry logger.
|
||||
func SetupOTelLogger(ctx context.Context, logger zerolog.Logger, config *types.OTelLog) (zerolog.Logger, error) {
|
||||
if config == nil {
|
||||
return logger, nil
|
||||
}
|
||||
|
||||
if err := observability.EnsureUserEnvVar(); err != nil {
|
||||
return zerolog.Logger{}, err
|
||||
}
|
||||
provider, err := config.NewLoggerProvider(ctx)
|
||||
if err != nil {
|
||||
return zerolog.Logger{}, fmt.Errorf("setting up OpenTelemetry logger provider: %w", err)
|
||||
}
|
||||
|
||||
return logger.Hook(&otelLoggerHook{logger: provider.Logger("traefik")}), nil
|
||||
}
|
||||
|
||||
// otelLoggerHook is a zerolog hook that forwards logs to OpenTelemetry.
|
||||
type otelLoggerHook struct {
|
||||
logger otellog.Logger
|
||||
}
|
||||
|
||||
// Run forwards the log message to OpenTelemetry.
|
||||
func (h *otelLoggerHook) Run(e *zerolog.Event, level zerolog.Level, message string) {
|
||||
if level == zerolog.Disabled {
|
||||
return
|
||||
}
|
||||
|
||||
// Discard the event to avoid double logging.
|
||||
e.Discard()
|
||||
|
||||
var record otellog.Record
|
||||
record.SetTimestamp(time.Now().UTC())
|
||||
record.SetSeverity(otelLogSeverity(level))
|
||||
record.SetBody(otellog.StringValue(message))
|
||||
|
||||
// See https://github.com/rs/zerolog/issues/493.
|
||||
// This is a workaround to get the log fields from the event.
|
||||
// At the moment there's no way to get the log fields from the event, so we use reflection to get the buffer and parse it.
|
||||
logData := make(map[string]any)
|
||||
eventBuffer := fmt.Sprintf("%s}", reflect.ValueOf(e).Elem().FieldByName("buf"))
|
||||
if err := json.Unmarshal([]byte(eventBuffer), &logData); err != nil {
|
||||
record.AddAttributes(otellog.String("parsing_error", fmt.Sprintf("parsing log fields: %s", err)))
|
||||
h.logger.Emit(e.GetCtx(), record)
|
||||
return
|
||||
}
|
||||
|
||||
recordAttributes := make([]otellog.KeyValue, 0, len(logData))
|
||||
for k, v := range logData {
|
||||
if k == "level" {
|
||||
continue
|
||||
}
|
||||
if k == "time" {
|
||||
eventTimestamp, ok := v.(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
t, err := time.Parse(time.RFC3339, eventTimestamp)
|
||||
if err == nil {
|
||||
record.SetTimestamp(t)
|
||||
continue
|
||||
}
|
||||
}
|
||||
var attributeValue otellog.Value
|
||||
switch v := v.(type) {
|
||||
case string:
|
||||
attributeValue = otellog.StringValue(v)
|
||||
case int:
|
||||
attributeValue = otellog.IntValue(v)
|
||||
case int64:
|
||||
attributeValue = otellog.Int64Value(v)
|
||||
case float64:
|
||||
attributeValue = otellog.Float64Value(v)
|
||||
case bool:
|
||||
attributeValue = otellog.BoolValue(v)
|
||||
case []byte:
|
||||
attributeValue = otellog.BytesValue(v)
|
||||
default:
|
||||
attributeValue = otellog.StringValue(fmt.Sprintf("%v", v))
|
||||
}
|
||||
recordAttributes = append(recordAttributes, otellog.KeyValue{
|
||||
Key: k,
|
||||
Value: attributeValue,
|
||||
})
|
||||
}
|
||||
record.AddAttributes(recordAttributes...)
|
||||
|
||||
h.logger.Emit(e.GetCtx(), record)
|
||||
}
|
||||
|
||||
func otelLogSeverity(level zerolog.Level) otellog.Severity {
|
||||
switch level {
|
||||
case zerolog.TraceLevel:
|
||||
return otellog.SeverityTrace
|
||||
case zerolog.DebugLevel:
|
||||
return otellog.SeverityDebug
|
||||
case zerolog.InfoLevel:
|
||||
return otellog.SeverityInfo
|
||||
case zerolog.WarnLevel:
|
||||
return otellog.SeverityWarn
|
||||
case zerolog.ErrorLevel:
|
||||
return otellog.SeverityError
|
||||
case zerolog.FatalLevel:
|
||||
return otellog.SeverityFatal
|
||||
case zerolog.PanicLevel:
|
||||
return otellog.SeverityFatal4
|
||||
default:
|
||||
return otellog.SeverityUndefined
|
||||
}
|
||||
}
|
||||
196
pkg/observability/logs/otel_test.go
Normal file
196
pkg/observability/logs/otel_test.go
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
"go.opentelemetry.io/collector/pdata/plog/plogotlp"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
func TestLog(t *testing.T) {
|
||||
tests := []struct {
|
||||
desc string
|
||||
level zerolog.Level
|
||||
assertFn func(*testing.T, string)
|
||||
noLog bool
|
||||
}{
|
||||
{
|
||||
desc: "no level log",
|
||||
level: zerolog.NoLevel,
|
||||
assertFn: func(t *testing.T, log string) {
|
||||
t.Helper()
|
||||
// SeverityUndefined Severity = 0 // UNDEFINED
|
||||
assert.NotContains(t, log, `"severityNumber"`)
|
||||
assert.Regexp(t, `{"key":"resource","value":{"stringValue":"attribute"}}`, log)
|
||||
assert.Regexp(t, `{"key":"service.name","value":{"stringValue":"test"}}`, log)
|
||||
assert.Regexp(t, `"body":{"stringValue":"test"}`, log)
|
||||
assert.Regexp(t, `{"key":"foo","value":{"stringValue":"bar"}}`, log)
|
||||
assert.Regexp(t, `"traceId":"01020304050607080000000000000000","spanId":"0102030405060708"`, log)
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "trace log",
|
||||
level: zerolog.TraceLevel,
|
||||
assertFn: func(t *testing.T, log string) {
|
||||
t.Helper()
|
||||
// SeverityTrace1 Severity = 1 // TRACE
|
||||
assert.Contains(t, log, `"severityNumber":1`)
|
||||
assert.Regexp(t, `{"key":"resource","value":{"stringValue":"attribute"}}`, log)
|
||||
assert.Regexp(t, `{"key":"service.name","value":{"stringValue":"test"}}`, log)
|
||||
assert.Regexp(t, `"body":{"stringValue":"test"}`, log)
|
||||
assert.Regexp(t, `{"key":"foo","value":{"stringValue":"bar"}}`, log)
|
||||
assert.Regexp(t, `"traceId":"01020304050607080000000000000000","spanId":"0102030405060708"`, log)
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "debug log",
|
||||
level: zerolog.DebugLevel,
|
||||
assertFn: func(t *testing.T, log string) {
|
||||
t.Helper()
|
||||
// SeverityDebug1 Severity = 5 // DEBUG
|
||||
assert.Contains(t, log, `"severityNumber":5`)
|
||||
assert.Regexp(t, `{"key":"resource","value":{"stringValue":"attribute"}}`, log)
|
||||
assert.Regexp(t, `{"key":"service.name","value":{"stringValue":"test"}}`, log)
|
||||
assert.Regexp(t, `"body":{"stringValue":"test"}`, log)
|
||||
assert.Regexp(t, `{"key":"foo","value":{"stringValue":"bar"}}`, log)
|
||||
assert.Regexp(t, `"traceId":"01020304050607080000000000000000","spanId":"0102030405060708"`, log)
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "info log",
|
||||
level: zerolog.InfoLevel,
|
||||
assertFn: func(t *testing.T, log string) {
|
||||
t.Helper()
|
||||
// SeverityInfo1 Severity = 9 // INFO
|
||||
assert.Contains(t, log, `"severityNumber":9`)
|
||||
assert.Regexp(t, `{"key":"resource","value":{"stringValue":"attribute"}}`, log)
|
||||
assert.Regexp(t, `{"key":"service.name","value":{"stringValue":"test"}}`, log)
|
||||
assert.Regexp(t, `"body":{"stringValue":"test"}`, log)
|
||||
assert.Regexp(t, `{"key":"foo","value":{"stringValue":"bar"}}`, log)
|
||||
assert.Regexp(t, `"traceId":"01020304050607080000000000000000","spanId":"0102030405060708"`, log)
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "warn log",
|
||||
level: zerolog.WarnLevel,
|
||||
assertFn: func(t *testing.T, log string) {
|
||||
t.Helper()
|
||||
// SeverityWarn1 Severity = 13 // WARN
|
||||
assert.Contains(t, log, `"severityNumber":13`)
|
||||
assert.Regexp(t, `{"key":"resource","value":{"stringValue":"attribute"}}`, log)
|
||||
assert.Regexp(t, `{"key":"service.name","value":{"stringValue":"test"}}`, log)
|
||||
assert.Regexp(t, `"body":{"stringValue":"test"}`, log)
|
||||
assert.Regexp(t, `{"key":"foo","value":{"stringValue":"bar"}}`, log)
|
||||
assert.Regexp(t, `"traceId":"01020304050607080000000000000000","spanId":"0102030405060708"`, log)
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "error log",
|
||||
level: zerolog.ErrorLevel,
|
||||
assertFn: func(t *testing.T, log string) {
|
||||
t.Helper()
|
||||
// SeverityError1 Severity = 17 // ERROR
|
||||
assert.Contains(t, log, `"severityNumber":17`)
|
||||
assert.Regexp(t, `{"key":"resource","value":{"stringValue":"attribute"}}`, log)
|
||||
assert.Regexp(t, `{"key":"service.name","value":{"stringValue":"test"}}`, log)
|
||||
assert.Regexp(t, `"body":{"stringValue":"test"}`, log)
|
||||
assert.Regexp(t, `{"key":"foo","value":{"stringValue":"bar"}}`, log)
|
||||
assert.Regexp(t, `"traceId":"01020304050607080000000000000000","spanId":"0102030405060708"`, log)
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "fatal log",
|
||||
level: zerolog.FatalLevel,
|
||||
assertFn: func(t *testing.T, log string) {
|
||||
t.Helper()
|
||||
// SeverityFatal Severity = 21 // FATAL
|
||||
assert.Contains(t, log, `"severityNumber":21`)
|
||||
assert.Regexp(t, `{"key":"resource","value":{"stringValue":"attribute"}}`, log)
|
||||
assert.Regexp(t, `{"key":"service.name","value":{"stringValue":"test"}}`, log)
|
||||
assert.Regexp(t, `"body":{"stringValue":"test"}`, log)
|
||||
assert.Regexp(t, `{"key":"foo","value":{"stringValue":"bar"}}`, log)
|
||||
assert.Regexp(t, `"traceId":"01020304050607080000000000000000","spanId":"0102030405060708"`, log)
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "panic log",
|
||||
level: zerolog.PanicLevel,
|
||||
assertFn: func(t *testing.T, log string) {
|
||||
t.Helper()
|
||||
// SeverityFatal4 Severity = 24 // FATAL
|
||||
assert.Contains(t, log, `"severityNumber":24`)
|
||||
assert.Regexp(t, `{"key":"resource","value":{"stringValue":"attribute"}}`, log)
|
||||
assert.Regexp(t, `{"key":"service.name","value":{"stringValue":"test"}}`, log)
|
||||
assert.Regexp(t, `"body":{"stringValue":"test"}`, log)
|
||||
assert.Regexp(t, `{"key":"foo","value":{"stringValue":"bar"}}`, log)
|
||||
assert.Regexp(t, `"traceId":"01020304050607080000000000000000","spanId":"0102030405060708"`, log)
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
logCh := make(chan string)
|
||||
collector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gzr, err := gzip.NewReader(r.Body)
|
||||
require.NoError(t, err)
|
||||
|
||||
body, err := io.ReadAll(gzr)
|
||||
require.NoError(t, err)
|
||||
|
||||
req := plogotlp.NewExportRequest()
|
||||
err = req.UnmarshalProto(body)
|
||||
require.NoError(t, err)
|
||||
|
||||
marshalledReq, err := json.Marshal(req)
|
||||
require.NoError(t, err)
|
||||
|
||||
logCh <- string(marshalledReq)
|
||||
}))
|
||||
t.Cleanup(collector.Close)
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
config := &otypes.OTelLog{
|
||||
ServiceName: "test",
|
||||
ResourceAttributes: map[string]string{"resource": "attribute"},
|
||||
HTTP: &otypes.OTelHTTP{
|
||||
Endpoint: collector.URL,
|
||||
},
|
||||
}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339})
|
||||
logger := zerolog.New(out).With().Caller().Logger()
|
||||
|
||||
logger, err := SetupOTelLogger(t.Context(), logger, config)
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx := trace.ContextWithSpanContext(t.Context(), trace.NewSpanContext(trace.SpanContextConfig{
|
||||
TraceID: trace.TraceID{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
|
||||
SpanID: trace.SpanID{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
|
||||
}))
|
||||
logger = logger.With().Ctx(ctx).Logger()
|
||||
|
||||
logger.WithLevel(test.level).Str("foo", "bar").Msg("test")
|
||||
|
||||
select {
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Error("Log not exported")
|
||||
|
||||
case log := <-logCh:
|
||||
if test.assertFn != nil {
|
||||
test.assertFn(t, log)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
27
pkg/observability/logs/oxy.go
Normal file
27
pkg/observability/logs/oxy.go
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
package logs
|
||||
|
||||
import "github.com/rs/zerolog"
|
||||
|
||||
type OxyWrapper struct {
|
||||
logger zerolog.Logger
|
||||
}
|
||||
|
||||
func NewOxyWrapper(logger zerolog.Logger) *OxyWrapper {
|
||||
return &OxyWrapper{logger: logger}
|
||||
}
|
||||
|
||||
func (l OxyWrapper) Debug(s string, i ...interface{}) {
|
||||
l.logger.Debug().CallerSkipFrame(1).Msgf(s, i...)
|
||||
}
|
||||
|
||||
func (l OxyWrapper) Info(s string, i ...interface{}) {
|
||||
l.logger.Info().CallerSkipFrame(1).Msgf(s, i...)
|
||||
}
|
||||
|
||||
func (l OxyWrapper) Warn(s string, i ...interface{}) {
|
||||
l.logger.Warn().CallerSkipFrame(1).Msgf(s, i...)
|
||||
}
|
||||
|
||||
func (l OxyWrapper) Error(s string, i ...interface{}) {
|
||||
l.logger.Error().CallerSkipFrame(1).Msgf(s, i...)
|
||||
}
|
||||
24
pkg/observability/logs/oxy_test.go
Normal file
24
pkg/observability/logs/oxy_test.go
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewOxyWrapper(t *testing.T) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cwb := zerolog.ConsoleWriter{Out: buf, TimeFormat: time.RFC3339, NoColor: true}
|
||||
|
||||
out := zerolog.MultiLevelWriter(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}, cwb)
|
||||
|
||||
logger := NewOxyWrapper(zerolog.New(out).With().Caller().Logger())
|
||||
|
||||
logger.Info("foo")
|
||||
|
||||
assert.Equal(t, "<nil> INF oxy_test.go:21 > foo\n", buf.String())
|
||||
}
|
||||
32
pkg/observability/logs/wasm.go
Normal file
32
pkg/observability/logs/wasm.go
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
package logs
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/http-wasm/http-wasm-host-go/api"
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
// compile-time check to ensure ConsoleLogger implements api.Logger.
|
||||
var _ api.Logger = WasmLogger{}
|
||||
|
||||
// WasmLogger is a convenience which writes anything above LogLevelInfo to os.Stdout.
|
||||
type WasmLogger struct {
|
||||
logger *zerolog.Logger
|
||||
}
|
||||
|
||||
func NewWasmLogger(logger *zerolog.Logger) *WasmLogger {
|
||||
return &WasmLogger{
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
// IsEnabled implements the same method as documented on api.Logger.
|
||||
func (w WasmLogger) IsEnabled(level api.LogLevel) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// Log implements the same method as documented on api.Logger.
|
||||
func (w WasmLogger) Log(_ context.Context, level api.LogLevel, message string) {
|
||||
w.logger.WithLevel(zerolog.Level(level + 1)).Msg(message)
|
||||
}
|
||||
174
pkg/observability/metrics/datadog.go
Normal file
174
pkg/observability/metrics/datadog.go
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/metrics/dogstatsd"
|
||||
"github.com/go-kit/kit/util/conn"
|
||||
gokitlog "github.com/go-kit/log"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/traefik/traefik/v3/pkg/observability/logs"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
"github.com/traefik/traefik/v3/pkg/safe"
|
||||
)
|
||||
|
||||
// URL-like prefixes recognised by parseDatadogAddress to select a unix socket flavour.
const (
	// unixAddressPrefix selects the "unix" network.
	unixAddressPrefix = "unix://"
	// unixAddressDatagramPrefix selects the "unixgram" network.
	unixAddressDatagramPrefix = "unixgram://"
	// unixAddressStreamPrefix selects the "unixstream" network.
	unixAddressStreamPrefix = "unixstream://"
)
|
||||
|
||||
var (
|
||||
datadogClient *dogstatsd.Dogstatsd
|
||||
datadogLoopCancelFunc context.CancelFunc
|
||||
)
|
||||
|
||||
// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
|
||||
const (
	// Server-wide metrics.
	ddConfigReloadsName           = "config.reload.total"
	ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
	ddOpenConnsName               = "open.connections"

	// TLS metrics.
	ddTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"

	// Entry point metrics.
	ddEntryPointReqsName        = "entrypoint.request.total"
	ddEntryPointReqsTLSName     = "entrypoint.request.tls.total"
	ddEntryPointReqDurationName = "entrypoint.request.duration"
	ddEntryPointReqsBytesName   = "entrypoint.requests.bytes.total"
	ddEntryPointRespsBytesName  = "entrypoint.responses.bytes.total"

	// Router metrics.
	ddRouterReqsName         = "router.request.total"
	ddRouterReqsTLSName      = "router.request.tls.total"
	ddRouterReqsDurationName = "router.request.duration"
	ddRouterReqsBytesName    = "router.requests.bytes.total"
	ddRouterRespsBytesName   = "router.responses.bytes.total"

	// Service metrics.
	ddServiceReqsName         = "service.request.total"
	ddServiceReqsTLSName      = "service.request.tls.total"
	ddServiceReqsDurationName = "service.request.duration"
	ddServiceRetriesName      = "service.retries.total"
	ddServiceServerUpName     = "service.server.up"
	ddServiceReqsBytesName    = "service.requests.bytes.total"
	ddServiceRespsBytesName   = "service.responses.bytes.total"
)
|
||||
|
||||
// RegisterDatadog registers the metrics pusher if this didn't happen yet and creates a datadog Registry instance.
|
||||
func RegisterDatadog(ctx context.Context, config *otypes.Datadog) Registry {
|
||||
// Ensures there is only one DataDog client sending metrics at any given time.
|
||||
StopDatadog()
|
||||
|
||||
// just to be sure there is a prefix defined
|
||||
if config.Prefix == "" {
|
||||
config.Prefix = defaultMetricsPrefix
|
||||
}
|
||||
|
||||
datadogLogger := logs.NewGoKitWrapper(log.Logger.With().Str(logs.MetricsProviderName, "datadog").Logger())
|
||||
datadogClient = dogstatsd.New(config.Prefix+".", datadogLogger)
|
||||
|
||||
initDatadogClient(ctx, config, datadogLogger)
|
||||
|
||||
registry := &standardRegistry{
|
||||
configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
|
||||
lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
|
||||
openConnectionsGauge: datadogClient.NewGauge(ddOpenConnsName),
|
||||
tlsCertsNotAfterTimestampGauge: datadogClient.NewGauge(ddTLSCertsNotAfterTimestampName),
|
||||
}
|
||||
|
||||
if config.AddEntryPointsLabels {
|
||||
registry.epEnabled = config.AddEntryPointsLabels
|
||||
registry.entryPointReqsCounter = NewCounterWithNoopHeaders(datadogClient.NewCounter(ddEntryPointReqsName, 1.0))
|
||||
registry.entryPointReqsTLSCounter = datadogClient.NewCounter(ddEntryPointReqsTLSName, 1.0)
|
||||
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0), time.Second)
|
||||
registry.entryPointReqsBytesCounter = datadogClient.NewCounter(ddEntryPointReqsBytesName, 1.0)
|
||||
registry.entryPointRespsBytesCounter = datadogClient.NewCounter(ddEntryPointRespsBytesName, 1.0)
|
||||
}
|
||||
|
||||
if config.AddRoutersLabels {
|
||||
registry.routerEnabled = config.AddRoutersLabels
|
||||
registry.routerReqsCounter = NewCounterWithNoopHeaders(datadogClient.NewCounter(ddRouterReqsName, 1.0))
|
||||
registry.routerReqsTLSCounter = datadogClient.NewCounter(ddRouterReqsTLSName, 1.0)
|
||||
registry.routerReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddRouterReqsDurationName, 1.0), time.Second)
|
||||
registry.routerReqsBytesCounter = datadogClient.NewCounter(ddRouterReqsBytesName, 1.0)
|
||||
registry.routerRespsBytesCounter = datadogClient.NewCounter(ddRouterRespsBytesName, 1.0)
|
||||
}
|
||||
|
||||
if config.AddServicesLabels {
|
||||
registry.svcEnabled = config.AddServicesLabels
|
||||
registry.serviceReqsCounter = NewCounterWithNoopHeaders(datadogClient.NewCounter(ddServiceReqsName, 1.0))
|
||||
registry.serviceReqsTLSCounter = datadogClient.NewCounter(ddServiceReqsTLSName, 1.0)
|
||||
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddServiceReqsDurationName, 1.0), time.Second)
|
||||
registry.serviceRetriesCounter = datadogClient.NewCounter(ddServiceRetriesName, 1.0)
|
||||
registry.serviceServerUpGauge = datadogClient.NewGauge(ddServiceServerUpName)
|
||||
registry.serviceReqsBytesCounter = datadogClient.NewCounter(ddServiceReqsBytesName, 1.0)
|
||||
registry.serviceRespsBytesCounter = datadogClient.NewCounter(ddServiceRespsBytesName, 1.0)
|
||||
}
|
||||
|
||||
return registry
|
||||
}
|
||||
|
||||
func initDatadogClient(ctx context.Context, config *otypes.Datadog, logger gokitlog.LoggerFunc) {
|
||||
network, address := parseDatadogAddress(config.Address)
|
||||
|
||||
ctx, datadogLoopCancelFunc = context.WithCancel(ctx)
|
||||
|
||||
safe.Go(func() {
|
||||
ticker := time.NewTicker(time.Duration(config.PushInterval))
|
||||
defer ticker.Stop()
|
||||
|
||||
dialer := func(network, address string) (net.Conn, error) {
|
||||
switch network {
|
||||
case "unix":
|
||||
// To mimic the Datadog client when the network is unix we will try to guess the UDS type.
|
||||
newConn, err := net.Dial("unixgram", address)
|
||||
if err != nil && strings.Contains(err.Error(), "protocol wrong type for socket") {
|
||||
return net.Dial("unix", address)
|
||||
}
|
||||
return newConn, err
|
||||
|
||||
case "unixgram":
|
||||
return net.Dial("unixgram", address)
|
||||
|
||||
case "unixstream":
|
||||
return net.Dial("unix", address)
|
||||
|
||||
default:
|
||||
return net.Dial(network, address)
|
||||
}
|
||||
}
|
||||
datadogClient.WriteLoop(ctx, ticker.C, conn.NewManager(dialer, network, address, time.After, logger))
|
||||
})
|
||||
}
|
||||
|
||||
// StopDatadog stops the Datadog metrics pusher.
|
||||
func StopDatadog() {
|
||||
if datadogLoopCancelFunc != nil {
|
||||
datadogLoopCancelFunc()
|
||||
datadogLoopCancelFunc = nil
|
||||
}
|
||||
}
|
||||
|
||||
func parseDatadogAddress(address string) (string, string) {
|
||||
network := "udp"
|
||||
|
||||
var addr string
|
||||
switch {
|
||||
case strings.HasPrefix(address, unixAddressPrefix):
|
||||
network = "unix"
|
||||
addr = address[len(unixAddressPrefix):]
|
||||
case strings.HasPrefix(address, unixAddressDatagramPrefix):
|
||||
network = "unixgram"
|
||||
addr = address[len(unixAddressDatagramPrefix):]
|
||||
case strings.HasPrefix(address, unixAddressStreamPrefix):
|
||||
network = "unixstream"
|
||||
addr = address[len(unixAddressStreamPrefix):]
|
||||
case address != "":
|
||||
addr = address
|
||||
default:
|
||||
addr = "localhost:8125"
|
||||
}
|
||||
|
||||
return network, addr
|
||||
}
|
||||
155
pkg/observability/metrics/datadog_test.go
Normal file
155
pkg/observability/metrics/datadog_test.go
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stvp/go-udp-testing"
|
||||
ptypes "github.com/traefik/paerser/types"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
)
|
||||
|
||||
func TestDatadog(t *testing.T) {
|
||||
t.Cleanup(StopDatadog)
|
||||
|
||||
udp.SetAddr(":18125")
|
||||
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
|
||||
udp.Timeout = 5 * time.Second
|
||||
|
||||
datadogRegistry := RegisterDatadog(t.Context(), &otypes.Datadog{Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddRoutersLabels: true, AddServicesLabels: true})
|
||||
|
||||
if !datadogRegistry.IsEpEnabled() || !datadogRegistry.IsRouterEnabled() || !datadogRegistry.IsSvcEnabled() {
|
||||
t.Errorf("DatadogRegistry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()")
|
||||
}
|
||||
testDatadogRegistry(t, defaultMetricsPrefix, datadogRegistry)
|
||||
}
|
||||
|
||||
func TestDatadogWithPrefix(t *testing.T) {
|
||||
t.Cleanup(StopDatadog)
|
||||
|
||||
udp.SetAddr(":18125")
|
||||
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
|
||||
udp.Timeout = 5 * time.Second
|
||||
|
||||
datadogRegistry := RegisterDatadog(t.Context(), &otypes.Datadog{Prefix: "testPrefix", Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddRoutersLabels: true, AddServicesLabels: true})
|
||||
|
||||
testDatadogRegistry(t, "testPrefix", datadogRegistry)
|
||||
}
|
||||
|
||||
func TestDatadog_parseDatadogAddress(t *testing.T) {
|
||||
tests := []struct {
|
||||
desc string
|
||||
address string
|
||||
expNetwork string
|
||||
expAddress string
|
||||
}{
|
||||
{
|
||||
desc: "empty address",
|
||||
expNetwork: "udp",
|
||||
expAddress: "localhost:8125",
|
||||
},
|
||||
{
|
||||
desc: "udp address",
|
||||
address: "127.0.0.1:8080",
|
||||
expNetwork: "udp",
|
||||
expAddress: "127.0.0.1:8080",
|
||||
},
|
||||
{
|
||||
desc: "unix address",
|
||||
address: "unix:///path/to/datadog.socket",
|
||||
expNetwork: "unix",
|
||||
expAddress: "/path/to/datadog.socket",
|
||||
},
|
||||
{
|
||||
desc: "unixgram address",
|
||||
address: "unixgram:///path/to/datadog.socket",
|
||||
expNetwork: "unixgram",
|
||||
expAddress: "/path/to/datadog.socket",
|
||||
},
|
||||
{
|
||||
desc: "unixstream address",
|
||||
address: "unixstream:///path/to/datadog.socket",
|
||||
expNetwork: "unixstream",
|
||||
expAddress: "/path/to/datadog.socket",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
gotNetwork, gotAddress := parseDatadogAddress(test.address)
|
||||
assert.Equal(t, test.expNetwork, gotNetwork)
|
||||
assert.Equal(t, test.expAddress, gotAddress)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Registry) {
|
||||
t.Helper()
|
||||
|
||||
expected := []string{
|
||||
metricsPrefix + ".config.reload.total:1.000000|c\n",
|
||||
metricsPrefix + ".config.reload.lastSuccessTimestamp:1.000000|g\n",
|
||||
metricsPrefix + ".open.connections:1.000000|g|#entrypoint:test,protocol:TCP\n",
|
||||
|
||||
metricsPrefix + ".tls.certs.notAfterTimestamp:1.000000|g|#key:value\n",
|
||||
|
||||
metricsPrefix + ".entrypoint.request.total:1.000000|c|#entrypoint:test\n",
|
||||
metricsPrefix + ".entrypoint.request.tls.total:1.000000|c|#entrypoint:test,tls_version:foo,tls_cipher:bar\n",
|
||||
metricsPrefix + ".entrypoint.request.duration:10000.000000|h|#entrypoint:test\n",
|
||||
metricsPrefix + ".entrypoint.requests.bytes.total:1.000000|c|#entrypoint:test\n",
|
||||
metricsPrefix + ".entrypoint.responses.bytes.total:1.000000|c|#entrypoint:test\n",
|
||||
|
||||
metricsPrefix + ".router.request.total:1.000000|c|#router:demo,service:test,code:404,method:GET\n",
|
||||
metricsPrefix + ".router.request.total:1.000000|c|#router:demo,service:test,code:200,method:GET\n",
|
||||
metricsPrefix + ".router.request.tls.total:1.000000|c|#router:demo,service:test,tls_version:foo,tls_cipher:bar\n",
|
||||
metricsPrefix + ".router.request.duration:10000.000000|h|#router:demo,service:test,code:200\n",
|
||||
metricsPrefix + ".router.requests.bytes.total:1.000000|c|#router:demo,service:test,code:200,method:GET\n",
|
||||
metricsPrefix + ".router.responses.bytes.total:1.000000|c|#router:demo,service:test,code:200,method:GET\n",
|
||||
|
||||
metricsPrefix + ".service.request.total:1.000000|c|#service:test,code:404,method:GET\n",
|
||||
metricsPrefix + ".service.request.total:1.000000|c|#service:test,code:200,method:GET\n",
|
||||
metricsPrefix + ".service.request.tls.total:1.000000|c|#service:test,tls_version:foo,tls_cipher:bar\n",
|
||||
metricsPrefix + ".service.request.duration:10000.000000|h|#service:test,code:200\n",
|
||||
metricsPrefix + ".service.retries.total:2.000000|c|#service:test\n",
|
||||
metricsPrefix + ".service.request.duration:10000.000000|h|#service:test,code:200\n",
|
||||
metricsPrefix + ".service.server.up:1.000000|g|#service:test,url:http://127.0.0.1,one:two\n",
|
||||
metricsPrefix + ".service.requests.bytes.total:1.000000|c|#service:test,code:200,method:GET\n",
|
||||
metricsPrefix + ".service.responses.bytes.total:1.000000|c|#service:test,code:200,method:GET\n",
|
||||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
datadogRegistry.ConfigReloadsCounter().Add(1)
|
||||
datadogRegistry.LastConfigReloadSuccessGauge().Add(1)
|
||||
datadogRegistry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Add(1)
|
||||
|
||||
datadogRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
|
||||
|
||||
datadogRegistry.EntryPointReqsCounter().With(nil, "entrypoint", "test").Add(1)
|
||||
datadogRegistry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
datadogRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
datadogRegistry.EntryPointReqsBytesCounter().With("entrypoint", "test").Add(1)
|
||||
datadogRegistry.EntryPointRespsBytesCounter().With("entrypoint", "test").Add(1)
|
||||
|
||||
datadogRegistry.RouterReqsCounter().With(nil, "router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.RouterReqsCounter().With(nil, "router", "demo", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
datadogRegistry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
datadogRegistry.RouterReqsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.RouterRespsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
|
||||
datadogRegistry.ServiceReqsCounter().With(nil, "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ServiceReqsCounter().With(nil, "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
datadogRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1", "one", "two").Set(1)
|
||||
datadogRegistry.ServiceReqsBytesCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ServiceRespsBytesCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
})
|
||||
}
|
||||
57
pkg/observability/metrics/headers.go
Normal file
57
pkg/observability/metrics/headers.go
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/go-kit/kit/metrics"
|
||||
)
|
||||
|
||||
// CounterWithHeaders is a counter whose label values may be derived from
// http.Header values in addition to explicit label values.
type CounterWithHeaders interface {
	// Add adds the given delta value to the counter value.
	Add(delta float64)
	// With returns a counter carrying the given headers and label values.
	With(headers http.Header, labelValues ...string) CounterWithHeaders
}

// MultiCounterWithHeaders groups several CounterWithHeaders so that they can be driven as one.
type MultiCounterWithHeaders []CounterWithHeaders

// NewMultiCounterWithHeaders returns a multi-counter, wrapping the passed CounterWithHeaders.
func NewMultiCounterWithHeaders(c ...CounterWithHeaders) MultiCounterWithHeaders {
	return MultiCounterWithHeaders(c)
}

// Add adds the given delta value to every wrapped counter.
func (c MultiCounterWithHeaders) Add(delta float64) {
	for i := range c {
		c[i].Add(delta)
	}
}

// With applies the given http.Header and label values to every wrapped counter
// and returns the resulting counters as a new multi-counter.
func (c MultiCounterWithHeaders) With(headers http.Header, labelValues ...string) CounterWithHeaders {
	derived := make(MultiCounterWithHeaders, 0, len(c))
	for _, counter := range c {
		derived = append(derived, counter.With(headers, labelValues...))
	}

	return derived
}
|
||||
|
||||
// NewCounterWithNoopHeaders returns a CounterWithNoopHeaders.
|
||||
func NewCounterWithNoopHeaders(counter metrics.Counter) CounterWithNoopHeaders {
|
||||
return CounterWithNoopHeaders{counter: counter}
|
||||
}
|
||||
|
||||
// CounterWithNoopHeaders is a counter that satisfies CounterWithHeaders but ignores the given http.Header.
|
||||
type CounterWithNoopHeaders struct {
|
||||
counter metrics.Counter
|
||||
}
|
||||
|
||||
// Add adds the given delta value to the counter value.
|
||||
func (c CounterWithNoopHeaders) Add(delta float64) {
|
||||
c.counter.Add(delta)
|
||||
}
|
||||
|
||||
// With creates a new counter by appending the given label values and returns it.
|
||||
func (c CounterWithNoopHeaders) With(_ http.Header, labelValues ...string) CounterWithHeaders {
|
||||
return NewCounterWithNoopHeaders(c.counter.With(labelValues...))
|
||||
}
|
||||
174
pkg/observability/metrics/influxdb2.go
Normal file
174
pkg/observability/metrics/influxdb2.go
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/metrics/influx"
|
||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||
influxdb2api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||
"github.com/influxdata/influxdb-client-go/v2/api/write"
|
||||
influxdb2log "github.com/influxdata/influxdb-client-go/v2/log"
|
||||
influxdb "github.com/influxdata/influxdb1-client/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/traefik/traefik/v3/pkg/observability/logs"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
"github.com/traefik/traefik/v3/pkg/safe"
|
||||
)
|
||||
|
||||
var (
|
||||
influxDB2Ticker *time.Ticker
|
||||
influxDB2Store *influx.Influx
|
||||
influxDB2Client influxdb2.Client
|
||||
)
|
||||
|
||||
const (
	// Server-wide metrics.
	influxDBConfigReloadsName           = "traefik.config.reload.total"
	influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp"
	influxDBOpenConnsName               = "traefik.open.connections"

	// TLS metrics.
	influxDBTLSCertsNotAfterTimestampName = "traefik.tls.certs.notAfterTimestamp"

	// Entry point metrics.
	influxDBEntryPointReqsName        = "traefik.entrypoint.requests.total"
	influxDBEntryPointReqsTLSName     = "traefik.entrypoint.requests.tls.total"
	influxDBEntryPointReqDurationName = "traefik.entrypoint.request.duration"
	influxDBEntryPointReqsBytesName   = "traefik.entrypoint.requests.bytes.total"
	influxDBEntryPointRespsBytesName  = "traefik.entrypoint.responses.bytes.total"

	// Router metrics.
	influxDBRouterReqsName         = "traefik.router.requests.total"
	influxDBRouterReqsTLSName      = "traefik.router.requests.tls.total"
	influxDBRouterReqsDurationName = "traefik.router.request.duration"
	influxDBRouterReqsBytesName    = "traefik.router.requests.bytes.total"
	influxDBRouterRespsBytesName   = "traefik.router.responses.bytes.total"

	// Service metrics.
	influxDBServiceReqsName         = "traefik.service.requests.total"
	influxDBServiceReqsTLSName      = "traefik.service.requests.tls.total"
	influxDBServiceReqsDurationName = "traefik.service.request.duration"
	influxDBServiceRetriesTotalName = "traefik.service.retries.total"
	influxDBServiceServerUpName     = "traefik.service.server.up"
	influxDBServiceReqsBytesName    = "traefik.service.requests.bytes.total"
	influxDBServiceRespsBytesName   = "traefik.service.responses.bytes.total"
)
|
||||
|
||||
// RegisterInfluxDB2 creates metrics exporter for InfluxDB2.
|
||||
func RegisterInfluxDB2(ctx context.Context, config *otypes.InfluxDB2) Registry {
|
||||
logger := log.Ctx(ctx)
|
||||
|
||||
if influxDB2Client == nil {
|
||||
var err error
|
||||
if influxDB2Client, err = newInfluxDB2Client(config); err != nil {
|
||||
logger.Error().Err(err).Send()
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if influxDB2Store == nil {
|
||||
influxDB2Store = influx.New(
|
||||
config.AdditionalLabels,
|
||||
influxdb.BatchPointsConfig{},
|
||||
logs.NewGoKitWrapper(*logger),
|
||||
)
|
||||
|
||||
influxDB2Ticker = time.NewTicker(time.Duration(config.PushInterval))
|
||||
|
||||
safe.Go(func() {
|
||||
wc := influxDB2Client.WriteAPIBlocking(config.Org, config.Bucket)
|
||||
influxDB2Store.WriteLoop(ctx, influxDB2Ticker.C, influxDB2Writer{wc: wc})
|
||||
})
|
||||
}
|
||||
|
||||
registry := &standardRegistry{
|
||||
configReloadsCounter: influxDB2Store.NewCounter(influxDBConfigReloadsName),
|
||||
lastConfigReloadSuccessGauge: influxDB2Store.NewGauge(influxDBLastConfigReloadSuccessName),
|
||||
openConnectionsGauge: influxDB2Store.NewGauge(influxDBOpenConnsName),
|
||||
tlsCertsNotAfterTimestampGauge: influxDB2Store.NewGauge(influxDBTLSCertsNotAfterTimestampName),
|
||||
}
|
||||
|
||||
if config.AddEntryPointsLabels {
|
||||
registry.epEnabled = config.AddEntryPointsLabels
|
||||
registry.entryPointReqsCounter = NewCounterWithNoopHeaders(influxDB2Store.NewCounter(influxDBEntryPointReqsName))
|
||||
registry.entryPointReqsTLSCounter = influxDB2Store.NewCounter(influxDBEntryPointReqsTLSName)
|
||||
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(influxDB2Store.NewHistogram(influxDBEntryPointReqDurationName), time.Second)
|
||||
registry.entryPointReqsBytesCounter = influxDB2Store.NewCounter(influxDBEntryPointReqsBytesName)
|
||||
registry.entryPointRespsBytesCounter = influxDB2Store.NewCounter(influxDBEntryPointRespsBytesName)
|
||||
}
|
||||
|
||||
if config.AddRoutersLabels {
|
||||
registry.routerEnabled = config.AddRoutersLabels
|
||||
registry.routerReqsCounter = NewCounterWithNoopHeaders(influxDB2Store.NewCounter(influxDBRouterReqsName))
|
||||
registry.routerReqsTLSCounter = influxDB2Store.NewCounter(influxDBRouterReqsTLSName)
|
||||
registry.routerReqDurationHistogram, _ = NewHistogramWithScale(influxDB2Store.NewHistogram(influxDBRouterReqsDurationName), time.Second)
|
||||
registry.routerReqsBytesCounter = influxDB2Store.NewCounter(influxDBRouterReqsBytesName)
|
||||
registry.routerRespsBytesCounter = influxDB2Store.NewCounter(influxDBRouterRespsBytesName)
|
||||
}
|
||||
|
||||
if config.AddServicesLabels {
|
||||
registry.svcEnabled = config.AddServicesLabels
|
||||
registry.serviceReqsCounter = NewCounterWithNoopHeaders(influxDB2Store.NewCounter(influxDBServiceReqsName))
|
||||
registry.serviceReqsTLSCounter = influxDB2Store.NewCounter(influxDBServiceReqsTLSName)
|
||||
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(influxDB2Store.NewHistogram(influxDBServiceReqsDurationName), time.Second)
|
||||
registry.serviceRetriesCounter = influxDB2Store.NewCounter(influxDBServiceRetriesTotalName)
|
||||
registry.serviceServerUpGauge = influxDB2Store.NewGauge(influxDBServiceServerUpName)
|
||||
registry.serviceReqsBytesCounter = influxDB2Store.NewCounter(influxDBServiceReqsBytesName)
|
||||
registry.serviceRespsBytesCounter = influxDB2Store.NewCounter(influxDBServiceRespsBytesName)
|
||||
}
|
||||
|
||||
return registry
|
||||
}
|
||||
|
||||
// StopInfluxDB2 stops and resets InfluxDB2 client, ticker and store.
|
||||
func StopInfluxDB2() {
|
||||
if influxDB2Client != nil {
|
||||
influxDB2Client.Close()
|
||||
}
|
||||
influxDB2Client = nil
|
||||
|
||||
if influxDB2Ticker != nil {
|
||||
influxDB2Ticker.Stop()
|
||||
}
|
||||
influxDB2Ticker = nil
|
||||
|
||||
influxDB2Store = nil
|
||||
}
|
||||
|
||||
// newInfluxDB2Client creates an influxdb2.Client.
|
||||
func newInfluxDB2Client(config *otypes.InfluxDB2) (influxdb2.Client, error) {
|
||||
if config.Token == "" || config.Org == "" || config.Bucket == "" {
|
||||
return nil, errors.New("token, org or bucket property is missing")
|
||||
}
|
||||
|
||||
// Disable InfluxDB2 logs.
|
||||
// See https://github.com/influxdata/influxdb-client-go/blob/v2.7.0/options.go#L128
|
||||
influxdb2log.Log = nil
|
||||
|
||||
return influxdb2.NewClient(config.Address, config.Token), nil
|
||||
}
|
||||
|
||||
type influxDB2Writer struct {
|
||||
wc influxdb2api.WriteAPIBlocking
|
||||
}
|
||||
|
||||
func (w influxDB2Writer) Write(bp influxdb.BatchPoints) error {
|
||||
logger := log.With().Str(logs.MetricsProviderName, "influxdb2").Logger()
|
||||
|
||||
wps := make([]*write.Point, 0, len(bp.Points()))
|
||||
for _, p := range bp.Points() {
|
||||
fields, err := p.Fields()
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Msgf("Error while getting %s point fields", p.Name())
|
||||
continue
|
||||
}
|
||||
|
||||
wps = append(wps, influxdb2.NewPoint(
|
||||
p.Name(),
|
||||
p.Tags(),
|
||||
fields,
|
||||
p.Time(),
|
||||
))
|
||||
}
|
||||
|
||||
ctx := logger.WithContext(context.Background())
|
||||
|
||||
return w.wc.WritePoint(ctx, wps...)
|
||||
}
|
||||
140
pkg/observability/metrics/influxdb2_test.go
Normal file
140
pkg/observability/metrics/influxdb2_test.go
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
ptypes "github.com/traefik/paerser/types"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
)
|
||||
|
||||
func TestInfluxDB2(t *testing.T) {
|
||||
c := make(chan *string)
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
body, err := io.ReadAll(r.Body)
|
||||
require.NoError(t, err)
|
||||
|
||||
bodyStr := string(body)
|
||||
c <- &bodyStr
|
||||
_, _ = fmt.Fprintln(w, "ok")
|
||||
}))
|
||||
|
||||
influxDB2Registry := RegisterInfluxDB2(t.Context(),
|
||||
&otypes.InfluxDB2{
|
||||
Address: ts.URL,
|
||||
Token: "test-token",
|
||||
PushInterval: ptypes.Duration(10 * time.Millisecond),
|
||||
Org: "test-org",
|
||||
Bucket: "test-bucket",
|
||||
AddEntryPointsLabels: true,
|
||||
AddRoutersLabels: true,
|
||||
AddServicesLabels: true,
|
||||
})
|
||||
|
||||
t.Cleanup(func() {
|
||||
StopInfluxDB2()
|
||||
ts.Close()
|
||||
})
|
||||
|
||||
if !influxDB2Registry.IsEpEnabled() || !influxDB2Registry.IsRouterEnabled() || !influxDB2Registry.IsSvcEnabled() {
|
||||
t.Fatalf("InfluxDB2Registry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()")
|
||||
}
|
||||
|
||||
expectedServer := []string{
|
||||
`(traefik\.config\.reload\.total count=1) [\d]{19}`,
|
||||
`(traefik\.config\.reload\.lastSuccessTimestamp value=1) [\d]{19}`,
|
||||
`(traefik\.open\.connections,entrypoint=test,protocol=TCP value=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
influxDB2Registry.ConfigReloadsCounter().Add(1)
|
||||
influxDB2Registry.LastConfigReloadSuccessGauge().Set(1)
|
||||
influxDB2Registry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Set(1)
|
||||
msgServer := <-c
|
||||
|
||||
assertMessage(t, *msgServer, expectedServer)
|
||||
|
||||
expectedTLS := []string{
|
||||
`(traefik\.tls\.certs\.notAfterTimestamp,key=value value=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
influxDB2Registry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
|
||||
msgTLS := <-c
|
||||
|
||||
assertMessage(t, *msgTLS, expectedTLS)
|
||||
|
||||
expectedEntrypoint := []string{
|
||||
`(traefik\.entrypoint\.requests\.total,code=200,entrypoint=test,method=GET count=1) [\d]{19}`,
|
||||
`(traefik\.entrypoint\.requests\.tls\.total,entrypoint=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`,
|
||||
`(traefik\.entrypoint\.request\.duration(?:,code=[\d]{3})?,entrypoint=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
|
||||
`(traefik\.entrypoint\.requests\.bytes\.total,code=200,entrypoint=test,method=GET count=1) [\d]{19}`,
|
||||
`(traefik\.entrypoint\.responses\.bytes\.total,code=200,entrypoint=test,method=GET count=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
influxDB2Registry.EntryPointReqsCounter().With(nil, "entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDB2Registry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
influxDB2Registry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
influxDB2Registry.EntryPointReqsBytesCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDB2Registry.EntryPointRespsBytesCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
msgEntrypoint := <-c
|
||||
|
||||
assertMessage(t, *msgEntrypoint, expectedEntrypoint)
|
||||
|
||||
expectedRouter := []string{
|
||||
`(traefik\.router\.requests\.total,code=200,method=GET,router=demo,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.router\.requests\.total,code=404,method=GET,router=demo,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.router\.requests\.tls\.total,router=demo,service=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`,
|
||||
`(traefik\.router\.request\.duration,code=200,router=demo,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
|
||||
`(traefik\.router\.requests\.bytes\.total,code=200,method=GET,router=demo,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.router\.responses\.bytes\.total,code=200,method=GET,router=demo,service=test count=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
influxDB2Registry.RouterReqsCounter().With(nil, "router", "demo", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDB2Registry.RouterReqsCounter().With(nil, "router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDB2Registry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
influxDB2Registry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
influxDB2Registry.RouterReqsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDB2Registry.RouterRespsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
msgRouter := <-c
|
||||
|
||||
assertMessage(t, *msgRouter, expectedRouter)
|
||||
|
||||
expectedService := []string{
|
||||
`(traefik\.service\.requests\.total,code=200,method=GET,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.service\.requests\.total,code=404,method=GET,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.service\.requests\.tls\.total,service=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`,
|
||||
`(traefik\.service\.request\.duration,code=200,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
|
||||
`(traefik\.service\.server\.up,service=test,url=http://127.0.0.1 value=1) [\d]{19}`,
|
||||
`(traefik\.service\.requests\.bytes\.total,code=200,method=GET,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.service\.responses\.bytes\.total,code=200,method=GET,service=test count=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
influxDB2Registry.ServiceReqsCounter().With(nil, "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDB2Registry.ServiceReqsCounter().With(nil, "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDB2Registry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
influxDB2Registry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
influxDB2Registry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1)
|
||||
influxDB2Registry.ServiceReqsBytesCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDB2Registry.ServiceRespsBytesCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
msgService := <-c
|
||||
|
||||
assertMessage(t, *msgService, expectedService)
|
||||
|
||||
expectedServiceRetries := []string{
|
||||
`(traefik\.service\.retries\.total,service=test count=2) [\d]{19}`,
|
||||
`(traefik\.service\.retries\.total,service=foobar count=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
influxDB2Registry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
influxDB2Registry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
influxDB2Registry.ServiceRetriesCounter().With("service", "foobar").Add(1)
|
||||
|
||||
msgServiceRetries := <-c
|
||||
|
||||
assertMessage(t, *msgServiceRetries, expectedServiceRetries)
|
||||
}
|
||||
381
pkg/observability/metrics/metrics.go
Normal file
381
pkg/observability/metrics/metrics.go
Normal file
|
|
@ -0,0 +1,381 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/metrics"
|
||||
"github.com/go-kit/kit/metrics/multi"
|
||||
)
|
||||
|
||||
const defaultMetricsPrefix = "traefik"
|
||||
|
||||
// Registry has to be implemented by any system that wants to monitor and expose metrics.
// An implementation may return nil from a metric accessor when it does not
// provide that metric; NewMultiRegistry checks for this before aggregating.
|
||||
type Registry interface {
|
||||
// IsEpEnabled shows whether metrics instrumentation is enabled on entry points.
|
||||
IsEpEnabled() bool
|
||||
// IsRouterEnabled shows whether metrics instrumentation is enabled on routers.
|
||||
IsRouterEnabled() bool
|
||||
// IsSvcEnabled shows whether metrics instrumentation is enabled on services.
|
||||
IsSvcEnabled() bool
|
||||
|
||||
// server metrics
|
||||
|
||||
ConfigReloadsCounter() metrics.Counter
|
||||
LastConfigReloadSuccessGauge() metrics.Gauge
|
||||
OpenConnectionsGauge() metrics.Gauge
|
||||
|
||||
// TLS metrics
|
||||
|
||||
TLSCertsNotAfterTimestampGauge() metrics.Gauge
|
||||
|
||||
// entry point metrics
|
||||
|
||||
EntryPointReqsCounter() CounterWithHeaders
|
||||
EntryPointReqsTLSCounter() metrics.Counter
|
||||
EntryPointReqDurationHistogram() ScalableHistogram
|
||||
EntryPointReqsBytesCounter() metrics.Counter
|
||||
EntryPointRespsBytesCounter() metrics.Counter
|
||||
|
||||
// router metrics
|
||||
|
||||
RouterReqsCounter() CounterWithHeaders
|
||||
RouterReqsTLSCounter() metrics.Counter
|
||||
RouterReqDurationHistogram() ScalableHistogram
|
||||
RouterReqsBytesCounter() metrics.Counter
|
||||
RouterRespsBytesCounter() metrics.Counter
|
||||
|
||||
// service metrics
|
||||
|
||||
ServiceReqsCounter() CounterWithHeaders
|
||||
ServiceReqsTLSCounter() metrics.Counter
|
||||
ServiceReqDurationHistogram() ScalableHistogram
|
||||
ServiceRetriesCounter() metrics.Counter
|
||||
ServiceServerUpGauge() metrics.Gauge
|
||||
ServiceReqsBytesCounter() metrics.Counter
|
||||
ServiceRespsBytesCounter() metrics.Counter
|
||||
}
|
||||
|
||||
// NewVoidRegistry is a noop implementation of metrics.Registry.
|
||||
// It is used to avoid nil checking in components that do metric collections.
|
||||
func NewVoidRegistry() Registry {
|
||||
return NewMultiRegistry([]Registry{})
|
||||
}
|
||||
|
||||
// NewMultiRegistry is an implementation of metrics.Registry that wraps multiple registries.
|
||||
// It handles the case when a registry hasn't registered some metric and returns nil.
|
||||
// This allows for feature disparity between the different metric implementations.
|
||||
func NewMultiRegistry(registries []Registry) Registry {
|
||||
var configReloadsCounter []metrics.Counter
|
||||
var lastConfigReloadSuccessGauge []metrics.Gauge
|
||||
var openConnectionsGauge []metrics.Gauge
|
||||
var tlsCertsNotAfterTimestampGauge []metrics.Gauge
|
||||
var entryPointReqsCounter []CounterWithHeaders
|
||||
var entryPointReqsTLSCounter []metrics.Counter
|
||||
var entryPointReqDurationHistogram []ScalableHistogram
|
||||
var entryPointReqsBytesCounter []metrics.Counter
|
||||
var entryPointRespsBytesCounter []metrics.Counter
|
||||
var routerReqsCounter []CounterWithHeaders
|
||||
var routerReqsTLSCounter []metrics.Counter
|
||||
var routerReqDurationHistogram []ScalableHistogram
|
||||
var routerReqsBytesCounter []metrics.Counter
|
||||
var routerRespsBytesCounter []metrics.Counter
|
||||
var serviceReqsCounter []CounterWithHeaders
|
||||
var serviceReqsTLSCounter []metrics.Counter
|
||||
var serviceReqDurationHistogram []ScalableHistogram
|
||||
var serviceRetriesCounter []metrics.Counter
|
||||
var serviceServerUpGauge []metrics.Gauge
|
||||
var serviceReqsBytesCounter []metrics.Counter
|
||||
var serviceRespsBytesCounter []metrics.Counter
|
||||
|
||||
for _, r := range registries {
|
||||
if r.ConfigReloadsCounter() != nil {
|
||||
configReloadsCounter = append(configReloadsCounter, r.ConfigReloadsCounter())
|
||||
}
|
||||
if r.LastConfigReloadSuccessGauge() != nil {
|
||||
lastConfigReloadSuccessGauge = append(lastConfigReloadSuccessGauge, r.LastConfigReloadSuccessGauge())
|
||||
}
|
||||
if r.OpenConnectionsGauge() != nil {
|
||||
openConnectionsGauge = append(openConnectionsGauge, r.OpenConnectionsGauge())
|
||||
}
|
||||
if r.TLSCertsNotAfterTimestampGauge() != nil {
|
||||
tlsCertsNotAfterTimestampGauge = append(tlsCertsNotAfterTimestampGauge, r.TLSCertsNotAfterTimestampGauge())
|
||||
}
|
||||
if r.EntryPointReqsCounter() != nil {
|
||||
entryPointReqsCounter = append(entryPointReqsCounter, r.EntryPointReqsCounter())
|
||||
}
|
||||
if r.EntryPointReqsTLSCounter() != nil {
|
||||
entryPointReqsTLSCounter = append(entryPointReqsTLSCounter, r.EntryPointReqsTLSCounter())
|
||||
}
|
||||
if r.EntryPointReqDurationHistogram() != nil {
|
||||
entryPointReqDurationHistogram = append(entryPointReqDurationHistogram, r.EntryPointReqDurationHistogram())
|
||||
}
|
||||
if r.EntryPointReqsBytesCounter() != nil {
|
||||
entryPointReqsBytesCounter = append(entryPointReqsBytesCounter, r.EntryPointReqsBytesCounter())
|
||||
}
|
||||
if r.EntryPointRespsBytesCounter() != nil {
|
||||
entryPointRespsBytesCounter = append(entryPointRespsBytesCounter, r.EntryPointRespsBytesCounter())
|
||||
}
|
||||
if r.RouterReqsCounter() != nil {
|
||||
routerReqsCounter = append(routerReqsCounter, r.RouterReqsCounter())
|
||||
}
|
||||
if r.RouterReqsTLSCounter() != nil {
|
||||
routerReqsTLSCounter = append(routerReqsTLSCounter, r.RouterReqsTLSCounter())
|
||||
}
|
||||
if r.RouterReqDurationHistogram() != nil {
|
||||
routerReqDurationHistogram = append(routerReqDurationHistogram, r.RouterReqDurationHistogram())
|
||||
}
|
||||
if r.RouterReqsBytesCounter() != nil {
|
||||
routerReqsBytesCounter = append(routerReqsBytesCounter, r.RouterReqsBytesCounter())
|
||||
}
|
||||
if r.RouterRespsBytesCounter() != nil {
|
||||
routerRespsBytesCounter = append(routerRespsBytesCounter, r.RouterRespsBytesCounter())
|
||||
}
|
||||
if r.ServiceReqsCounter() != nil {
|
||||
serviceReqsCounter = append(serviceReqsCounter, r.ServiceReqsCounter())
|
||||
}
|
||||
if r.ServiceReqsTLSCounter() != nil {
|
||||
serviceReqsTLSCounter = append(serviceReqsTLSCounter, r.ServiceReqsTLSCounter())
|
||||
}
|
||||
if r.ServiceReqDurationHistogram() != nil {
|
||||
serviceReqDurationHistogram = append(serviceReqDurationHistogram, r.ServiceReqDurationHistogram())
|
||||
}
|
||||
if r.ServiceRetriesCounter() != nil {
|
||||
serviceRetriesCounter = append(serviceRetriesCounter, r.ServiceRetriesCounter())
|
||||
}
|
||||
if r.ServiceServerUpGauge() != nil {
|
||||
serviceServerUpGauge = append(serviceServerUpGauge, r.ServiceServerUpGauge())
|
||||
}
|
||||
if r.ServiceReqsBytesCounter() != nil {
|
||||
serviceReqsBytesCounter = append(serviceReqsBytesCounter, r.ServiceReqsBytesCounter())
|
||||
}
|
||||
if r.ServiceRespsBytesCounter() != nil {
|
||||
serviceRespsBytesCounter = append(serviceRespsBytesCounter, r.ServiceRespsBytesCounter())
|
||||
}
|
||||
}
|
||||
|
||||
return &standardRegistry{
|
||||
epEnabled: len(entryPointReqsCounter) > 0 || len(entryPointReqDurationHistogram) > 0,
|
||||
svcEnabled: len(serviceReqsCounter) > 0 || len(serviceReqDurationHistogram) > 0 || len(serviceRetriesCounter) > 0 || len(serviceServerUpGauge) > 0,
|
||||
routerEnabled: len(routerReqsCounter) > 0 || len(routerReqDurationHistogram) > 0,
|
||||
configReloadsCounter: multi.NewCounter(configReloadsCounter...),
|
||||
lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
|
||||
openConnectionsGauge: multi.NewGauge(openConnectionsGauge...),
|
||||
tlsCertsNotAfterTimestampGauge: multi.NewGauge(tlsCertsNotAfterTimestampGauge...),
|
||||
entryPointReqsCounter: NewMultiCounterWithHeaders(entryPointReqsCounter...),
|
||||
entryPointReqsTLSCounter: multi.NewCounter(entryPointReqsTLSCounter...),
|
||||
entryPointReqDurationHistogram: MultiHistogram(entryPointReqDurationHistogram),
|
||||
entryPointReqsBytesCounter: multi.NewCounter(entryPointReqsBytesCounter...),
|
||||
entryPointRespsBytesCounter: multi.NewCounter(entryPointRespsBytesCounter...),
|
||||
routerReqsCounter: NewMultiCounterWithHeaders(routerReqsCounter...),
|
||||
routerReqsTLSCounter: multi.NewCounter(routerReqsTLSCounter...),
|
||||
routerReqDurationHistogram: MultiHistogram(routerReqDurationHistogram),
|
||||
routerReqsBytesCounter: multi.NewCounter(routerReqsBytesCounter...),
|
||||
routerRespsBytesCounter: multi.NewCounter(routerRespsBytesCounter...),
|
||||
serviceReqsCounter: NewMultiCounterWithHeaders(serviceReqsCounter...),
|
||||
serviceReqsTLSCounter: multi.NewCounter(serviceReqsTLSCounter...),
|
||||
serviceReqDurationHistogram: MultiHistogram(serviceReqDurationHistogram),
|
||||
serviceRetriesCounter: multi.NewCounter(serviceRetriesCounter...),
|
||||
serviceServerUpGauge: multi.NewGauge(serviceServerUpGauge...),
|
||||
serviceReqsBytesCounter: multi.NewCounter(serviceReqsBytesCounter...),
|
||||
serviceRespsBytesCounter: multi.NewCounter(serviceRespsBytesCounter...),
|
||||
}
|
||||
}
|
||||
|
||||
// standardRegistry is a Registry implementation backed by plain fields:
// every accessor method returns the field of the same name.
// Metric fields that were not populated stay nil.
type standardRegistry struct {
|
||||
// Instrumentation flags returned by IsEpEnabled, IsRouterEnabled and IsSvcEnabled.
epEnabled bool
|
||||
routerEnabled bool
|
||||
svcEnabled bool
|
||||
// Server and TLS metrics.
configReloadsCounter metrics.Counter
|
||||
lastConfigReloadSuccessGauge metrics.Gauge
|
||||
openConnectionsGauge metrics.Gauge
|
||||
tlsCertsNotAfterTimestampGauge metrics.Gauge
|
||||
// Entry point metrics.
entryPointReqsCounter CounterWithHeaders
|
||||
entryPointReqsTLSCounter metrics.Counter
|
||||
entryPointReqDurationHistogram ScalableHistogram
|
||||
entryPointReqsBytesCounter metrics.Counter
|
||||
entryPointRespsBytesCounter metrics.Counter
|
||||
// Router metrics.
routerReqsCounter CounterWithHeaders
|
||||
routerReqsTLSCounter metrics.Counter
|
||||
routerReqDurationHistogram ScalableHistogram
|
||||
routerReqsBytesCounter metrics.Counter
|
||||
routerRespsBytesCounter metrics.Counter
|
||||
// Service metrics.
serviceReqsCounter CounterWithHeaders
|
||||
serviceReqsTLSCounter metrics.Counter
|
||||
serviceReqDurationHistogram ScalableHistogram
|
||||
serviceRetriesCounter metrics.Counter
|
||||
serviceServerUpGauge metrics.Gauge
|
||||
serviceReqsBytesCounter metrics.Counter
|
||||
serviceRespsBytesCounter metrics.Counter
|
||||
}
|
||||
|
||||
// IsEpEnabled implements Registry and returns the registry's epEnabled flag.
func (r *standardRegistry) IsEpEnabled() bool {
|
||||
return r.epEnabled
|
||||
}
|
||||
|
||||
// IsRouterEnabled implements Registry and returns the registry's routerEnabled flag.
func (r *standardRegistry) IsRouterEnabled() bool {
|
||||
return r.routerEnabled
|
||||
}
|
||||
|
||||
// IsSvcEnabled implements Registry and returns the registry's svcEnabled flag.
func (r *standardRegistry) IsSvcEnabled() bool {
|
||||
return r.svcEnabled
|
||||
}
|
||||
|
||||
// ConfigReloadsCounter implements Registry and returns the stored config reloads counter.
func (r *standardRegistry) ConfigReloadsCounter() metrics.Counter {
|
||||
return r.configReloadsCounter
|
||||
}
|
||||
|
||||
// LastConfigReloadSuccessGauge implements Registry and returns the stored last config reload success gauge.
func (r *standardRegistry) LastConfigReloadSuccessGauge() metrics.Gauge {
|
||||
return r.lastConfigReloadSuccessGauge
|
||||
}
|
||||
|
||||
// OpenConnectionsGauge implements Registry and returns the stored open connections gauge.
func (r *standardRegistry) OpenConnectionsGauge() metrics.Gauge {
|
||||
return r.openConnectionsGauge
|
||||
}
|
||||
|
||||
// TLSCertsNotAfterTimestampGauge implements Registry and returns the stored TLS certificates not-after timestamp gauge.
func (r *standardRegistry) TLSCertsNotAfterTimestampGauge() metrics.Gauge {
|
||||
return r.tlsCertsNotAfterTimestampGauge
|
||||
}
|
||||
|
||||
// EntryPointReqsCounter implements Registry and returns the stored entry point requests counter.
func (r *standardRegistry) EntryPointReqsCounter() CounterWithHeaders {
|
||||
return r.entryPointReqsCounter
|
||||
}
|
||||
|
||||
// EntryPointReqsTLSCounter implements Registry and returns the stored entry point TLS requests counter.
func (r *standardRegistry) EntryPointReqsTLSCounter() metrics.Counter {
|
||||
return r.entryPointReqsTLSCounter
|
||||
}
|
||||
|
||||
// EntryPointReqDurationHistogram implements Registry and returns the stored entry point request duration histogram.
func (r *standardRegistry) EntryPointReqDurationHistogram() ScalableHistogram {
|
||||
return r.entryPointReqDurationHistogram
|
||||
}
|
||||
|
||||
// EntryPointReqsBytesCounter implements Registry and returns the stored entry point request bytes counter.
func (r *standardRegistry) EntryPointReqsBytesCounter() metrics.Counter {
|
||||
return r.entryPointReqsBytesCounter
|
||||
}
|
||||
|
||||
// EntryPointRespsBytesCounter implements Registry and returns the stored entry point response bytes counter.
func (r *standardRegistry) EntryPointRespsBytesCounter() metrics.Counter {
|
||||
return r.entryPointRespsBytesCounter
|
||||
}
|
||||
|
||||
// RouterReqsCounter implements Registry and returns the stored router requests counter.
func (r *standardRegistry) RouterReqsCounter() CounterWithHeaders {
|
||||
return r.routerReqsCounter
|
||||
}
|
||||
|
||||
// RouterReqsTLSCounter implements Registry and returns the stored router TLS requests counter.
func (r *standardRegistry) RouterReqsTLSCounter() metrics.Counter {
|
||||
return r.routerReqsTLSCounter
|
||||
}
|
||||
|
||||
// RouterReqDurationHistogram implements Registry and returns the stored router request duration histogram.
func (r *standardRegistry) RouterReqDurationHistogram() ScalableHistogram {
|
||||
return r.routerReqDurationHistogram
|
||||
}
|
||||
|
||||
// RouterReqsBytesCounter implements Registry and returns the stored router request bytes counter.
func (r *standardRegistry) RouterReqsBytesCounter() metrics.Counter {
|
||||
return r.routerReqsBytesCounter
|
||||
}
|
||||
|
||||
// RouterRespsBytesCounter implements Registry and returns the stored router response bytes counter.
func (r *standardRegistry) RouterRespsBytesCounter() metrics.Counter {
|
||||
return r.routerRespsBytesCounter
|
||||
}
|
||||
|
||||
// ServiceReqsCounter implements Registry and returns the stored service requests counter.
func (r *standardRegistry) ServiceReqsCounter() CounterWithHeaders {
|
||||
return r.serviceReqsCounter
|
||||
}
|
||||
|
||||
// ServiceReqsTLSCounter implements Registry and returns the stored service TLS requests counter.
func (r *standardRegistry) ServiceReqsTLSCounter() metrics.Counter {
|
||||
return r.serviceReqsTLSCounter
|
||||
}
|
||||
|
||||
// ServiceReqDurationHistogram implements Registry and returns the stored service request duration histogram.
func (r *standardRegistry) ServiceReqDurationHistogram() ScalableHistogram {
|
||||
return r.serviceReqDurationHistogram
|
||||
}
|
||||
|
||||
// ServiceRetriesCounter implements Registry and returns the stored service retries counter.
func (r *standardRegistry) ServiceRetriesCounter() metrics.Counter {
|
||||
return r.serviceRetriesCounter
|
||||
}
|
||||
|
||||
// ServiceServerUpGauge implements Registry and returns the stored service server-up gauge.
func (r *standardRegistry) ServiceServerUpGauge() metrics.Gauge {
|
||||
return r.serviceServerUpGauge
|
||||
}
|
||||
|
||||
// ServiceReqsBytesCounter implements Registry and returns the stored service request bytes counter.
func (r *standardRegistry) ServiceReqsBytesCounter() metrics.Counter {
|
||||
return r.serviceReqsBytesCounter
|
||||
}
|
||||
|
||||
// ServiceRespsBytesCounter implements Registry and returns the stored service response bytes counter.
func (r *standardRegistry) ServiceRespsBytesCounter() metrics.Counter {
|
||||
return r.serviceRespsBytesCounter
|
||||
}
|
||||
|
||||
// ScalableHistogram is a Histogram with a predefined time unit,
|
||||
// used when producing observations without explicitly setting the observed value.
|
||||
type ScalableHistogram interface {
|
||||
// With returns a ScalableHistogram bound to the given label values.
With(labelValues ...string) ScalableHistogram
|
||||
// Observe records the value v.
Observe(v float64)
|
||||
// ObserveFromStart records an observation derived from the time elapsed since start.
ObserveFromStart(start time.Time)
|
||||
}
|
||||
|
||||
// HistogramWithScale is a histogram that will convert its observed value to the specified unit.
// The conversion is applied by ObserveFromStart; Observe forwards its value unchanged.
|
||||
type HistogramWithScale struct {
|
||||
// histogram is the wrapped histogram that receives the observations.
histogram metrics.Histogram
|
||||
// unit is the duration that counts as 1 in recorded values (e.g. time.Second).
unit time.Duration
|
||||
}
|
||||
|
||||
// With implements ScalableHistogram.
|
||||
func (s *HistogramWithScale) With(labelValues ...string) ScalableHistogram {
|
||||
h, _ := NewHistogramWithScale(s.histogram.With(labelValues...), s.unit)
|
||||
return h
|
||||
}
|
||||
|
||||
// ObserveFromStart implements ScalableHistogram.
|
||||
func (s *HistogramWithScale) ObserveFromStart(start time.Time) {
|
||||
if s.unit <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
d := float64(time.Since(start).Nanoseconds()) / float64(s.unit)
|
||||
if d < 0 {
|
||||
d = 0
|
||||
}
|
||||
s.histogram.Observe(d)
|
||||
}
|
||||
|
||||
// Observe implements ScalableHistogram.
// The value is forwarded to the wrapped histogram as-is, without unit conversion.
|
||||
func (s *HistogramWithScale) Observe(v float64) {
|
||||
s.histogram.Observe(v)
|
||||
}
|
||||
|
||||
// NewHistogramWithScale returns a ScalableHistogram. It returns an error if the given unit is <= 0.
|
||||
func NewHistogramWithScale(histogram metrics.Histogram, unit time.Duration) (ScalableHistogram, error) {
|
||||
if unit <= 0 {
|
||||
return nil, errors.New("invalid time unit")
|
||||
}
|
||||
return &HistogramWithScale{
|
||||
histogram: histogram,
|
||||
unit: unit,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// MultiHistogram collects multiple individual histograms and treats them as a unit:
// every ScalableHistogram call is forwarded to each member in order.
|
||||
type MultiHistogram []ScalableHistogram
|
||||
|
||||
// ObserveFromStart implements ScalableHistogram.
|
||||
func (h MultiHistogram) ObserveFromStart(start time.Time) {
|
||||
for _, histogram := range h {
|
||||
histogram.ObserveFromStart(start)
|
||||
}
|
||||
}
|
||||
|
||||
// Observe implements ScalableHistogram.
|
||||
func (h MultiHistogram) Observe(v float64) {
|
||||
for _, histogram := range h {
|
||||
histogram.Observe(v)
|
||||
}
|
||||
}
|
||||
|
||||
// With implements ScalableHistogram.
|
||||
func (h MultiHistogram) With(labelValues ...string) ScalableHistogram {
|
||||
next := make(MultiHistogram, len(h))
|
||||
for i := range h {
|
||||
next[i] = h[i].With(labelValues...)
|
||||
}
|
||||
return next
|
||||
}
|
||||
121
pkg/observability/metrics/metrics_test.go
Normal file
121
pkg/observability/metrics/metrics_test.go
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/metrics"
|
||||
"github.com/go-kit/kit/metrics/generic"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestScalableHistogram(t *testing.T) {
|
||||
h := generic.NewHistogram("test", 1)
|
||||
sh, err := NewHistogramWithScale(h, time.Millisecond)
|
||||
require.NoError(t, err)
|
||||
|
||||
ticker := time.NewTicker(500 * time.Millisecond)
|
||||
<-ticker.C
|
||||
start := time.Now()
|
||||
<-ticker.C
|
||||
sh.ObserveFromStart(start)
|
||||
|
||||
var b bytes.Buffer
|
||||
h.Print(&b)
|
||||
|
||||
extractedDurationString := strings.Split(strings.Split(b.String(), "\n")[1], " ")
|
||||
measuredDuration, err := time.ParseDuration(extractedDurationString[0] + "ms")
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.InDelta(t, 500*time.Millisecond, measuredDuration, float64(15*time.Millisecond))
|
||||
}
|
||||
|
||||
func TestNewMultiRegistry(t *testing.T) {
|
||||
registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()}
|
||||
registry := NewMultiRegistry(registries)
|
||||
|
||||
registry.ServiceReqsCounter().With(nil, "key", "requests").Add(1)
|
||||
registry.ServiceReqDurationHistogram().With("key", "durations").Observe(float64(2))
|
||||
registry.ServiceRetriesCounter().With("key", "retries").Add(3)
|
||||
|
||||
for _, collectingRegistry := range registries {
|
||||
cReqsCounter := collectingRegistry.ServiceReqsCounter().(*counterWithHeadersMock)
|
||||
cReqDurationHistogram := collectingRegistry.ServiceReqDurationHistogram().(*histogramMock)
|
||||
cRetriesCounter := collectingRegistry.ServiceRetriesCounter().(*counterMock)
|
||||
|
||||
wantCounterValue := float64(1)
|
||||
if cReqsCounter.counterValue != wantCounterValue {
|
||||
t.Errorf("Got value %f for ReqsCounter, want %f", cReqsCounter.counterValue, wantCounterValue)
|
||||
}
|
||||
wantHistogramValue := float64(2)
|
||||
if cReqDurationHistogram.lastHistogramValue != wantHistogramValue {
|
||||
t.Errorf("Got last observation %f for ReqDurationHistogram, want %f", cReqDurationHistogram.lastHistogramValue, wantHistogramValue)
|
||||
}
|
||||
wantCounterValue = float64(3)
|
||||
if cRetriesCounter.counterValue != wantCounterValue {
|
||||
t.Errorf("Got value %f for RetriesCounter, want %f", cRetriesCounter.counterValue, wantCounterValue)
|
||||
}
|
||||
|
||||
assert.Equal(t, []string{"key", "requests"}, cReqsCounter.lastLabelValues)
|
||||
assert.Equal(t, []string{"key", "durations"}, cReqDurationHistogram.lastLabelValues)
|
||||
assert.Equal(t, []string{"key", "retries"}, cRetriesCounter.lastLabelValues)
|
||||
}
|
||||
}
|
||||
|
||||
func newCollectingRetryMetrics() Registry {
|
||||
return &standardRegistry{
|
||||
serviceReqsCounter: &counterWithHeadersMock{},
|
||||
serviceReqDurationHistogram: &histogramMock{},
|
||||
serviceRetriesCounter: &counterMock{},
|
||||
}
|
||||
}
|
||||
|
||||
// counterMock is a metrics.Counter test double that records what it receives.
type counterMock struct {
|
||||
// counterValue is the sum of all deltas passed to Add.
counterValue float64
|
||||
// lastLabelValues holds the label values of the most recent With call.
lastLabelValues []string
|
||||
}
|
||||
|
||||
// With records labelValues and returns the same mock, so later Add calls
// accumulate on the receiver.
func (c *counterMock) With(labelValues ...string) metrics.Counter {
|
||||
c.lastLabelValues = labelValues
|
||||
return c
|
||||
}
|
||||
|
||||
// Add accumulates delta into counterValue.
func (c *counterMock) Add(delta float64) {
|
||||
c.counterValue += delta
|
||||
}
|
||||
|
||||
// counterWithHeadersMock is a CounterWithHeaders test double that records what it receives.
type counterWithHeadersMock struct {
|
||||
// counterValue is the sum of all deltas passed to Add.
counterValue float64
|
||||
// lastLabelValues holds the label values of the most recent With call.
lastLabelValues []string
|
||||
}
|
||||
|
||||
// With records labelValues, ignores the headers, and returns the same mock.
func (c *counterWithHeadersMock) With(_ http.Header, labelValues ...string) CounterWithHeaders {
|
||||
c.lastLabelValues = labelValues
|
||||
return c
|
||||
}
|
||||
|
||||
// Add accumulates delta into counterValue.
func (c *counterWithHeadersMock) Add(delta float64) {
|
||||
c.counterValue += delta
|
||||
}
|
||||
|
||||
// histogramMock is a ScalableHistogram test double that records what it receives.
type histogramMock struct {
|
||||
// lastHistogramValue is the value of the most recent Observe call.
lastHistogramValue float64
|
||||
// lastLabelValues holds the label values of the most recent With call.
lastLabelValues []string
|
||||
}
|
||||
|
||||
// With records labelValues and returns the same mock.
func (c *histogramMock) With(labelValues ...string) ScalableHistogram {
|
||||
c.lastLabelValues = labelValues
|
||||
return c
|
||||
}
|
||||
|
||||
// Start does nothing.
// NOTE(review): ScalableHistogram as declared in this package has no Start
// method, so this looks like a leftover — confirm before removing.
func (c *histogramMock) Start() {}
|
||||
|
||||
// ObserveFromStart does nothing; it only exists to satisfy ScalableHistogram.
func (c *histogramMock) ObserveFromStart(t time.Time) {}
|
||||
|
||||
// Observe stores v as the last observed value, replacing any previous one.
func (c *histogramMock) Observe(v float64) {
|
||||
c.lastHistogramValue = v
|
||||
}
|
||||
513
pkg/observability/metrics/otel.go
Normal file
513
pkg/observability/metrics/otel.go
Normal file
|
|
@ -0,0 +1,513 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/metrics"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/traefik/traefik/v3/pkg/observability"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
"github.com/traefik/traefik/v3/pkg/types"
|
||||
"github.com/traefik/traefik/v3/pkg/version"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.37.0"
|
||||
"go.opentelemetry.io/otel/semconv/v1.37.0/httpconv"
|
||||
"google.golang.org/grpc/credentials"
|
||||
"google.golang.org/grpc/encoding/gzip"
|
||||
)
|
||||
|
||||
// Package-level OpenTelemetry state shared by the functions of this file.
var (
|
||||
// openTelemetryMeterProvider is created on first use by NewSemConvMetricRegistry
// or RegisterOpenTelemetry, can be replaced through SetMeterProvider, and is
// reset to nil by StopOpenTelemetry.
openTelemetryMeterProvider *sdkmetric.MeterProvider
|
||||
// openTelemetryGaugeCollector is created on first use by RegisterOpenTelemetry.
openTelemetryGaugeCollector *gaugeCollector
|
||||
)
|
||||
|
||||
// SetMeterProvider sets the meter provider for the tests.
// The provider is stored in the package-level variable and also installed as
// the global OpenTelemetry meter provider.
|
||||
func SetMeterProvider(meterProvider *sdkmetric.MeterProvider) {
|
||||
openTelemetryMeterProvider = meterProvider
|
||||
otel.SetMeterProvider(meterProvider)
|
||||
}
|
||||
|
||||
// SemConvMetricsRegistry holds the stable semantic-convention metric instruments.
// Its accessor methods accept a nil receiver and then return zero-value instruments.
|
||||
type SemConvMetricsRegistry struct {
|
||||
// server metrics
|
||||
httpServerRequestDuration httpconv.ServerRequestDuration
|
||||
// client metrics
|
||||
httpClientRequestDuration httpconv.ClientRequestDuration
|
||||
}
|
||||
|
||||
// NewSemConvMetricRegistry registers all stables semantic conventions metrics.
|
||||
func NewSemConvMetricRegistry(ctx context.Context, config *otypes.OTLP) (*SemConvMetricsRegistry, error) {
|
||||
if err := observability.EnsureUserEnvVar(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if openTelemetryMeterProvider == nil {
|
||||
var err error
|
||||
if openTelemetryMeterProvider, err = newOpenTelemetryMeterProvider(ctx, config); err != nil {
|
||||
log.Ctx(ctx).Err(err).Msg("Unable to create OpenTelemetry meter provider")
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
meter := otel.Meter("github.com/traefik/traefik",
|
||||
metric.WithInstrumentationVersion(version.Version))
|
||||
|
||||
httpServerRequestDuration, err := httpconv.NewServerRequestDuration(meter,
|
||||
metric.WithExplicitBucketBoundaries(config.ExplicitBoundaries...))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't build httpServerRequestDuration histogram: %w", err)
|
||||
}
|
||||
|
||||
httpClientRequestDuration, err := httpconv.NewClientRequestDuration(meter,
|
||||
metric.WithExplicitBucketBoundaries(config.ExplicitBoundaries...))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't build httpClientRequestDuration histogram: %w", err)
|
||||
}
|
||||
|
||||
return &SemConvMetricsRegistry{
|
||||
httpServerRequestDuration: httpServerRequestDuration,
|
||||
httpClientRequestDuration: httpClientRequestDuration,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// HTTPServerRequestDuration returns the HTTP server request duration histogram.
|
||||
func (s *SemConvMetricsRegistry) HTTPServerRequestDuration() httpconv.ServerRequestDuration {
|
||||
if s == nil {
|
||||
return httpconv.ServerRequestDuration{}
|
||||
}
|
||||
|
||||
return s.httpServerRequestDuration
|
||||
}
|
||||
|
||||
// HTTPClientRequestDuration returns the HTTP client request duration histogram.
|
||||
func (s *SemConvMetricsRegistry) HTTPClientRequestDuration() httpconv.ClientRequestDuration {
|
||||
if s == nil {
|
||||
return httpconv.ClientRequestDuration{}
|
||||
}
|
||||
|
||||
return s.httpClientRequestDuration
|
||||
}
|
||||
|
||||
// RegisterOpenTelemetry registers all OpenTelemetry metrics.
|
||||
func RegisterOpenTelemetry(ctx context.Context, config *otypes.OTLP) Registry {
|
||||
if openTelemetryMeterProvider == nil {
|
||||
var err error
|
||||
if openTelemetryMeterProvider, err = newOpenTelemetryMeterProvider(ctx, config); err != nil {
|
||||
log.Ctx(ctx).Err(err).Msg("Unable to create OpenTelemetry meter provider")
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
if openTelemetryGaugeCollector == nil {
|
||||
openTelemetryGaugeCollector = newOpenTelemetryGaugeCollector()
|
||||
}
|
||||
|
||||
meter := otel.Meter("github.com/traefik/traefik",
|
||||
metric.WithInstrumentationVersion(version.Version))
|
||||
|
||||
reg := &standardRegistry{
|
||||
epEnabled: config.AddEntryPointsLabels,
|
||||
routerEnabled: config.AddRoutersLabels,
|
||||
svcEnabled: config.AddServicesLabels,
|
||||
configReloadsCounter: newOTLPCounterFrom(meter, configReloadsTotalName, "Config reloads"),
|
||||
lastConfigReloadSuccessGauge: newOTLPGaugeFrom(meter, configLastReloadSuccessName, "Last config reload success", "ms"),
|
||||
openConnectionsGauge: newOTLPGaugeFrom(meter, openConnectionsName, "How many open connections exist, by entryPoint and protocol", "1"),
|
||||
tlsCertsNotAfterTimestampGauge: newOTLPGaugeFrom(meter, tlsCertsNotAfterTimestampName, "Certificate expiration timestamp", "ms"),
|
||||
}
|
||||
|
||||
if config.AddEntryPointsLabels {
|
||||
reg.entryPointReqsCounter = NewCounterWithNoopHeaders(newOTLPCounterFrom(meter, entryPointReqsTotalName,
|
||||
"How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method."))
|
||||
reg.entryPointReqsTLSCounter = newOTLPCounterFrom(meter, entryPointReqsTLSTotalName,
|
||||
"How many HTTP requests with TLS processed on an entrypoint, partitioned by TLS Version and TLS cipher Used.")
|
||||
reg.entryPointReqDurationHistogram, _ = NewHistogramWithScale(newOTLPHistogramFrom(meter, entryPointReqDurationName,
|
||||
"How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.",
|
||||
"s"), time.Second)
|
||||
reg.entryPointReqsBytesCounter = newOTLPCounterFrom(meter, entryPointReqsBytesTotalName,
|
||||
"The total size of requests in bytes handled by an entrypoint, partitioned by status code, protocol, and method.")
|
||||
reg.entryPointRespsBytesCounter = newOTLPCounterFrom(meter, entryPointRespsBytesTotalName,
|
||||
"The total size of responses in bytes handled by an entrypoint, partitioned by status code, protocol, and method.")
|
||||
}
|
||||
|
||||
if config.AddRoutersLabels {
|
||||
reg.routerReqsCounter = NewCounterWithNoopHeaders(newOTLPCounterFrom(meter, routerReqsTotalName,
|
||||
"How many HTTP requests are processed on a router, partitioned by service, status code, protocol, and method."))
|
||||
reg.routerReqsTLSCounter = newOTLPCounterFrom(meter, routerReqsTLSTotalName,
|
||||
"How many HTTP requests with TLS are processed on a router, partitioned by service, TLS Version, and TLS cipher Used.")
|
||||
reg.routerReqDurationHistogram, _ = NewHistogramWithScale(newOTLPHistogramFrom(meter, routerReqDurationName,
|
||||
"How long it took to process the request on a router, partitioned by service, status code, protocol, and method.",
|
||||
"s"), time.Second)
|
||||
reg.routerReqsBytesCounter = newOTLPCounterFrom(meter, routerReqsBytesTotalName,
|
||||
"The total size of requests in bytes handled by a router, partitioned by status code, protocol, and method.")
|
||||
reg.routerRespsBytesCounter = newOTLPCounterFrom(meter, routerRespsBytesTotalName,
|
||||
"The total size of responses in bytes handled by a router, partitioned by status code, protocol, and method.")
|
||||
}
|
||||
|
||||
if config.AddServicesLabels {
|
||||
reg.serviceReqsCounter = NewCounterWithNoopHeaders(newOTLPCounterFrom(meter, serviceReqsTotalName,
|
||||
"How many HTTP requests processed on a service, partitioned by status code, protocol, and method."))
|
||||
reg.serviceReqsTLSCounter = newOTLPCounterFrom(meter, serviceReqsTLSTotalName,
|
||||
"How many HTTP requests with TLS processed on a service, partitioned by TLS version and TLS cipher.")
|
||||
reg.serviceReqDurationHistogram, _ = NewHistogramWithScale(newOTLPHistogramFrom(meter, serviceReqDurationName,
|
||||
"How long it took to process the request on a service, partitioned by status code, protocol, and method.",
|
||||
"s"), time.Second)
|
||||
reg.serviceRetriesCounter = newOTLPCounterFrom(meter, serviceRetriesTotalName,
|
||||
"How many request retries happened on a service.")
|
||||
reg.serviceServerUpGauge = newOTLPGaugeFrom(meter, serviceServerUpName,
|
||||
"service server is up, described by gauge value of 0 or 1.",
|
||||
"1")
|
||||
reg.serviceReqsBytesCounter = newOTLPCounterFrom(meter, serviceReqsBytesTotalName,
|
||||
"The total size of requests in bytes received by a service, partitioned by status code, protocol, and method.")
|
||||
reg.serviceRespsBytesCounter = newOTLPCounterFrom(meter, serviceRespsBytesTotalName,
|
||||
"The total size of responses in bytes returned by a service, partitioned by status code, protocol, and method.")
|
||||
}
|
||||
|
||||
return reg
|
||||
}
|
||||
|
||||
// StopOpenTelemetry stops and resets Open-Telemetry client.
|
||||
func StopOpenTelemetry() {
|
||||
if openTelemetryMeterProvider == nil {
|
||||
return
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := openTelemetryMeterProvider.Shutdown(ctx); err != nil {
|
||||
log.Err(err).Msg("Unable to shutdown OpenTelemetry meter provider")
|
||||
}
|
||||
|
||||
openTelemetryMeterProvider = nil
|
||||
}
|
||||
|
||||
// newOpenTelemetryMeterProvider creates a new controller.Controller.
|
||||
func newOpenTelemetryMeterProvider(ctx context.Context, config *otypes.OTLP) (*sdkmetric.MeterProvider, error) {
|
||||
var (
|
||||
exporter sdkmetric.Exporter
|
||||
err error
|
||||
)
|
||||
if config.GRPC != nil {
|
||||
exporter, err = newGRPCExporter(ctx, config.GRPC)
|
||||
} else {
|
||||
exporter, err = newHTTPExporter(ctx, config.HTTP)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating exporter: %w", err)
|
||||
}
|
||||
|
||||
var resAttrs []attribute.KeyValue
|
||||
for k, v := range config.ResourceAttributes {
|
||||
resAttrs = append(resAttrs, attribute.String(k, v))
|
||||
}
|
||||
|
||||
res, err := resource.New(ctx,
|
||||
resource.WithContainer(),
|
||||
resource.WithHost(),
|
||||
resource.WithOS(),
|
||||
resource.WithProcess(),
|
||||
resource.WithTelemetrySDK(),
|
||||
resource.WithDetectors(types.K8sAttributesDetector{}),
|
||||
// The following order allows the user to override the service name and version,
|
||||
// as well as any other attributes set by the above detectors.
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceName(config.ServiceName),
|
||||
semconv.ServiceVersion(version.Version),
|
||||
),
|
||||
resource.WithAttributes(resAttrs...),
|
||||
// Use the environment variables to allow overriding above resource attributes.
|
||||
resource.WithFromEnv(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("building resource: %w", err)
|
||||
}
|
||||
|
||||
opts := []sdkmetric.PeriodicReaderOption{
|
||||
sdkmetric.WithInterval(time.Duration(config.PushInterval)),
|
||||
}
|
||||
|
||||
meterProvider := sdkmetric.NewMeterProvider(
|
||||
sdkmetric.WithResource(res),
|
||||
sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter, opts...)),
|
||||
// View to customize histogram buckets and rename a single histogram instrument.
|
||||
sdkmetric.WithView(sdkmetric.NewView(
|
||||
sdkmetric.Instrument{Name: "traefik_*_request_duration_seconds"},
|
||||
sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
|
||||
Boundaries: config.ExplicitBoundaries,
|
||||
}},
|
||||
)),
|
||||
)
|
||||
|
||||
otel.SetMeterProvider(meterProvider)
|
||||
|
||||
return meterProvider, nil
|
||||
}
|
||||
|
||||
func newHTTPExporter(ctx context.Context, config *otypes.OTelHTTP) (sdkmetric.Exporter, error) {
|
||||
endpoint, err := url.Parse(config.Endpoint)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid collector endpoint %q: %w", config.Endpoint, err)
|
||||
}
|
||||
|
||||
opts := []otlpmetrichttp.Option{
|
||||
otlpmetrichttp.WithEndpoint(endpoint.Host),
|
||||
otlpmetrichttp.WithHeaders(config.Headers),
|
||||
otlpmetrichttp.WithCompression(otlpmetrichttp.GzipCompression),
|
||||
}
|
||||
|
||||
if endpoint.Scheme == "http" {
|
||||
opts = append(opts, otlpmetrichttp.WithInsecure())
|
||||
}
|
||||
|
||||
if endpoint.Path != "" {
|
||||
opts = append(opts, otlpmetrichttp.WithURLPath(endpoint.Path))
|
||||
}
|
||||
|
||||
if config.TLS != nil {
|
||||
tlsConfig, err := config.TLS.CreateTLSConfig(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating TLS client config: %w", err)
|
||||
}
|
||||
|
||||
opts = append(opts, otlpmetrichttp.WithTLSClientConfig(tlsConfig))
|
||||
}
|
||||
|
||||
return otlpmetrichttp.New(ctx, opts...)
|
||||
}
|
||||
|
||||
func newGRPCExporter(ctx context.Context, config *otypes.OTelGRPC) (sdkmetric.Exporter, error) {
|
||||
host, port, err := net.SplitHostPort(config.Endpoint)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid collector endpoint %q: %w", config.Endpoint, err)
|
||||
}
|
||||
|
||||
opts := []otlpmetricgrpc.Option{
|
||||
otlpmetricgrpc.WithEndpoint(fmt.Sprintf("%s:%s", host, port)),
|
||||
otlpmetricgrpc.WithHeaders(config.Headers),
|
||||
otlpmetricgrpc.WithCompressor(gzip.Name),
|
||||
}
|
||||
|
||||
if config.Insecure {
|
||||
opts = append(opts, otlpmetricgrpc.WithInsecure())
|
||||
}
|
||||
|
||||
if config.TLS != nil {
|
||||
tlsConfig, err := config.TLS.CreateTLSConfig(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating TLS client config: %w", err)
|
||||
}
|
||||
|
||||
opts = append(opts, otlpmetricgrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig)))
|
||||
}
|
||||
|
||||
return otlpmetricgrpc.New(ctx, opts...)
|
||||
}
|
||||
|
||||
func newOTLPCounterFrom(meter metric.Meter, name, desc string) *otelCounter {
|
||||
c, _ := meter.Float64Counter(name,
|
||||
metric.WithDescription(desc),
|
||||
metric.WithUnit("1"),
|
||||
)
|
||||
|
||||
return &otelCounter{
|
||||
ip: c,
|
||||
}
|
||||
}
|
||||
|
||||
type otelCounter struct {
|
||||
labelNamesValues otelLabelNamesValues
|
||||
ip metric.Float64Counter
|
||||
}
|
||||
|
||||
func (c *otelCounter) With(labelValues ...string) metrics.Counter {
|
||||
return &otelCounter{
|
||||
labelNamesValues: c.labelNamesValues.With(labelValues...),
|
||||
ip: c.ip,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *otelCounter) Add(delta float64) {
|
||||
c.ip.Add(context.Background(), delta, metric.WithAttributes(c.labelNamesValues.ToLabels()...))
|
||||
}
|
||||
|
||||
type gaugeValue struct {
|
||||
attributes otelLabelNamesValues
|
||||
value float64
|
||||
}
|
||||
|
||||
type gaugeCollector struct {
|
||||
mu sync.Mutex
|
||||
values map[string]map[string]gaugeValue
|
||||
}
|
||||
|
||||
func newOpenTelemetryGaugeCollector() *gaugeCollector {
|
||||
return &gaugeCollector{
|
||||
values: make(map[string]map[string]gaugeValue),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *gaugeCollector) add(name string, delta float64, attributes otelLabelNamesValues) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
str := strings.Join(attributes, "")
|
||||
|
||||
if _, exists := c.values[name]; !exists {
|
||||
c.values[name] = map[string]gaugeValue{
|
||||
str: {
|
||||
attributes: attributes,
|
||||
value: delta,
|
||||
},
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
v, exists := c.values[name][str]
|
||||
if !exists {
|
||||
c.values[name][str] = gaugeValue{
|
||||
attributes: attributes,
|
||||
value: delta,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
c.values[name][str] = gaugeValue{
|
||||
attributes: attributes,
|
||||
value: v.value + delta,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *gaugeCollector) set(name string, value float64, attributes otelLabelNamesValues) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
if _, exists := c.values[name]; !exists {
|
||||
c.values[name] = make(map[string]gaugeValue)
|
||||
}
|
||||
|
||||
c.values[name][strings.Join(attributes, "")] = gaugeValue{
|
||||
attributes: attributes,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
||||
func newOTLPGaugeFrom(meter metric.Meter, name, desc string, unit string) *otelGauge {
|
||||
openTelemetryGaugeCollector.values[name] = make(map[string]gaugeValue)
|
||||
|
||||
c, _ := meter.Float64ObservableGauge(name,
|
||||
metric.WithDescription(desc),
|
||||
metric.WithUnit(unit),
|
||||
)
|
||||
|
||||
_, err := meter.RegisterCallback(func(ctx context.Context, observer metric.Observer) error {
|
||||
openTelemetryGaugeCollector.mu.Lock()
|
||||
defer openTelemetryGaugeCollector.mu.Unlock()
|
||||
|
||||
values, exists := openTelemetryGaugeCollector.values[name]
|
||||
if !exists {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, value := range values {
|
||||
observer.ObserveFloat64(c, value.value, metric.WithAttributes(value.attributes.ToLabels()...))
|
||||
}
|
||||
|
||||
return nil
|
||||
}, c)
|
||||
if err != nil {
|
||||
log.Err(err).Msg("Unable to register OpenTelemetry meter callback")
|
||||
}
|
||||
|
||||
return &otelGauge{
|
||||
ip: c,
|
||||
name: name,
|
||||
}
|
||||
}
|
||||
|
||||
type otelGauge struct {
|
||||
labelNamesValues otelLabelNamesValues
|
||||
ip metric.Float64ObservableGauge
|
||||
name string
|
||||
}
|
||||
|
||||
func (g *otelGauge) With(labelValues ...string) metrics.Gauge {
|
||||
return &otelGauge{
|
||||
labelNamesValues: g.labelNamesValues.With(labelValues...),
|
||||
ip: g.ip,
|
||||
name: g.name,
|
||||
}
|
||||
}
|
||||
|
||||
func (g *otelGauge) Add(delta float64) {
|
||||
openTelemetryGaugeCollector.add(g.name, delta, g.labelNamesValues)
|
||||
}
|
||||
|
||||
func (g *otelGauge) Set(value float64) {
|
||||
openTelemetryGaugeCollector.set(g.name, value, g.labelNamesValues)
|
||||
}
|
||||
|
||||
func newOTLPHistogramFrom(meter metric.Meter, name, desc string, unit string) *otelHistogram {
|
||||
c, _ := meter.Float64Histogram(name,
|
||||
metric.WithDescription(desc),
|
||||
metric.WithUnit(unit),
|
||||
)
|
||||
|
||||
return &otelHistogram{
|
||||
ip: c,
|
||||
}
|
||||
}
|
||||
|
||||
type otelHistogram struct {
|
||||
labelNamesValues otelLabelNamesValues
|
||||
ip metric.Float64Histogram
|
||||
}
|
||||
|
||||
func (h *otelHistogram) With(labelValues ...string) metrics.Histogram {
|
||||
return &otelHistogram{
|
||||
labelNamesValues: h.labelNamesValues.With(labelValues...),
|
||||
ip: h.ip,
|
||||
}
|
||||
}
|
||||
|
||||
func (h *otelHistogram) Observe(incr float64) {
|
||||
h.ip.Record(context.Background(), incr, metric.WithAttributes(h.labelNamesValues.ToLabels()...))
|
||||
}
|
||||
|
||||
// otelLabelNamesValues is the equivalent of prometheus' labelNamesValues
// but adapted to OpenTelemetry.
// It is a flat list of alternating label names and values, and its With
// method guarantees that every name it appends comes with a value.
// Metrics may include it as a member to help them satisfy With semantics and
// save some code duplication.
type otelLabelNamesValues []string

// With returns a new otelLabelNamesValues made of the receiver followed by
// labelValues. When labelValues has an odd length, the value "unknown" is
// appended so that the last label name is not left without a value.
//
// Neither the receiver nor labelValues is modified: label sets derived from
// the same parent never share the storage of their appended elements.
func (lvs otelLabelNamesValues) With(labelValues ...string) otelLabelNamesValues {
	// Capping the capacity to the length forces append to copy as soon as an
	// element is added, instead of writing into spare capacity that another
	// label set derived from the same receiver may also be using.
	out := append(lvs[:len(lvs):len(lvs)], labelValues...)
	if len(labelValues)%2 != 0 {
		out = append(out, "unknown")
	}

	return out
}
|
||||
|
||||
// ToLabels is a convenience method to convert a otelLabelNamesValues
|
||||
// to the native attribute.KeyValue.
|
||||
func (lvs otelLabelNamesValues) ToLabels() []attribute.KeyValue {
|
||||
labels := make([]attribute.KeyValue, len(lvs)/2)
|
||||
for i := 0; i < len(labels); i++ {
|
||||
labels[i] = attribute.String(lvs[2*i], lvs[2*i+1])
|
||||
}
|
||||
return labels
|
||||
}
|
||||
492
pkg/observability/metrics/otel_test.go
Normal file
492
pkg/observability/metrics/otel_test.go
Normal file
|
|
@ -0,0 +1,492 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
ptypes "github.com/traefik/paerser/types"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
"github.com/traefik/traefik/v3/pkg/version"
|
||||
"go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
)
|
||||
|
||||
func TestOpenTelemetry_labels(t *testing.T) {
|
||||
tests := []struct {
|
||||
desc string
|
||||
values otelLabelNamesValues
|
||||
with []string
|
||||
expect []attribute.KeyValue
|
||||
}{
|
||||
{
|
||||
desc: "with no starting value",
|
||||
values: otelLabelNamesValues{},
|
||||
expect: []attribute.KeyValue{},
|
||||
},
|
||||
{
|
||||
desc: "with one starting value",
|
||||
values: otelLabelNamesValues{"foo"},
|
||||
expect: []attribute.KeyValue{},
|
||||
},
|
||||
{
|
||||
desc: "with two starting value",
|
||||
values: otelLabelNamesValues{"foo", "bar"},
|
||||
expect: []attribute.KeyValue{attribute.String("foo", "bar")},
|
||||
},
|
||||
{
|
||||
desc: "with no starting value, and with one other value",
|
||||
values: otelLabelNamesValues{},
|
||||
with: []string{"baz"},
|
||||
expect: []attribute.KeyValue{attribute.String("baz", "unknown")},
|
||||
},
|
||||
{
|
||||
desc: "with no starting value, and with two other value",
|
||||
values: otelLabelNamesValues{},
|
||||
with: []string{"baz", "buz"},
|
||||
expect: []attribute.KeyValue{attribute.String("baz", "buz")},
|
||||
},
|
||||
{
|
||||
desc: "with one starting value, and with one other value",
|
||||
values: otelLabelNamesValues{"foo"},
|
||||
with: []string{"baz"},
|
||||
expect: []attribute.KeyValue{attribute.String("foo", "baz")},
|
||||
},
|
||||
{
|
||||
desc: "with one starting value, and with two other value",
|
||||
values: otelLabelNamesValues{"foo"},
|
||||
with: []string{"baz", "buz"},
|
||||
expect: []attribute.KeyValue{attribute.String("foo", "baz")},
|
||||
},
|
||||
{
|
||||
desc: "with two starting value, and with one other value",
|
||||
values: otelLabelNamesValues{"foo", "bar"},
|
||||
with: []string{"baz"},
|
||||
expect: []attribute.KeyValue{
|
||||
attribute.String("foo", "bar"),
|
||||
attribute.String("baz", "unknown"),
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "with two starting value, and with two other value",
|
||||
values: otelLabelNamesValues{"foo", "bar"},
|
||||
with: []string{"baz", "buz"},
|
||||
expect: []attribute.KeyValue{
|
||||
attribute.String("foo", "bar"),
|
||||
attribute.String("baz", "buz"),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
assert.Equal(t, test.expect, test.values.With(test.with...).ToLabels())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenTelemetry_GaugeCollectorAdd(t *testing.T) {
|
||||
tests := []struct {
|
||||
desc string
|
||||
gc *gaugeCollector
|
||||
delta float64
|
||||
name string
|
||||
attributes otelLabelNamesValues
|
||||
expect map[string]map[string]gaugeValue
|
||||
}{
|
||||
{
|
||||
desc: "empty collector",
|
||||
gc: newOpenTelemetryGaugeCollector(),
|
||||
delta: 1,
|
||||
name: "foo",
|
||||
expect: map[string]map[string]gaugeValue{
|
||||
"foo": {"": {value: 1}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "initialized collector",
|
||||
gc: &gaugeCollector{
|
||||
values: map[string]map[string]gaugeValue{
|
||||
"foo": {"": {value: 1}},
|
||||
},
|
||||
},
|
||||
delta: 1,
|
||||
name: "foo",
|
||||
expect: map[string]map[string]gaugeValue{
|
||||
"foo": {"": {value: 2}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "initialized collector, values with label (only the last one counts)",
|
||||
gc: &gaugeCollector{
|
||||
values: map[string]map[string]gaugeValue{
|
||||
"foo": {
|
||||
"bar": {
|
||||
attributes: otelLabelNamesValues{"bar"},
|
||||
value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
delta: 1,
|
||||
name: "foo",
|
||||
expect: map[string]map[string]gaugeValue{
|
||||
"foo": {
|
||||
"": {
|
||||
value: 1,
|
||||
},
|
||||
"bar": {
|
||||
attributes: otelLabelNamesValues{"bar"},
|
||||
value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "initialized collector, values with label on set",
|
||||
gc: &gaugeCollector{
|
||||
values: map[string]map[string]gaugeValue{
|
||||
"foo": {"bar": {value: 1}},
|
||||
},
|
||||
},
|
||||
delta: 1,
|
||||
name: "foo",
|
||||
attributes: otelLabelNamesValues{"baz"},
|
||||
expect: map[string]map[string]gaugeValue{
|
||||
"foo": {
|
||||
"bar": {
|
||||
value: 1,
|
||||
},
|
||||
"baz": {
|
||||
value: 1,
|
||||
attributes: otelLabelNamesValues{"baz"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
test.gc.add(test.name, test.delta, test.attributes)
|
||||
|
||||
assert.Equal(t, test.expect, test.gc.values)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenTelemetry_GaugeCollectorSet(t *testing.T) {
|
||||
tests := []struct {
|
||||
desc string
|
||||
gc *gaugeCollector
|
||||
value float64
|
||||
name string
|
||||
attributes otelLabelNamesValues
|
||||
expect map[string]map[string]gaugeValue
|
||||
}{
|
||||
{
|
||||
desc: "empty collector",
|
||||
gc: newOpenTelemetryGaugeCollector(),
|
||||
value: 1,
|
||||
name: "foo",
|
||||
expect: map[string]map[string]gaugeValue{
|
||||
"foo": {"": {value: 1}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "initialized collector",
|
||||
gc: &gaugeCollector{
|
||||
values: map[string]map[string]gaugeValue{
|
||||
"foo": {"": {value: 1}},
|
||||
},
|
||||
},
|
||||
value: 1,
|
||||
name: "foo",
|
||||
expect: map[string]map[string]gaugeValue{
|
||||
"foo": {"": {value: 1}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "initialized collector, values with label",
|
||||
gc: &gaugeCollector{
|
||||
values: map[string]map[string]gaugeValue{
|
||||
"foo": {
|
||||
"bar": {
|
||||
attributes: otelLabelNamesValues{"bar"},
|
||||
value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
value: 1,
|
||||
name: "foo",
|
||||
expect: map[string]map[string]gaugeValue{
|
||||
"foo": {
|
||||
"": {
|
||||
value: 1,
|
||||
},
|
||||
"bar": {
|
||||
attributes: otelLabelNamesValues{"bar"},
|
||||
value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "initialized collector, values with label on set",
|
||||
gc: &gaugeCollector{
|
||||
values: map[string]map[string]gaugeValue{
|
||||
"foo": {"": {value: 1}},
|
||||
},
|
||||
},
|
||||
value: 1,
|
||||
name: "foo",
|
||||
attributes: otelLabelNamesValues{"bar"},
|
||||
expect: map[string]map[string]gaugeValue{
|
||||
"foo": {
|
||||
"": {
|
||||
value: 1,
|
||||
},
|
||||
"bar": {
|
||||
value: 1,
|
||||
attributes: otelLabelNamesValues{"bar"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
test.gc.set(test.name, test.value, test.attributes)
|
||||
|
||||
assert.Equal(t, test.expect, test.gc.values)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenTelemetry(t *testing.T) {
|
||||
tests := []struct {
|
||||
desc string
|
||||
serviceName string
|
||||
}{
|
||||
{
|
||||
desc: "default",
|
||||
},
|
||||
{
|
||||
desc: "custom-service-name",
|
||||
serviceName: "custom-service-name",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
c := make(chan *string, 5)
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gzr, err := gzip.NewReader(r.Body)
|
||||
require.NoError(t, err)
|
||||
|
||||
body, err := io.ReadAll(gzr)
|
||||
require.NoError(t, err)
|
||||
|
||||
req := pmetricotlp.NewExportRequest()
|
||||
err = req.UnmarshalProto(body)
|
||||
require.NoError(t, err)
|
||||
|
||||
marshalledReq, err := json.Marshal(req)
|
||||
require.NoError(t, err)
|
||||
|
||||
bodyStr := string(marshalledReq)
|
||||
c <- &bodyStr
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
|
||||
t.Cleanup(func() {
|
||||
StopOpenTelemetry()
|
||||
ts.Close()
|
||||
})
|
||||
|
||||
var cfg otypes.OTLP
|
||||
(&cfg).SetDefaults()
|
||||
cfg.AddRoutersLabels = true
|
||||
cfg.HTTP = &otypes.OTelHTTP{
|
||||
Endpoint: ts.URL,
|
||||
}
|
||||
cfg.PushInterval = ptypes.Duration(10 * time.Millisecond)
|
||||
|
||||
wantServiceName := "traefik"
|
||||
if test.serviceName != "" {
|
||||
cfg.ServiceName = test.serviceName
|
||||
wantServiceName = test.serviceName
|
||||
}
|
||||
|
||||
registry := RegisterOpenTelemetry(t.Context(), &cfg)
|
||||
require.NotNil(t, registry)
|
||||
|
||||
if !registry.IsEpEnabled() || !registry.IsRouterEnabled() || !registry.IsSvcEnabled() {
|
||||
t.Fatalf("registry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()")
|
||||
}
|
||||
|
||||
expected := []string{
|
||||
`({"key":"service.name","value":{"stringValue":"` + wantServiceName + `"}})`,
|
||||
`({"key":"service.version","value":{"stringValue":"` + version.Version + `"}})`,
|
||||
}
|
||||
|
||||
tryAssertMessage(t, c, expected)
|
||||
|
||||
// TODO: the len of startUnixNano is no supposed to be 20, it should be 19
|
||||
expectedConfig := []string{
|
||||
`({"name":"traefik_config_reloads_total","description":"Config reloads","unit":"1","sum":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_config_last_reload_success","description":"Last config reload success","unit":"ms","gauge":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`,
|
||||
`({"name":"traefik_open_connections","description":"How many open connections exist, by entryPoint and protocol","unit":"1","gauge":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test"}},{"key":"protocol","value":{"stringValue":"TCP"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`,
|
||||
}
|
||||
|
||||
registry.ConfigReloadsCounter().Add(1)
|
||||
registry.LastConfigReloadSuccessGauge().Set(1)
|
||||
registry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Set(1)
|
||||
|
||||
tryAssertMessage(t, c, expectedConfig)
|
||||
|
||||
expectedTLSCerts := []string{
|
||||
`({"name":"traefik_tls_certs_not_after","description":"Certificate expiration timestamp","unit":"ms","gauge":{"dataPoints":\[{"attributes":\[{"key":"key","value":{"stringValue":"value"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`,
|
||||
}
|
||||
|
||||
registry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
|
||||
|
||||
tryAssertMessage(t, c, expectedTLSCerts)
|
||||
|
||||
expectedEntryPoints := []string{
|
||||
`({"name":"traefik_entrypoint_requests_total","description":"How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_entrypoint_requests_tls_total","description":"How many HTTP requests with TLS processed on an entrypoint, partitioned by TLS Version and TLS cipher Used.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test2"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_entrypoint_request_duration_seconds","description":"How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.","unit":"s","histogram":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test3"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`,
|
||||
`({"name":"traefik_entrypoint_requests_bytes_total","description":"The total size of requests in bytes handled by an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_entrypoint_responses_bytes_total","description":"The total size of responses in bytes handled by an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
}
|
||||
|
||||
registry.EntryPointReqsCounter().With(nil, "entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.EntryPointReqsTLSCounter().With("entrypoint", "test2", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
registry.EntryPointReqDurationHistogram().With("entrypoint", "test3").Observe(10000)
|
||||
registry.EntryPointReqsBytesCounter().With("entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.EntryPointRespsBytesCounter().With("entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
|
||||
tryAssertMessage(t, c, expectedEntryPoints)
|
||||
|
||||
expectedRouters := []string{
|
||||
`({"name":"traefik_router_requests_total","description":"How many HTTP requests are processed on a router, partitioned by service, status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1},{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_router_requests_tls_total","description":"How many HTTP requests with TLS are processed on a router, partitioned by service, TLS Version, and TLS cipher Used.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"router","value":{"stringValue":"demo"}},{"key":"service","value":{"stringValue":"test"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_router_request_duration_seconds","description":"How long it took to process the request on a router, partitioned by service, status code, protocol, and method.","unit":"s","histogram":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"router","value":{"stringValue":"demo"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`,
|
||||
`({"name":"traefik_router_requests_bytes_total","description":"The total size of requests in bytes handled by a router, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_router_responses_bytes_total","description":"The total size of responses in bytes handled by a router, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
}
|
||||
|
||||
registry.RouterReqsCounter().With(nil, "router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
registry.RouterReqsCounter().With(nil, "router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
registry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
registry.RouterReqsBytesCounter().With("router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
registry.RouterRespsBytesCounter().With("router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
|
||||
tryAssertMessage(t, c, expectedRouters)
|
||||
|
||||
expectedServices := []string{
|
||||
`({"name":"traefik_service_requests_total","description":"How many HTTP requests processed on a service, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1},{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_service_requests_tls_total","description":"How many HTTP requests with TLS processed on a service, partitioned by TLS version and TLS cipher.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"service","value":{"stringValue":"test"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_service_request_duration_seconds","description":"How long it took to process the request on a service, partitioned by status code, protocol, and method.","unit":"s","histogram":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`,
|
||||
`({"name":"traefik_service_server_up","description":"service server is up, described by gauge value of 0 or 1.","unit":"1","gauge":{"dataPoints":\[{"attributes":\[{"key":"service","value":{"stringValue":"test"}},{"key":"url","value":{"stringValue":"http://127.0.0.1"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`,
|
||||
`({"name":"traefik_service_requests_bytes_total","description":"The total size of requests in bytes received by a service, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_service_responses_bytes_total","description":"The total size of responses in bytes returned by a service, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
}
|
||||
|
||||
registry.ServiceReqsCounter().With(nil, "service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.ServiceReqsCounter().With(nil, "service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
registry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
registry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
registry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1)
|
||||
registry.ServiceReqsBytesCounter().With("service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
registry.ServiceRespsBytesCounter().With("service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
|
||||
tryAssertMessage(t, c, expectedServices)
|
||||
|
||||
expectedServicesRetries := []string{
|
||||
`({"attributes":\[{"key":"service","value":{"stringValue":"foobar"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1})`,
|
||||
`({"attributes":\[{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":2})`,
|
||||
}
|
||||
|
||||
registry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
registry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
registry.ServiceRetriesCounter().With("service", "foobar").Add(1)
|
||||
|
||||
tryAssertMessage(t, c, expectedServicesRetries)
|
||||
|
||||
// We cannot rely on the previous expected pattern,
|
||||
// because this pattern was for matching only one dataPoint in the histogram,
|
||||
// and as soon as the EntryPointReqDurationHistogram.Observe is called,
|
||||
// it adds a new dataPoint to the histogram.
|
||||
expectedEntryPointReqDuration := []string{
|
||||
`({"attributes":\[{"key":"entrypoint","value":{"stringValue":"myEntrypoint"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"2","sum":30000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","2"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":20000})`,
|
||||
}
|
||||
|
||||
registry.EntryPointReqDurationHistogram().With("entrypoint", "myEntrypoint").Observe(10000)
|
||||
registry.EntryPointReqDurationHistogram().With("entrypoint", "myEntrypoint").Observe(20000)
|
||||
|
||||
tryAssertMessage(t, c, expectedEntryPointReqDuration)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func assertMessage(t *testing.T, msg string, expected []string) {
|
||||
t.Helper()
|
||||
errs := verifyMessage(msg, expected)
|
||||
for _, err := range errs {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
func tryAssertMessage(t *testing.T, c chan *string, expected []string) {
|
||||
t.Helper()
|
||||
|
||||
var errs []error
|
||||
timeout := time.After(1 * time.Second)
|
||||
for {
|
||||
select {
|
||||
case <-timeout:
|
||||
for _, err := range errs {
|
||||
t.Error(err)
|
||||
}
|
||||
case msg := <-c:
|
||||
errs = verifyMessage(*msg, expected)
|
||||
if len(errs) == 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// verifyMessage checks msg against every regular expression in expected.
// A pattern is satisfied when it matches msg and yields exactly one capture
// group (the submatch slice then has two entries). Each unsatisfied pattern
// contributes one error; a nil result means every pattern was satisfied.
func verifyMessage(msg string, expected []string) []error {
	var failures []error

	for _, pattern := range expected {
		groups := regexp.MustCompile(pattern).FindStringSubmatch(msg)
		if len(groups) == 2 {
			continue
		}

		failures = append(failures, fmt.Errorf("got %q %v, want %q", msg, groups, pattern))
	}

	return failures
}
|
||||
672
pkg/observability/metrics/prometheus.go
Normal file
672
pkg/observability/metrics/prometheus.go
Normal file
|
|
@ -0,0 +1,672 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/metrics"
|
||||
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/traefik/traefik/v3/pkg/config/dynamic"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
)
|
||||
|
||||
const (
|
||||
// MetricNamePrefix prefix of all metric names.
|
||||
MetricNamePrefix = "traefik_"
|
||||
|
||||
// server meta information.
|
||||
metricConfigPrefix = MetricNamePrefix + "config_"
|
||||
configReloadsTotalName = metricConfigPrefix + "reloads_total"
|
||||
configLastReloadSuccessName = metricConfigPrefix + "last_reload_success"
|
||||
openConnectionsName = MetricNamePrefix + "open_connections"
|
||||
|
||||
// TLS.
|
||||
metricsTLSPrefix = MetricNamePrefix + "tls_"
|
||||
tlsCertsNotAfterTimestampName = metricsTLSPrefix + "certs_not_after"
|
||||
|
||||
// entry point.
|
||||
metricEntryPointPrefix = MetricNamePrefix + "entrypoint_"
|
||||
entryPointReqsTotalName = metricEntryPointPrefix + "requests_total"
|
||||
entryPointReqsTLSTotalName = metricEntryPointPrefix + "requests_tls_total"
|
||||
entryPointReqDurationName = metricEntryPointPrefix + "request_duration_seconds"
|
||||
entryPointReqsBytesTotalName = metricEntryPointPrefix + "requests_bytes_total"
|
||||
entryPointRespsBytesTotalName = metricEntryPointPrefix + "responses_bytes_total"
|
||||
|
||||
// router level.
|
||||
metricRouterPrefix = MetricNamePrefix + "router_"
|
||||
routerReqsTotalName = metricRouterPrefix + "requests_total"
|
||||
routerReqsTLSTotalName = metricRouterPrefix + "requests_tls_total"
|
||||
routerReqDurationName = metricRouterPrefix + "request_duration_seconds"
|
||||
routerReqsBytesTotalName = metricRouterPrefix + "requests_bytes_total"
|
||||
routerRespsBytesTotalName = metricRouterPrefix + "responses_bytes_total"
|
||||
|
||||
// service level.
|
||||
metricServicePrefix = MetricNamePrefix + "service_"
|
||||
serviceReqsTotalName = metricServicePrefix + "requests_total"
|
||||
serviceReqsTLSTotalName = metricServicePrefix + "requests_tls_total"
|
||||
serviceReqDurationName = metricServicePrefix + "request_duration_seconds"
|
||||
serviceRetriesTotalName = metricServicePrefix + "retries_total"
|
||||
serviceServerUpName = metricServicePrefix + "server_up"
|
||||
serviceReqsBytesTotalName = metricServicePrefix + "requests_bytes_total"
|
||||
serviceRespsBytesTotalName = metricServicePrefix + "responses_bytes_total"
|
||||
)
|
||||
|
||||
// promState holds all metric state internally and acts as the only Collector we register for Prometheus.
|
||||
//
|
||||
// This enables control to remove metrics that belong to outdated configuration.
|
||||
// As an example why this is required, consider Traefik learns about a new service.
|
||||
// It populates the 'traefik_service_server_up' metric for it with a value of 1 (alive).
|
||||
// When the service is undeployed now the metric is still there in the client library
|
||||
// and will be returned on the metrics endpoint until Traefik would be restarted.
|
||||
//
|
||||
// To solve this problem promState keeps track of Traefik's dynamic configuration.
|
||||
// Metrics that "belong" to a dynamic configuration part like services or entryPoints
|
||||
// are removed after they were scraped at least once when the corresponding object
|
||||
// doesn't exist anymore.
|
||||
var promState = newPrometheusState()
|
||||
|
||||
var promRegistry = stdprometheus.NewRegistry()
|
||||
|
||||
// PrometheusHandler exposes Prometheus routes.
|
||||
func PrometheusHandler() http.Handler {
|
||||
return promhttp.HandlerFor(promRegistry, promhttp.HandlerOpts{})
|
||||
}
|
||||
|
||||
// RegisterPrometheus registers all Prometheus metrics.
|
||||
// It must be called only once and failing to register the metrics will lead to a panic.
|
||||
func RegisterPrometheus(ctx context.Context, config *otypes.Prometheus) Registry {
|
||||
standardRegistry := initStandardRegistry(config)
|
||||
|
||||
if err := promRegistry.Register(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})); err != nil {
|
||||
var arErr stdprometheus.AlreadyRegisteredError
|
||||
if !errors.As(err, &arErr) {
|
||||
log.Ctx(ctx).Warn().Msg("ProcessCollector is already registered")
|
||||
}
|
||||
}
|
||||
|
||||
if err := promRegistry.Register(collectors.NewGoCollector()); err != nil {
|
||||
var arErr stdprometheus.AlreadyRegisteredError
|
||||
if !errors.As(err, &arErr) {
|
||||
log.Ctx(ctx).Warn().Msg("GoCollector is already registered")
|
||||
}
|
||||
}
|
||||
|
||||
if !registerPromState(ctx) {
|
||||
return nil
|
||||
}
|
||||
|
||||
return standardRegistry
|
||||
}
|
||||
|
||||
// initStandardRegistry builds the Prometheus-backed standardRegistry described by config
// and resets promState.vectors to the set of vectors backing it.
//
// The configuration-level metrics (config reloads, last reload success, TLS
// certificate expiration, open connections) are always created. Entry point,
// router, and service metrics are only created when the matching Add*Labels
// option of config is enabled.
func initStandardRegistry(config *otypes.Prometheus) Registry {
	// Default histogram buckets, used unless config provides its own.
	buckets := []float64{0.1, 0.3, 1.2, 5.0}
	if config.Buckets != nil {
		buckets = config.Buckets
	}

	configReloads := newCounterFrom(stdprometheus.CounterOpts{
		Name: configReloadsTotalName,
		Help: "Config reloads",
	}, []string{})
	lastConfigReloadSuccess := newGaugeFrom(stdprometheus.GaugeOpts{
		Name: configLastReloadSuccessName,
		Help: "Last config reload success",
	}, []string{})
	tlsCertsNotAfterTimestamp := newGaugeFrom(stdprometheus.GaugeOpts{
		Name: tlsCertsNotAfterTimestampName,
		Help: "Certificate expiration timestamp",
	}, []string{"cn", "serial", "sans"})
	openConnections := newGaugeFrom(stdprometheus.GaugeOpts{
		Name: openConnectionsName,
		Help: "How many open connections exist, by entryPoint and protocol",
	}, []string{"entrypoint", "protocol"})

	// This assignment replaces any vectors left over from a previous call.
	promState.vectors = []vector{
		configReloads.cv,
		lastConfigReloadSuccess.gv,
		tlsCertsNotAfterTimestamp.gv,
		openConnections.gv,
	}

	reg := &standardRegistry{
		epEnabled:                      config.AddEntryPointsLabels,
		routerEnabled:                  config.AddRoutersLabels,
		svcEnabled:                     config.AddServicesLabels,
		configReloadsCounter:           configReloads,
		lastConfigReloadSuccessGauge:   lastConfigReloadSuccess,
		tlsCertsNotAfterTimestampGauge: tlsCertsNotAfterTimestamp,
		openConnectionsGauge:           openConnections,
	}

	// Entry point metrics.
	if config.AddEntryPointsLabels {
		entryPointReqs := newCounterWithHeadersFrom(stdprometheus.CounterOpts{
			Name: entryPointReqsTotalName,
			Help: "How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.",
		}, config.HeaderLabels, []string{"code", "method", "protocol", "entrypoint"})
		entryPointReqsTLS := newCounterFrom(stdprometheus.CounterOpts{
			Name: entryPointReqsTLSTotalName,
			Help: "How many HTTP requests with TLS processed on an entrypoint, partitioned by TLS Version and TLS cipher Used.",
		}, []string{"tls_version", "tls_cipher", "entrypoint"})
		entryPointReqDurations := newHistogramFrom(stdprometheus.HistogramOpts{
			Name:    entryPointReqDurationName,
			Help:    "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.",
			Buckets: buckets,
		}, []string{"code", "method", "protocol", "entrypoint"})
		entryPointReqsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
			Name: entryPointReqsBytesTotalName,
			Help: "The total size of requests in bytes handled by an entrypoint, partitioned by status code, protocol, and method.",
		}, []string{"code", "method", "protocol", "entrypoint"})
		entryPointRespsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
			Name: entryPointRespsBytesTotalName,
			Help: "The total size of responses in bytes handled by an entrypoint, partitioned by status code, protocol, and method.",
		}, []string{"code", "method", "protocol", "entrypoint"})

		promState.vectors = append(promState.vectors,
			entryPointReqs.cv,
			entryPointReqsTLS.cv,
			entryPointReqDurations.hv,
			entryPointReqsBytesTotal.cv,
			entryPointRespsBytesTotal.cv,
		)

		reg.entryPointReqsCounter = entryPointReqs
		reg.entryPointReqsTLSCounter = entryPointReqsTLS
		// NOTE(review): the error returned by NewHistogramWithScale is discarded;
		// presumably it cannot fail for the constant time.Second scale — confirm.
		reg.entryPointReqDurationHistogram, _ = NewHistogramWithScale(entryPointReqDurations, time.Second)
		reg.entryPointReqsBytesCounter = entryPointReqsBytesTotal
		reg.entryPointRespsBytesCounter = entryPointRespsBytesTotal
	}

	// Router metrics.
	if config.AddRoutersLabels {
		routerReqs := newCounterWithHeadersFrom(stdprometheus.CounterOpts{
			Name: routerReqsTotalName,
			Help: "How many HTTP requests are processed on a router, partitioned by service, status code, protocol, and method.",
		}, config.HeaderLabels, []string{"code", "method", "protocol", "router", "service"})
		routerReqsTLS := newCounterFrom(stdprometheus.CounterOpts{
			Name: routerReqsTLSTotalName,
			Help: "How many HTTP requests with TLS are processed on a router, partitioned by service, TLS Version, and TLS cipher Used.",
		}, []string{"tls_version", "tls_cipher", "router", "service"})
		routerReqDurations := newHistogramFrom(stdprometheus.HistogramOpts{
			Name:    routerReqDurationName,
			Help:    "How long it took to process the request on a router, partitioned by service, status code, protocol, and method.",
			Buckets: buckets,
		}, []string{"code", "method", "protocol", "router", "service"})
		routerReqsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
			Name: routerReqsBytesTotalName,
			Help: "The total size of requests in bytes handled by a router, partitioned by service, status code, protocol, and method.",
		}, []string{"code", "method", "protocol", "router", "service"})
		routerRespsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
			Name: routerRespsBytesTotalName,
			Help: "The total size of responses in bytes handled by a router, partitioned by service, status code, protocol, and method.",
		}, []string{"code", "method", "protocol", "router", "service"})

		promState.vectors = append(promState.vectors,
			routerReqs.cv,
			routerReqsTLS.cv,
			routerReqDurations.hv,
			routerReqsBytesTotal.cv,
			routerRespsBytesTotal.cv,
		)
		reg.routerReqsCounter = routerReqs
		reg.routerReqsTLSCounter = routerReqsTLS
		// NOTE(review): error discarded, see the entry point histogram — confirm it cannot fail here.
		reg.routerReqDurationHistogram, _ = NewHistogramWithScale(routerReqDurations, time.Second)
		reg.routerReqsBytesCounter = routerReqsBytesTotal
		reg.routerRespsBytesCounter = routerRespsBytesTotal
	}

	// Service metrics.
	if config.AddServicesLabels {
		serviceReqs := newCounterWithHeadersFrom(stdprometheus.CounterOpts{
			Name: serviceReqsTotalName,
			Help: "How many HTTP requests processed on a service, partitioned by status code, protocol, and method.",
		}, config.HeaderLabels, []string{"code", "method", "protocol", "service"})
		serviceReqsTLS := newCounterFrom(stdprometheus.CounterOpts{
			Name: serviceReqsTLSTotalName,
			Help: "How many HTTP requests with TLS processed on a service, partitioned by TLS version and TLS cipher.",
		}, []string{"tls_version", "tls_cipher", "service"})
		serviceReqDurations := newHistogramFrom(stdprometheus.HistogramOpts{
			Name:    serviceReqDurationName,
			Help:    "How long it took to process the request on a service, partitioned by status code, protocol, and method.",
			Buckets: buckets,
		}, []string{"code", "method", "protocol", "service"})
		serviceRetries := newCounterFrom(stdprometheus.CounterOpts{
			Name: serviceRetriesTotalName,
			Help: "How many request retries happened on a service.",
		}, []string{"service"})
		serviceServerUp := newGaugeFrom(stdprometheus.GaugeOpts{
			Name: serviceServerUpName,
			Help: "service server is up, described by gauge value of 0 or 1.",
		}, []string{"service", "url"})
		serviceReqsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
			Name: serviceReqsBytesTotalName,
			Help: "The total size of requests in bytes received by a service, partitioned by status code, protocol, and method.",
		}, []string{"code", "method", "protocol", "service"})
		serviceRespsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
			Name: serviceRespsBytesTotalName,
			Help: "The total size of responses in bytes returned by a service, partitioned by status code, protocol, and method.",
		}, []string{"code", "method", "protocol", "service"})

		promState.vectors = append(promState.vectors,
			serviceReqs.cv,
			serviceReqsTLS.cv,
			serviceReqDurations.hv,
			serviceRetries.cv,
			serviceServerUp.gv,
			serviceReqsBytesTotal.cv,
			serviceRespsBytesTotal.cv,
		)

		reg.serviceReqsCounter = serviceReqs
		reg.serviceReqsTLSCounter = serviceReqsTLS
		// NOTE(review): error discarded, see the entry point histogram — confirm it cannot fail here.
		reg.serviceReqDurationHistogram, _ = NewHistogramWithScale(serviceReqDurations, time.Second)
		reg.serviceRetriesCounter = serviceRetries
		reg.serviceServerUpGauge = serviceServerUp
		reg.serviceReqsBytesCounter = serviceReqsBytesTotal
		reg.serviceRespsBytesCounter = serviceRespsBytesTotal
	}

	return reg
}
|
||||
|
||||
func registerPromState(ctx context.Context) bool {
|
||||
err := promRegistry.Register(promState)
|
||||
if err == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
logger := log.Ctx(ctx)
|
||||
|
||||
var arErr stdprometheus.AlreadyRegisteredError
|
||||
if errors.As(err, &arErr) {
|
||||
logger.Debug().Msg("Prometheus collector already registered.")
|
||||
return true
|
||||
}
|
||||
|
||||
logger.Error().Err(err).Msg("Unable to register Traefik to Prometheus")
|
||||
return false
|
||||
}
|
||||
|
||||
// OnConfigurationUpdate receives the current configuration from Traefik.
|
||||
// It then converts the configuration to the optimized package internal format
|
||||
// and sets it to the promState.
|
||||
func OnConfigurationUpdate(conf dynamic.Configuration, entryPoints []string) {
|
||||
dynCfg := newDynamicConfig()
|
||||
|
||||
for _, value := range entryPoints {
|
||||
dynCfg.entryPoints[value] = true
|
||||
}
|
||||
|
||||
if conf.HTTP == nil {
|
||||
promState.SetDynamicConfig(dynCfg)
|
||||
return
|
||||
}
|
||||
|
||||
for name := range conf.HTTP.Routers {
|
||||
dynCfg.routers[name] = true
|
||||
}
|
||||
|
||||
for serviceName, service := range conf.HTTP.Services {
|
||||
dynCfg.services[serviceName] = make(map[string]bool)
|
||||
if service.LoadBalancer != nil {
|
||||
for _, server := range service.LoadBalancer.Servers {
|
||||
dynCfg.services[serviceName][server.URL] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
promState.SetDynamicConfig(dynCfg)
|
||||
}
|
||||
|
||||
func newPrometheusState() *prometheusState {
|
||||
return &prometheusState{
|
||||
dynamicConfig: newDynamicConfig(),
|
||||
deletedURLs: make(map[string][]string),
|
||||
}
|
||||
}
|
||||
|
||||
// vector is a Prometheus collector whose metrics can additionally be deleted
// by partial label match. prometheusState keeps its metric vectors behind this interface.
type vector interface {
	stdprometheus.Collector
	// DeletePartialMatch deletes the metrics matching the given labels
	// and returns an int (the number of deleted metrics, as summed up by prometheusState).
	DeletePartialMatch(labels stdprometheus.Labels) int
}
|
||||
|
||||
// prometheusState is the collector holding every metric vector,
// together with the bookkeeping needed to drop metrics of removed configuration elements.
type prometheusState struct {
	// vectors are the metric vectors described and collected by this state.
	vectors []vector

	// mtx is held by SetDynamicConfig and by the clean-up phase of Collect.
	mtx sync.Mutex
	// dynamicConfig is the configuration most recently passed to SetDynamicConfig.
	dynamicConfig *dynamicConfig
	// deletedEP, deletedRouters, and deletedServices list the names queued
	// by SetDynamicConfig; Collect clears them after processing.
	deletedEP       []string
	deletedRouters  []string
	deletedServices []string
	// deletedURLs lists the queued server URLs, keyed by service name.
	deletedURLs map[string][]string
}
|
||||
|
||||
func (ps *prometheusState) SetDynamicConfig(dynamicConfig *dynamicConfig) {
|
||||
ps.mtx.Lock()
|
||||
defer ps.mtx.Unlock()
|
||||
|
||||
for ep := range ps.dynamicConfig.entryPoints {
|
||||
if _, ok := dynamicConfig.entryPoints[ep]; !ok {
|
||||
ps.deletedEP = append(ps.deletedEP, ep)
|
||||
}
|
||||
}
|
||||
|
||||
for router := range ps.dynamicConfig.routers {
|
||||
if _, ok := dynamicConfig.routers[router]; !ok {
|
||||
ps.deletedRouters = append(ps.deletedRouters, router)
|
||||
}
|
||||
}
|
||||
|
||||
for service, serV := range ps.dynamicConfig.services {
|
||||
actualService, ok := dynamicConfig.services[service]
|
||||
if !ok {
|
||||
ps.deletedServices = append(ps.deletedServices, service)
|
||||
}
|
||||
for url := range serV {
|
||||
if _, ok := actualService[url]; !ok {
|
||||
ps.deletedURLs[service] = append(ps.deletedURLs[service], url)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ps.dynamicConfig = dynamicConfig
|
||||
}
|
||||
|
||||
// Describe implements prometheus.Collector and simply calls
|
||||
// the registered describer functions.
|
||||
func (ps *prometheusState) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
for _, v := range ps.vectors {
|
||||
v.Describe(ch)
|
||||
}
|
||||
}
|
||||
|
||||
// Collect implements prometheus.Collector. It calls the Collect
|
||||
// method of all metrics it received on the collectors channel.
|
||||
// It's also responsible to remove metrics that belong to an outdated configuration.
|
||||
// The removal happens only after their Collect method was called to ensure that
|
||||
// also those metrics will be exported on the current scrape.
|
||||
func (ps *prometheusState) Collect(ch chan<- stdprometheus.Metric) {
|
||||
for _, v := range ps.vectors {
|
||||
v.Collect(ch)
|
||||
}
|
||||
|
||||
ps.mtx.Lock()
|
||||
defer ps.mtx.Unlock()
|
||||
|
||||
for _, ep := range ps.deletedEP {
|
||||
if !ps.dynamicConfig.hasEntryPoint(ep) {
|
||||
ps.DeletePartialMatch(map[string]string{"entrypoint": ep})
|
||||
}
|
||||
}
|
||||
|
||||
for _, router := range ps.deletedRouters {
|
||||
if !ps.dynamicConfig.hasRouter(router) {
|
||||
ps.DeletePartialMatch(map[string]string{"router": router})
|
||||
}
|
||||
}
|
||||
|
||||
for _, service := range ps.deletedServices {
|
||||
if !ps.dynamicConfig.hasService(service) {
|
||||
ps.DeletePartialMatch(map[string]string{"service": service})
|
||||
}
|
||||
}
|
||||
|
||||
for service, urls := range ps.deletedURLs {
|
||||
for _, url := range urls {
|
||||
if !ps.dynamicConfig.hasServerURL(service, url) {
|
||||
ps.DeletePartialMatch(map[string]string{"service": service, "url": url})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ps.deletedEP = nil
|
||||
ps.deletedRouters = nil
|
||||
ps.deletedServices = nil
|
||||
ps.deletedURLs = make(map[string][]string)
|
||||
}
|
||||
|
||||
// DeletePartialMatch deletes all metrics where the variable labels contain all of those passed in as labels.
|
||||
// The order of the labels does not matter.
|
||||
// It returns the number of metrics deleted.
|
||||
func (ps *prometheusState) DeletePartialMatch(labels stdprometheus.Labels) int {
|
||||
var count int
|
||||
for _, elem := range ps.vectors {
|
||||
count += elem.DeletePartialMatch(labels)
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// newDynamicConfig returns a dynamicConfig whose lookup maps are all allocated and empty.
func newDynamicConfig() *dynamicConfig {
	cfg := new(dynamicConfig)
	cfg.entryPoints = map[string]bool{}
	cfg.routers = map[string]bool{}
	cfg.services = map[string]map[string]bool{}

	return cfg
}

// dynamicConfig holds the current configuration for entryPoints, routers,
// services, and server URLs as maps, so that existence checks are cheap.
// It is used to tell whether collected metrics belong to the current
// configuration or to an outdated one.
// Only the presence of a key matters to the lookups, not the stored boolean.
type dynamicConfig struct {
	entryPoints map[string]bool
	routers     map[string]bool
	// services maps a service name to the set of its server URLs.
	services map[string]map[string]bool
}

// hasEntryPoint reports whether entrypointName is a known entry point.
func (d *dynamicConfig) hasEntryPoint(entrypointName string) bool {
	_, found := d.entryPoints[entrypointName]

	return found
}

// hasService reports whether serviceName is a known service.
func (d *dynamicConfig) hasService(serviceName string) bool {
	_, found := d.services[serviceName]

	return found
}

// hasRouter reports whether routerName is a known router.
func (d *dynamicConfig) hasRouter(routerName string) bool {
	_, found := d.routers[routerName]

	return found
}

// hasServerURL reports whether serverURL is a known server URL of the service serviceName.
// It is false when the service itself is unknown.
func (d *dynamicConfig) hasServerURL(serviceName, serverURL string) bool {
	urls, found := d.services[serviceName]
	if !found {
		return false
	}

	_, found = urls[serverURL]

	return found
}
|
||||
|
||||
func newCounterWithHeadersFrom(opts stdprometheus.CounterOpts, headers map[string]string, labelNames []string) *counterWithHeaders {
|
||||
var headerLabels []string
|
||||
for k := range headers {
|
||||
headerLabels = append(headerLabels, k)
|
||||
}
|
||||
|
||||
cv := stdprometheus.NewCounterVec(opts, append(labelNames, headerLabels...))
|
||||
c := &counterWithHeaders{
|
||||
name: opts.Name,
|
||||
headers: headers,
|
||||
cv: cv,
|
||||
}
|
||||
if len(labelNames) == 0 && len(headerLabels) == 0 {
|
||||
c.collector = cv.WithLabelValues()
|
||||
c.Add(0)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
type counterWithHeaders struct {
|
||||
name string
|
||||
cv *stdprometheus.CounterVec
|
||||
labelNamesValues labelNamesValues
|
||||
headers map[string]string
|
||||
collector stdprometheus.Counter
|
||||
}
|
||||
|
||||
func (c *counterWithHeaders) With(headers http.Header, labelValues ...string) CounterWithHeaders {
|
||||
for headerLabel, headerKey := range c.headers {
|
||||
labelValues = append(labelValues, headerLabel, headers.Get(headerKey))
|
||||
}
|
||||
lnv := c.labelNamesValues.With(labelValues...)
|
||||
return &counterWithHeaders{
|
||||
name: c.name,
|
||||
headers: c.headers,
|
||||
cv: c.cv,
|
||||
labelNamesValues: lnv,
|
||||
collector: c.cv.With(lnv.ToLabels()),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *counterWithHeaders) Add(delta float64) {
|
||||
c.collector.Add(delta)
|
||||
}
|
||||
|
||||
func (c *counterWithHeaders) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
c.cv.Describe(ch)
|
||||
}
|
||||
|
||||
func newCounterFrom(opts stdprometheus.CounterOpts, labelNames []string) *counter {
|
||||
cv := stdprometheus.NewCounterVec(opts, labelNames)
|
||||
c := &counter{
|
||||
name: opts.Name,
|
||||
cv: cv,
|
||||
}
|
||||
if len(labelNames) == 0 {
|
||||
c.collector = cv.WithLabelValues()
|
||||
c.Add(0)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
type counter struct {
|
||||
name string
|
||||
cv *stdprometheus.CounterVec
|
||||
labelNamesValues labelNamesValues
|
||||
collector stdprometheus.Counter
|
||||
}
|
||||
|
||||
func (c *counter) With(labelValues ...string) metrics.Counter {
|
||||
lnv := c.labelNamesValues.With(labelValues...)
|
||||
return &counter{
|
||||
name: c.name,
|
||||
cv: c.cv,
|
||||
labelNamesValues: lnv,
|
||||
collector: c.cv.With(lnv.ToLabels()),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *counter) Add(delta float64) {
|
||||
c.collector.Add(delta)
|
||||
}
|
||||
|
||||
func (c *counter) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
c.cv.Describe(ch)
|
||||
}
|
||||
|
||||
func newGaugeFrom(opts stdprometheus.GaugeOpts, labelNames []string) *gauge {
|
||||
gv := stdprometheus.NewGaugeVec(opts, labelNames)
|
||||
g := &gauge{
|
||||
name: opts.Name,
|
||||
gv: gv,
|
||||
}
|
||||
|
||||
if len(labelNames) == 0 {
|
||||
g.collector = gv.WithLabelValues()
|
||||
g.Set(0)
|
||||
}
|
||||
return g
|
||||
}
|
||||
|
||||
type gauge struct {
|
||||
name string
|
||||
gv *stdprometheus.GaugeVec
|
||||
labelNamesValues labelNamesValues
|
||||
collector stdprometheus.Gauge
|
||||
}
|
||||
|
||||
func (g *gauge) With(labelValues ...string) metrics.Gauge {
|
||||
lnv := g.labelNamesValues.With(labelValues...)
|
||||
return &gauge{
|
||||
name: g.name,
|
||||
gv: g.gv,
|
||||
labelNamesValues: lnv,
|
||||
collector: g.gv.With(lnv.ToLabels()),
|
||||
}
|
||||
}
|
||||
|
||||
func (g *gauge) Add(delta float64) {
|
||||
g.collector.Add(delta)
|
||||
}
|
||||
|
||||
func (g *gauge) Set(value float64) {
|
||||
g.collector.Set(value)
|
||||
}
|
||||
|
||||
func (g *gauge) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
g.gv.Describe(ch)
|
||||
}
|
||||
|
||||
func newHistogramFrom(opts stdprometheus.HistogramOpts, labelNames []string) *histogram {
|
||||
hv := stdprometheus.NewHistogramVec(opts, labelNames)
|
||||
return &histogram{
|
||||
name: opts.Name,
|
||||
hv: hv,
|
||||
}
|
||||
}
|
||||
|
||||
type histogram struct {
|
||||
name string
|
||||
hv *stdprometheus.HistogramVec
|
||||
labelNamesValues labelNamesValues
|
||||
collector stdprometheus.Observer
|
||||
}
|
||||
|
||||
func (h *histogram) With(labelValues ...string) metrics.Histogram {
|
||||
lnv := h.labelNamesValues.With(labelValues...)
|
||||
return &histogram{
|
||||
name: h.name,
|
||||
hv: h.hv,
|
||||
labelNamesValues: lnv,
|
||||
collector: h.hv.With(lnv.ToLabels()),
|
||||
}
|
||||
}
|
||||
|
||||
func (h *histogram) Observe(value float64) {
|
||||
h.collector.Observe(value)
|
||||
}
|
||||
|
||||
func (h *histogram) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
h.hv.Describe(ch)
|
||||
}
|
||||
|
||||
// labelNamesValues is a flat list of alternating label names and values.
// Metrics may include it as a member to help them satisfy With semantics and
// save some code duplication.
type labelNamesValues []string

// With returns a new labelNamesValues made of the receiver followed by labelValues.
// When labelValues holds an odd number of entries, the value "unknown" is
// appended so that the last name gets a value.
func (lvs labelNamesValues) With(labelValues ...string) labelNamesValues {
	if len(labelValues)%2 == 1 {
		labelValues = append(labelValues, "unknown")
	}

	merged := make(labelNamesValues, 0, len(lvs)+len(labelValues))
	merged = append(merged, lvs...)
	merged = append(merged, labelValues...)

	return merged
}
|
||||
|
||||
// ToLabels is a convenience method to convert a labelNamesValues
|
||||
// to the native prometheus.Labels.
|
||||
func (lvs labelNamesValues) ToLabels() stdprometheus.Labels {
|
||||
labels := make(map[string]string, len(lvs)/2)
|
||||
for i := 0; i < len(lvs); i += 2 {
|
||||
labels[lvs[i]] = lvs[i+1]
|
||||
}
|
||||
return labels
|
||||
}
|
||||
740
pkg/observability/metrics/prometheus_test.go
Normal file
740
pkg/observability/metrics/prometheus_test.go
Normal file
|
|
@ -0,0 +1,740 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/traefik/traefik/v3/pkg/config/dynamic"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
th "github.com/traefik/traefik/v3/pkg/testhelpers"
|
||||
)
|
||||
|
||||
func TestRegisterPromState(t *testing.T) {
|
||||
t.Cleanup(promState.reset)
|
||||
|
||||
testCases := []struct {
|
||||
desc string
|
||||
prometheusSlice []*otypes.Prometheus
|
||||
initPromState bool
|
||||
unregisterPromState bool
|
||||
expectedNbRegistries int
|
||||
}{
|
||||
{
|
||||
desc: "Register once",
|
||||
prometheusSlice: []*otypes.Prometheus{{}},
|
||||
initPromState: true,
|
||||
unregisterPromState: false,
|
||||
expectedNbRegistries: 1,
|
||||
},
|
||||
{
|
||||
desc: "Register once with no promState init",
|
||||
prometheusSlice: []*otypes.Prometheus{{}},
|
||||
initPromState: false,
|
||||
unregisterPromState: false,
|
||||
expectedNbRegistries: 1,
|
||||
},
|
||||
{
|
||||
desc: "Register twice",
|
||||
prometheusSlice: []*otypes.Prometheus{{}, {}},
|
||||
initPromState: true,
|
||||
unregisterPromState: false,
|
||||
expectedNbRegistries: 2,
|
||||
},
|
||||
{
|
||||
desc: "Register twice with no promstate init",
|
||||
prometheusSlice: []*otypes.Prometheus{{}, {}},
|
||||
initPromState: false,
|
||||
unregisterPromState: false,
|
||||
expectedNbRegistries: 2,
|
||||
},
|
||||
{
|
||||
desc: "Register twice with unregister",
|
||||
prometheusSlice: []*otypes.Prometheus{{}, {}},
|
||||
initPromState: true,
|
||||
unregisterPromState: true,
|
||||
expectedNbRegistries: 2,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
actualNbRegistries := 0
|
||||
for _, prom := range test.prometheusSlice {
|
||||
if test.initPromState {
|
||||
initStandardRegistry(prom)
|
||||
}
|
||||
if registerPromState(t.Context()) {
|
||||
actualNbRegistries++
|
||||
}
|
||||
if test.unregisterPromState {
|
||||
promRegistry.Unregister(promState)
|
||||
}
|
||||
|
||||
promState.reset()
|
||||
}
|
||||
|
||||
promRegistry.Unregister(promState)
|
||||
assert.Equal(t, test.expectedNbRegistries, actualNbRegistries)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheus(t *testing.T) {
|
||||
promState = newPrometheusState()
|
||||
promRegistry = prometheus.NewRegistry()
|
||||
t.Cleanup(promState.reset)
|
||||
|
||||
prometheusRegistry := RegisterPrometheus(t.Context(), &otypes.Prometheus{
|
||||
AddEntryPointsLabels: true,
|
||||
AddRoutersLabels: true,
|
||||
AddServicesLabels: true,
|
||||
HeaderLabels: map[string]string{"useragent": "User-Agent"},
|
||||
})
|
||||
defer promRegistry.Unregister(promState)
|
||||
|
||||
if !prometheusRegistry.IsEpEnabled() || !prometheusRegistry.IsRouterEnabled() || !prometheusRegistry.IsSvcEnabled() {
|
||||
t.Errorf("PrometheusRegistry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()")
|
||||
}
|
||||
|
||||
prometheusRegistry.ConfigReloadsCounter().Add(1)
|
||||
prometheusRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
|
||||
prometheusRegistry.
|
||||
OpenConnectionsGauge().
|
||||
With("entrypoint", "test", "protocol", "TCP").
|
||||
Set(1)
|
||||
|
||||
prometheusRegistry.
|
||||
TLSCertsNotAfterTimestampGauge().
|
||||
With("cn", "value", "serial", "value", "sans", "value").
|
||||
Set(float64(time.Now().Unix()))
|
||||
|
||||
prometheusRegistry.
|
||||
EntryPointReqsCounter().
|
||||
With(map[string][]string{"User-Agent": {"foobar"}}, "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
EntryPointReqDurationHistogram().
|
||||
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Observe(1)
|
||||
prometheusRegistry.
|
||||
EntryPointRespsBytesCounter().
|
||||
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
EntryPointReqsBytesCounter().
|
||||
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Add(1)
|
||||
|
||||
prometheusRegistry.
|
||||
RouterReqsCounter().
|
||||
With(nil, "router", "demo", "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
RouterReqsTLSCounter().
|
||||
With("router", "demo", "service", "service1", "tls_version", "foo", "tls_cipher", "bar").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
RouterReqDurationHistogram().
|
||||
With("router", "demo", "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Observe(10000)
|
||||
prometheusRegistry.
|
||||
RouterRespsBytesCounter().
|
||||
With("router", "demo", "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
RouterReqsBytesCounter().
|
||||
With("router", "demo", "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
|
||||
prometheusRegistry.
|
||||
ServiceReqsCounter().
|
||||
With(map[string][]string{"User-Agent": {"foobar"}}, "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
ServiceReqsTLSCounter().
|
||||
With("service", "service1", "tls_version", "foo", "tls_cipher", "bar").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
ServiceReqDurationHistogram().
|
||||
With("service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Observe(10000)
|
||||
prometheusRegistry.
|
||||
ServiceRetriesCounter().
|
||||
With("service", "service1").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
ServiceServerUpGauge().
|
||||
With("service", "service1", "url", "http://127.0.0.10:80").
|
||||
Set(1)
|
||||
prometheusRegistry.
|
||||
ServiceRespsBytesCounter().
|
||||
With("service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
ServiceReqsBytesCounter().
|
||||
With("service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
metricsFamilies := mustScrape()
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
labels map[string]string
|
||||
assert func(*dto.MetricFamily)
|
||||
}{
|
||||
{
|
||||
name: configReloadsTotalName,
|
||||
assert: buildCounterAssert(t, configReloadsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: configLastReloadSuccessName,
|
||||
assert: buildTimestampAssert(t, configLastReloadSuccessName),
|
||||
},
|
||||
{
|
||||
name: openConnectionsName,
|
||||
labels: map[string]string{
|
||||
"protocol": "TCP",
|
||||
"entrypoint": "test",
|
||||
},
|
||||
assert: buildGaugeAssert(t, openConnectionsName, 1),
|
||||
},
|
||||
{
|
||||
name: tlsCertsNotAfterTimestampName,
|
||||
labels: map[string]string{
|
||||
"cn": "value",
|
||||
"serial": "value",
|
||||
"sans": "value",
|
||||
},
|
||||
assert: buildTimestampAssert(t, tlsCertsNotAfterTimestampName),
|
||||
},
|
||||
{
|
||||
name: entryPointReqsTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
"useragent": "foobar",
|
||||
},
|
||||
assert: buildCounterAssert(t, entryPointReqsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: entryPointReqDurationName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
},
|
||||
assert: buildHistogramAssert(t, entryPointReqDurationName, 1),
|
||||
},
|
||||
{
|
||||
name: entryPointReqsBytesTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
},
|
||||
assert: buildCounterAssert(t, entryPointReqsBytesTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: entryPointRespsBytesTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
},
|
||||
assert: buildCounterAssert(t, entryPointRespsBytesTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: routerReqsTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"service": "service1",
|
||||
"router": "demo",
|
||||
"useragent": "",
|
||||
},
|
||||
assert: buildCounterAssert(t, routerReqsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: routerReqsTLSTotalName,
|
||||
labels: map[string]string{
|
||||
"service": "service1",
|
||||
"router": "demo",
|
||||
"tls_version": "foo",
|
||||
"tls_cipher": "bar",
|
||||
},
|
||||
assert: buildCounterAssert(t, routerReqsTLSTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: routerReqDurationName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"service": "service1",
|
||||
"router": "demo",
|
||||
},
|
||||
assert: buildHistogramAssert(t, routerReqDurationName, 1),
|
||||
},
|
||||
{
|
||||
name: routerReqsBytesTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"service": "service1",
|
||||
"router": "demo",
|
||||
},
|
||||
assert: buildCounterAssert(t, routerReqsBytesTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: routerRespsBytesTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"service": "service1",
|
||||
"router": "demo",
|
||||
},
|
||||
assert: buildCounterAssert(t, routerRespsBytesTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: serviceReqsTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"service": "service1",
|
||||
"useragent": "foobar",
|
||||
},
|
||||
assert: buildCounterAssert(t, serviceReqsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: serviceReqsTLSTotalName,
|
||||
labels: map[string]string{
|
||||
"service": "service1",
|
||||
"tls_version": "foo",
|
||||
"tls_cipher": "bar",
|
||||
},
|
||||
assert: buildCounterAssert(t, serviceReqsTLSTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: serviceReqDurationName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"service": "service1",
|
||||
},
|
||||
assert: buildHistogramAssert(t, serviceReqDurationName, 1),
|
||||
},
|
||||
{
|
||||
name: serviceRetriesTotalName,
|
||||
labels: map[string]string{
|
||||
"service": "service1",
|
||||
},
|
||||
assert: buildGreaterThanCounterAssert(t, serviceRetriesTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: serviceServerUpName,
|
||||
labels: map[string]string{
|
||||
"service": "service1",
|
||||
"url": "http://127.0.0.10:80",
|
||||
},
|
||||
assert: buildGaugeAssert(t, serviceServerUpName, 1),
|
||||
},
|
||||
{
|
||||
name: serviceReqsBytesTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"service": "service1",
|
||||
},
|
||||
assert: buildCounterAssert(t, serviceReqsBytesTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: serviceRespsBytesTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"service": "service1",
|
||||
},
|
||||
assert: buildCounterAssert(t, serviceRespsBytesTotalName, 1),
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
family := findMetricFamily(test.name, metricsFamilies)
|
||||
if family == nil {
|
||||
t.Errorf("gathered metrics do not contain %q", test.name)
|
||||
return
|
||||
}
|
||||
|
||||
for _, label := range family.GetMetric()[0].GetLabel() {
|
||||
val, ok := test.labels[label.GetName()]
|
||||
if !ok {
|
||||
t.Errorf("%q metric contains unexpected label %q", test.name, label.GetName())
|
||||
} else if val != label.GetValue() {
|
||||
t.Errorf("label %q in metric %q has wrong value %q, expected %q", label.GetName(), test.name, label.GetValue(), val)
|
||||
}
|
||||
}
|
||||
test.assert(family)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusMetricRemoval(t *testing.T) {
|
||||
promState = newPrometheusState()
|
||||
promRegistry = prometheus.NewRegistry()
|
||||
t.Cleanup(promState.reset)
|
||||
|
||||
prometheusRegistry := RegisterPrometheus(t.Context(), &otypes.Prometheus{AddEntryPointsLabels: true, AddServicesLabels: true, AddRoutersLabels: true})
|
||||
defer promRegistry.Unregister(promState)
|
||||
|
||||
conf1 := dynamic.Configuration{
|
||||
HTTP: th.BuildConfiguration(
|
||||
th.WithRouters(
|
||||
th.WithRouter("foo@providerName", th.WithServiceName("bar")),
|
||||
th.WithRouter("router2", th.WithServiceName("bar@providerName")),
|
||||
),
|
||||
th.WithLoadBalancerServices(
|
||||
th.WithService("bar@providerName", th.WithServers(
|
||||
th.WithServer("http://localhost:9000"),
|
||||
th.WithServer("http://localhost:9999"),
|
||||
th.WithServer("http://localhost:9998"),
|
||||
)),
|
||||
th.WithService("service1", th.WithServers(th.WithServer("http://localhost:9000"))),
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
conf2 := dynamic.Configuration{
|
||||
HTTP: th.BuildConfiguration(
|
||||
th.WithRouters(
|
||||
th.WithRouter("foo@providerName", th.WithServiceName("bar")),
|
||||
),
|
||||
th.WithLoadBalancerServices(
|
||||
th.WithService("bar@providerName", th.WithServers(th.WithServer("http://localhost:9000"))),
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
OnConfigurationUpdate(conf1, []string{"entrypoint1", "entrypoint2"})
|
||||
OnConfigurationUpdate(conf2, []string{"entrypoint1"})
|
||||
|
||||
// Register some metrics manually that are not part of the active configuration.
|
||||
// Those metrics should be part of the /metrics output on the first scrape but
|
||||
// should be removed after that scrape.
|
||||
prometheusRegistry.
|
||||
EntryPointReqsCounter().
|
||||
With(nil, "entrypoint", "entrypoint2", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
RouterReqsCounter().
|
||||
With(nil, "router", "router2", "service", "bar@providerName", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
ServiceReqsCounter().
|
||||
With(nil, "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
ServiceServerUpGauge().
|
||||
With("service", "bar@providerName", "url", "http://localhost:9999").
|
||||
Set(1)
|
||||
prometheusRegistry.
|
||||
ServiceServerUpGauge().
|
||||
With("service", "bar@providerName", "url", "http://localhost:9998").
|
||||
Set(1)
|
||||
|
||||
assertMetricsExist(t, mustScrape(), entryPointReqsTotalName, routerReqsTotalName, serviceReqsTotalName, serviceServerUpName)
|
||||
assertMetricsAbsent(t, mustScrape(), entryPointReqsTotalName, routerReqsTotalName, serviceReqsTotalName, serviceServerUpName)
|
||||
|
||||
// To verify that metrics belonging to active configurations are not removed
|
||||
// here the counter examples.
|
||||
prometheusRegistry.
|
||||
EntryPointReqsCounter().
|
||||
With(nil, "entrypoint", "entrypoint1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
RouterReqsCounter().
|
||||
With(nil, "router", "foo@providerName", "service", "bar", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
ServiceReqsCounter().
|
||||
With(nil, "service", "bar@providerName", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
ServiceServerUpGauge().
|
||||
With("service", "bar@providerName", "url", "http://localhost:9000").
|
||||
Set(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
assertMetricsExist(t, mustScrape(), entryPointReqsTotalName, serviceReqsTotalName, serviceServerUpName, routerReqsTotalName)
|
||||
assertMetricsExist(t, mustScrape(), entryPointReqsTotalName, serviceReqsTotalName, serviceServerUpName, routerReqsTotalName)
|
||||
}
|
||||
|
||||
func TestPrometheusMetricRemoveEndpointForRecoveredService(t *testing.T) {
|
||||
promState = newPrometheusState()
|
||||
promRegistry = prometheus.NewRegistry()
|
||||
t.Cleanup(promState.reset)
|
||||
|
||||
prometheusRegistry := RegisterPrometheus(t.Context(), &otypes.Prometheus{AddServicesLabels: true})
|
||||
defer promRegistry.Unregister(promState)
|
||||
|
||||
conf1 := dynamic.Configuration{
|
||||
HTTP: th.BuildConfiguration(
|
||||
th.WithLoadBalancerServices(
|
||||
th.WithService("service1", th.WithServers(th.WithServer("http://localhost:9000"))),
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
conf2 := dynamic.Configuration{
|
||||
HTTP: th.BuildConfiguration(),
|
||||
}
|
||||
|
||||
conf3 := dynamic.Configuration{
|
||||
HTTP: th.BuildConfiguration(
|
||||
th.WithLoadBalancerServices(
|
||||
th.WithService("service1", th.WithServers(th.WithServer("http://localhost:9001"))),
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
OnConfigurationUpdate(conf1, []string{})
|
||||
OnConfigurationUpdate(conf2, []string{})
|
||||
OnConfigurationUpdate(conf3, []string{})
|
||||
|
||||
prometheusRegistry.
|
||||
ServiceServerUpGauge().
|
||||
With("service", "service1", "url", "http://localhost:9000").
|
||||
Add(1)
|
||||
|
||||
assertMetricsExist(t, mustScrape(), serviceServerUpName)
|
||||
assertMetricsAbsent(t, mustScrape(), serviceServerUpName)
|
||||
}
|
||||
|
||||
func TestPrometheusRemovedMetricsReset(t *testing.T) {
|
||||
t.Cleanup(promState.reset)
|
||||
|
||||
prometheusRegistry := RegisterPrometheus(t.Context(), &otypes.Prometheus{AddEntryPointsLabels: true, AddServicesLabels: true})
|
||||
defer promRegistry.Unregister(promState)
|
||||
|
||||
conf1 := dynamic.Configuration{
|
||||
HTTP: th.BuildConfiguration(
|
||||
th.WithLoadBalancerServices(th.WithService("service",
|
||||
th.WithServers(th.WithServer("http://localhost:9000"))),
|
||||
),
|
||||
),
|
||||
}
|
||||
OnConfigurationUpdate(conf1, []string{"entrypoint1", "entrypoint2"})
|
||||
OnConfigurationUpdate(dynamic.Configuration{}, nil)
|
||||
|
||||
labelNamesValues := []string{
|
||||
"service", "service",
|
||||
"code", strconv.Itoa(http.StatusOK),
|
||||
"method", http.MethodGet,
|
||||
"protocol", "http",
|
||||
}
|
||||
prometheusRegistry.
|
||||
ServiceReqsCounter().
|
||||
With(nil, labelNamesValues...).
|
||||
Add(3)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
metricsFamilies := mustScrape()
|
||||
assertCounterValue(t, 3, findMetricFamily(serviceReqsTotalName, metricsFamilies), labelNamesValues...)
|
||||
|
||||
// There is no dynamic configuration and so this metric will be deleted
|
||||
// after the first scrape.
|
||||
assertMetricsAbsent(t, mustScrape(), serviceReqsTotalName)
|
||||
|
||||
prometheusRegistry.
|
||||
ServiceReqsCounter().
|
||||
With(nil, labelNamesValues...).
|
||||
Add(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
metricsFamilies = mustScrape()
|
||||
assertCounterValue(t, 1, findMetricFamily(serviceReqsTotalName, metricsFamilies), labelNamesValues...)
|
||||
}
|
||||
|
||||
// reset is a utility method for unit testing.
|
||||
// It should be called after each test run that changes promState internally
|
||||
// in order to avoid dependencies between unit tests.
|
||||
func (ps *prometheusState) reset() {
|
||||
ps.dynamicConfig = newDynamicConfig()
|
||||
ps.vectors = nil
|
||||
ps.deletedEP = nil
|
||||
ps.deletedRouters = nil
|
||||
ps.deletedServices = nil
|
||||
ps.deletedURLs = make(map[string][]string)
|
||||
}
|
||||
|
||||
// delayForTrackingCompletion pauses the calling test for a short while.
//
// Metrics are tracked and gathered concurrently. In practice that is harmless:
// a tracked metric that misses the current scrape simply appears in the next
// one. To test the tracking of all metrics reliably, though, the tests sleep
// briefly so that the metric is guaranteed to be present in the next scrape.
func delayForTrackingCompletion() {
	const trackingDelay = 250 * time.Millisecond

	time.Sleep(trackingDelay)
}
|
||||
|
||||
func mustScrape() []*dto.MetricFamily {
|
||||
families, err := promRegistry.Gather()
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("could not gather metrics families: %s", err))
|
||||
}
|
||||
return families
|
||||
}
|
||||
|
||||
func assertMetricsExist(t *testing.T, families []*dto.MetricFamily, metricNames ...string) {
|
||||
t.Helper()
|
||||
|
||||
for _, metricName := range metricNames {
|
||||
if findMetricFamily(metricName, families) == nil {
|
||||
t.Errorf("gathered metrics should contain %q", metricName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func assertMetricsAbsent(t *testing.T, families []*dto.MetricFamily, metricNames ...string) {
|
||||
t.Helper()
|
||||
|
||||
for _, metricName := range metricNames {
|
||||
if findMetricFamily(metricName, families) != nil {
|
||||
t.Errorf("gathered metrics should not contain %q", metricName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFamily {
|
||||
for _, family := range families {
|
||||
if family.GetName() == name {
|
||||
return family
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func findMetricByLabelNamesValues(family *dto.MetricFamily, labelNamesValues ...string) *dto.Metric {
|
||||
if family == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, metric := range family.GetMetric() {
|
||||
if hasMetricAllLabelPairs(metric, labelNamesValues...) {
|
||||
return metric
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func hasMetricAllLabelPairs(metric *dto.Metric, labelNamesValues ...string) bool {
|
||||
for i := 0; i < len(labelNamesValues); i += 2 {
|
||||
name, val := labelNamesValues[i], labelNamesValues[i+1]
|
||||
if !hasMetricLabelPair(metric, name, val) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func hasMetricLabelPair(metric *dto.Metric, labelName, labelValue string) bool {
|
||||
for _, labelPair := range metric.GetLabel() {
|
||||
if labelPair.GetName() == labelName && labelPair.GetValue() == labelValue {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func assertCounterValue(t *testing.T, want float64, family *dto.MetricFamily, labelNamesValues ...string) {
|
||||
t.Helper()
|
||||
|
||||
metric := findMetricByLabelNamesValues(family, labelNamesValues...)
|
||||
|
||||
if metric == nil {
|
||||
t.Error("metric must not be nil")
|
||||
return
|
||||
}
|
||||
if metric.GetCounter() == nil {
|
||||
t.Errorf("metric %s must be a counter", family.GetName())
|
||||
return
|
||||
}
|
||||
|
||||
if cv := metric.GetCounter().GetValue(); cv != want {
|
||||
t.Errorf("metric %s has value %v, want %v", family.GetName(), cv, want)
|
||||
}
|
||||
}
|
||||
|
||||
func buildCounterAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
|
||||
t.Helper()
|
||||
|
||||
return func(family *dto.MetricFamily) {
|
||||
if cv := int(family.GetMetric()[0].GetCounter().GetValue()); cv != expectedValue {
|
||||
t.Errorf("metric %s has value %d, want %d", metricName, cv, expectedValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildGreaterThanCounterAssert(t *testing.T, metricName string, expectedMinValue int) func(family *dto.MetricFamily) {
|
||||
t.Helper()
|
||||
|
||||
return func(family *dto.MetricFamily) {
|
||||
if cv := int(family.GetMetric()[0].GetCounter().GetValue()); cv < expectedMinValue {
|
||||
t.Errorf("metric %s has value %d, want at least %d", metricName, cv, expectedMinValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildHistogramAssert(t *testing.T, metricName string, expectedSampleCount int) func(family *dto.MetricFamily) {
|
||||
t.Helper()
|
||||
|
||||
return func(family *dto.MetricFamily) {
|
||||
if sc := int(family.GetMetric()[0].GetHistogram().GetSampleCount()); sc != expectedSampleCount {
|
||||
t.Errorf("metric %s has sample count value %d, want %d", metricName, sc, expectedSampleCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildGaugeAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
|
||||
t.Helper()
|
||||
|
||||
return func(family *dto.MetricFamily) {
|
||||
if gv := int(family.GetMetric()[0].GetGauge().GetValue()); gv != expectedValue {
|
||||
t.Errorf("metric %s has value %d, want %d", metricName, gv, expectedValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildTimestampAssert(t *testing.T, metricName string) func(family *dto.MetricFamily) {
|
||||
t.Helper()
|
||||
|
||||
return func(family *dto.MetricFamily) {
|
||||
if ts := time.Unix(int64(family.GetMetric()[0].GetGauge().GetValue()), 0); time.Since(ts) > time.Minute {
|
||||
t.Errorf("metric %s has wrong timestamp %v", metricName, ts)
|
||||
}
|
||||
}
|
||||
}
|
||||
121
pkg/observability/metrics/statsd.go
Normal file
121
pkg/observability/metrics/statsd.go
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/metrics/statsd"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/traefik/traefik/v3/pkg/observability/logs"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
"github.com/traefik/traefik/v3/pkg/safe"
|
||||
)
|
||||
|
||||
var (
|
||||
statsdClient *statsd.Statsd
|
||||
statsdTicker *time.Ticker
|
||||
)
|
||||
|
||||
// Names of the metrics sent to StatsD. RegisterStatsd prepends the configured
// prefix followed by a dot to each of them.
const (
	// Global metrics.
	statsdConfigReloadsName           = "config.reload.total"
	statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
	statsdOpenConnectionsName         = "open.connections"

	// TLS metrics.
	statsdTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"

	// Entry point metrics.
	statsdEntryPointReqsName        = "entrypoint.request.total"
	statsdEntryPointReqsTLSName     = "entrypoint.request.tls.total"
	statsdEntryPointReqDurationName = "entrypoint.request.duration"
	statsdEntryPointReqsBytesName   = "entrypoint.requests.bytes.total"
	statsdEntryPointRespsBytesName  = "entrypoint.responses.bytes.total"

	// Router metrics.
	statsdRouterReqsName         = "router.request.total"
	statsdRouterReqsTLSName      = "router.request.tls.total"
	statsdRouterReqsDurationName = "router.request.duration"
	statsdRouterReqsBytesName    = "router.requests.bytes.total"
	statsdRouterRespsBytesName   = "router.responses.bytes.total"

	// Service metrics.
	statsdServiceReqsName         = "service.request.total"
	statsdServiceReqsTLSName      = "service.request.tls.total"
	statsdServiceReqsDurationName = "service.request.duration"
	statsdServiceRetriesTotalName = "service.retries.total"
	statsdServiceServerUpName     = "service.server.up"
	statsdServiceReqsBytesName    = "service.requests.bytes.total"
	statsdServiceRespsBytesName   = "service.responses.bytes.total"
)
|
||||
|
||||
// RegisterStatsd registers the metrics pusher if this didn't happen yet and creates a statsd Registry instance.
|
||||
func RegisterStatsd(ctx context.Context, config *otypes.Statsd) Registry {
|
||||
// just to be sure there is a prefix defined
|
||||
if config.Prefix == "" {
|
||||
config.Prefix = defaultMetricsPrefix
|
||||
}
|
||||
|
||||
statsdClient = statsd.New(config.Prefix+".", logs.NewGoKitWrapper(*log.Ctx(ctx)))
|
||||
|
||||
if statsdTicker == nil {
|
||||
statsdTicker = initStatsdTicker(ctx, config)
|
||||
}
|
||||
|
||||
registry := &standardRegistry{
|
||||
configReloadsCounter: statsdClient.NewCounter(statsdConfigReloadsName, 1.0),
|
||||
lastConfigReloadSuccessGauge: statsdClient.NewGauge(statsdLastConfigReloadSuccessName),
|
||||
tlsCertsNotAfterTimestampGauge: statsdClient.NewGauge(statsdTLSCertsNotAfterTimestampName),
|
||||
openConnectionsGauge: statsdClient.NewGauge(statsdOpenConnectionsName),
|
||||
}
|
||||
|
||||
if config.AddEntryPointsLabels {
|
||||
registry.epEnabled = config.AddEntryPointsLabels
|
||||
registry.entryPointReqsCounter = NewCounterWithNoopHeaders(statsdClient.NewCounter(statsdEntryPointReqsName, 1.0))
|
||||
registry.entryPointReqsTLSCounter = statsdClient.NewCounter(statsdEntryPointReqsTLSName, 1.0)
|
||||
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdEntryPointReqDurationName, 1.0), time.Millisecond)
|
||||
registry.entryPointReqsBytesCounter = statsdClient.NewCounter(statsdEntryPointReqsBytesName, 1.0)
|
||||
registry.entryPointRespsBytesCounter = statsdClient.NewCounter(statsdEntryPointRespsBytesName, 1.0)
|
||||
}
|
||||
|
||||
if config.AddRoutersLabels {
|
||||
registry.routerEnabled = config.AddRoutersLabels
|
||||
registry.routerReqsCounter = NewCounterWithNoopHeaders(statsdClient.NewCounter(statsdRouterReqsName, 1.0))
|
||||
registry.routerReqsTLSCounter = statsdClient.NewCounter(statsdRouterReqsTLSName, 1.0)
|
||||
registry.routerReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdRouterReqsDurationName, 1.0), time.Millisecond)
|
||||
registry.routerReqsBytesCounter = statsdClient.NewCounter(statsdRouterReqsBytesName, 1.0)
|
||||
registry.routerRespsBytesCounter = statsdClient.NewCounter(statsdRouterRespsBytesName, 1.0)
|
||||
}
|
||||
|
||||
if config.AddServicesLabels {
|
||||
registry.svcEnabled = config.AddServicesLabels
|
||||
registry.serviceReqsCounter = NewCounterWithNoopHeaders(statsdClient.NewCounter(statsdServiceReqsName, 1.0))
|
||||
registry.serviceReqsTLSCounter = statsdClient.NewCounter(statsdServiceReqsTLSName, 1.0)
|
||||
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdServiceReqsDurationName, 1.0), time.Millisecond)
|
||||
registry.serviceRetriesCounter = statsdClient.NewCounter(statsdServiceRetriesTotalName, 1.0)
|
||||
registry.serviceServerUpGauge = statsdClient.NewGauge(statsdServiceServerUpName)
|
||||
registry.serviceReqsBytesCounter = statsdClient.NewCounter(statsdServiceReqsBytesName, 1.0)
|
||||
registry.serviceRespsBytesCounter = statsdClient.NewCounter(statsdServiceRespsBytesName, 1.0)
|
||||
}
|
||||
|
||||
return registry
|
||||
}
|
||||
|
||||
// initStatsdTicker initializes metrics pusher and creates a statsdClient if not created already.
|
||||
func initStatsdTicker(ctx context.Context, config *otypes.Statsd) *time.Ticker {
|
||||
address := config.Address
|
||||
if len(address) == 0 {
|
||||
address = "localhost:8125"
|
||||
}
|
||||
|
||||
report := time.NewTicker(time.Duration(config.PushInterval))
|
||||
|
||||
safe.Go(func() {
|
||||
statsdClient.SendLoop(ctx, report.C, "udp", address)
|
||||
})
|
||||
|
||||
return report
|
||||
}
|
||||
|
||||
// StopStatsd stops internal statsdTicker which controls the pushing of metrics to StatsD Agent and resets it to `nil`.
|
||||
func StopStatsd() {
|
||||
if statsdTicker != nil {
|
||||
statsdTicker.Stop()
|
||||
}
|
||||
statsdTicker = nil
|
||||
}
|
||||
107
pkg/observability/metrics/statsd_test.go
Normal file
107
pkg/observability/metrics/statsd_test.go
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stvp/go-udp-testing"
|
||||
ptypes "github.com/traefik/paerser/types"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
)
|
||||
|
||||
func TestStatsD(t *testing.T) {
|
||||
t.Cleanup(func() {
|
||||
StopStatsd()
|
||||
})
|
||||
|
||||
udp.SetAddr(":18125")
|
||||
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
|
||||
udp.Timeout = 5 * time.Second
|
||||
|
||||
statsdRegistry := RegisterStatsd(t.Context(), &otypes.Statsd{Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddRoutersLabels: true, AddServicesLabels: true})
|
||||
|
||||
testRegistry(t, defaultMetricsPrefix, statsdRegistry)
|
||||
}
|
||||
|
||||
func TestStatsDWithPrefix(t *testing.T) {
|
||||
t.Cleanup(func() {
|
||||
StopStatsd()
|
||||
})
|
||||
|
||||
udp.SetAddr(":18125")
|
||||
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
|
||||
udp.Timeout = 5 * time.Second
|
||||
|
||||
statsdRegistry := RegisterStatsd(t.Context(), &otypes.Statsd{Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddRoutersLabels: true, AddServicesLabels: true, Prefix: "testPrefix"})
|
||||
|
||||
testRegistry(t, "testPrefix", statsdRegistry)
|
||||
}
|
||||
|
||||
func testRegistry(t *testing.T, metricsPrefix string, registry Registry) {
|
||||
t.Helper()
|
||||
|
||||
if !registry.IsEpEnabled() || !registry.IsRouterEnabled() || !registry.IsSvcEnabled() {
|
||||
t.Errorf("Statsd registry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()")
|
||||
}
|
||||
|
||||
expected := []string{
|
||||
metricsPrefix + ".config.reload.total:1.000000|c\n",
|
||||
metricsPrefix + ".config.reload.lastSuccessTimestamp:1.000000|g\n",
|
||||
metricsPrefix + ".open.connections:1.000000|g\n",
|
||||
|
||||
metricsPrefix + ".tls.certs.notAfterTimestamp:1.000000|g\n",
|
||||
|
||||
metricsPrefix + ".entrypoint.request.total:1.000000|c\n",
|
||||
metricsPrefix + ".entrypoint.request.tls.total:1.000000|c\n",
|
||||
metricsPrefix + ".entrypoint.request.duration:10000.000000|ms",
|
||||
metricsPrefix + ".entrypoint.requests.bytes.total:1.000000|c\n",
|
||||
metricsPrefix + ".entrypoint.responses.bytes.total:1.000000|c\n",
|
||||
|
||||
metricsPrefix + ".router.request.total:2.000000|c\n",
|
||||
metricsPrefix + ".router.request.tls.total:1.000000|c\n",
|
||||
metricsPrefix + ".router.request.duration:10000.000000|ms",
|
||||
metricsPrefix + ".router.requests.bytes.total:1.000000|c\n",
|
||||
metricsPrefix + ".router.responses.bytes.total:1.000000|c\n",
|
||||
|
||||
metricsPrefix + ".service.request.total:2.000000|c\n",
|
||||
metricsPrefix + ".service.request.tls.total:1.000000|c\n",
|
||||
metricsPrefix + ".service.request.duration:10000.000000|ms",
|
||||
metricsPrefix + ".service.retries.total:2.000000|c\n",
|
||||
metricsPrefix + ".service.server.up:1.000000|g\n",
|
||||
metricsPrefix + ".service.requests.bytes.total:1.000000|c\n",
|
||||
metricsPrefix + ".service.responses.bytes.total:1.000000|c\n",
|
||||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
registry.ConfigReloadsCounter().Add(1)
|
||||
registry.LastConfigReloadSuccessGauge().Set(1)
|
||||
registry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Set(1)
|
||||
|
||||
registry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
|
||||
|
||||
registry.EntryPointReqsCounter().With(nil, "entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
registry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
registry.EntryPointReqsBytesCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.EntryPointRespsBytesCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
|
||||
registry.RouterReqsCounter().With(nil, "router", "demo", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
registry.RouterReqsCounter().With(nil, "router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
registry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
registry.RouterReqsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.RouterRespsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
|
||||
registry.ServiceReqsCounter().With(nil, "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.ServiceReqsCounter().With(nil, "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
registry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
|
||||
registry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
registry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
registry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
registry.ServiceServerUpGauge().With("service:test", "url", "http://127.0.0.1").Set(1)
|
||||
registry.ServiceReqsBytesCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
registry.ServiceRespsBytesCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
})
|
||||
}
|
||||
15
pkg/observability/observability.go
Normal file
15
pkg/observability/observability.go
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
package observability
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
// EnsureUserEnvVar makes sure the USER environment variable is non-empty.
// When it is unset or empty, it is set to "traefik".
// An error is returned only if the variable cannot be set.
func EnsureUserEnvVar() error {
	if current := os.Getenv("USER"); current != "" {
		return nil
	}

	err := os.Setenv("USER", "traefik")
	if err != nil {
		return fmt.Errorf("could not set USER environment variable: %w", err)
	}

	return nil
}
|
||||
368
pkg/observability/tracing/tracing.go
Normal file
368
pkg/observability/tracing/tracing.go
Normal file
|
|
@ -0,0 +1,368 @@
|
|||
package tracing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/traefik/traefik/v3/pkg/config/static"
|
||||
"github.com/traefik/traefik/v3/pkg/observability"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
"go.opentelemetry.io/contrib/propagators/autoprop"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/propagation"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.37.0"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// Backend is an abstraction for tracking backend (OpenTelemetry, ...).
|
||||
type Backend interface {
|
||||
Setup(ctx context.Context, serviceName string, sampleRate float64, resourceAttributes map[string]string) (trace.Tracer, io.Closer, error)
|
||||
}
|
||||
|
||||
// NewTracing Creates a Tracing.
|
||||
func NewTracing(ctx context.Context, conf *static.Tracing) (*Tracer, io.Closer, error) {
|
||||
var backend Backend
|
||||
|
||||
if conf.OTLP != nil {
|
||||
backend = conf.OTLP
|
||||
}
|
||||
|
||||
if backend == nil {
|
||||
log.Debug().Msg("Could not initialize tracing, using OpenTelemetry by default")
|
||||
defaultBackend := &otypes.OTelTracing{}
|
||||
backend = defaultBackend
|
||||
}
|
||||
|
||||
if err := observability.EnsureUserEnvVar(); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
otel.SetTextMapPropagator(autoprop.NewTextMapPropagator())
|
||||
|
||||
tr, closer, err := backend.Setup(ctx, conf.ServiceName, conf.SampleRate, conf.ResourceAttributes)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return NewTracer(tr, conf.CapturedRequestHeaders, conf.CapturedResponseHeaders, conf.SafeQueryParams), closer, nil
|
||||
}
|
||||
|
||||
// TracerFromContext extracts the trace.Tracer from the given context.
|
||||
func TracerFromContext(ctx context.Context) *Tracer {
|
||||
// Prevent picking trace.noopSpan tracer.
|
||||
if !trace.SpanContextFromContext(ctx).IsValid() {
|
||||
return nil
|
||||
}
|
||||
|
||||
span := trace.SpanFromContext(ctx)
|
||||
if span != nil && span.TracerProvider() != nil {
|
||||
tracer := span.TracerProvider().Tracer("github.com/traefik/traefik")
|
||||
if tracer, ok := tracer.(*Tracer); ok {
|
||||
return tracer
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExtractCarrierIntoContext reads cross-cutting concerns from the carrier into a Context.
|
||||
func ExtractCarrierIntoContext(ctx context.Context, headers http.Header) context.Context {
|
||||
propagator := otel.GetTextMapPropagator()
|
||||
return propagator.Extract(ctx, propagation.HeaderCarrier(headers))
|
||||
}
|
||||
|
||||
// InjectContextIntoCarrier sets cross-cutting concerns from the request context into the request headers.
|
||||
func InjectContextIntoCarrier(req *http.Request) {
|
||||
propagator := otel.GetTextMapPropagator()
|
||||
propagator.Inject(req.Context(), propagation.HeaderCarrier(req.Header))
|
||||
}
|
||||
|
||||
// Span is trace.Span wrapping the Traefik TracerProvider.
|
||||
type Span struct {
|
||||
trace.Span
|
||||
|
||||
tracerProvider *TracerProvider
|
||||
}
|
||||
|
||||
// TracerProvider returns the span's TraceProvider.
|
||||
func (s Span) TracerProvider() trace.TracerProvider {
|
||||
return s.tracerProvider
|
||||
}
|
||||
|
||||
// TracerProvider is trace.TracerProvider wrapping the Traefik Tracer implementation.
|
||||
type TracerProvider struct {
|
||||
trace.TracerProvider
|
||||
|
||||
tracer *Tracer
|
||||
}
|
||||
|
||||
// Tracer returns the trace.Tracer for the given options.
|
||||
// It returns specifically the Traefik Tracer when requested.
|
||||
func (t TracerProvider) Tracer(name string, options ...trace.TracerOption) trace.Tracer {
|
||||
if name == "github.com/traefik/traefik" {
|
||||
return t.tracer
|
||||
}
|
||||
|
||||
return t.TracerProvider.Tracer(name, options...)
|
||||
}
|
||||
|
||||
// Tracer is trace.Tracer with additional properties.
|
||||
type Tracer struct {
|
||||
trace.Tracer
|
||||
|
||||
safeQueryParams []string
|
||||
capturedRequestHeaders []string
|
||||
capturedResponseHeaders []string
|
||||
}
|
||||
|
||||
// NewTracer builds and configures a new Tracer.
|
||||
func NewTracer(tracer trace.Tracer, capturedRequestHeaders, capturedResponseHeaders, safeQueryParams []string) *Tracer {
|
||||
return &Tracer{
|
||||
Tracer: tracer,
|
||||
safeQueryParams: safeQueryParams,
|
||||
capturedRequestHeaders: canonicalizeHeaders(capturedRequestHeaders),
|
||||
capturedResponseHeaders: canonicalizeHeaders(capturedResponseHeaders),
|
||||
}
|
||||
}
|
||||
|
||||
// Start starts a new span.
|
||||
// spancheck linter complains about span.End not being called, but this is expected here,
|
||||
// hence its deactivation.
|
||||
//
|
||||
//nolint:spancheck
|
||||
func (t *Tracer) Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
|
||||
if t == nil {
|
||||
return ctx, nil
|
||||
}
|
||||
|
||||
spanCtx, span := t.Tracer.Start(ctx, spanName, opts...)
|
||||
|
||||
wrappedSpan := &Span{Span: span, tracerProvider: &TracerProvider{TracerProvider: span.TracerProvider(), tracer: t}}
|
||||
|
||||
return trace.ContextWithSpan(spanCtx, wrappedSpan), wrappedSpan
|
||||
}
|
||||
|
||||
// CaptureClientRequest used to add span attributes from the request as a Client.
|
||||
func (t *Tracer) CaptureClientRequest(span trace.Span, r *http.Request) {
|
||||
if t == nil || span == nil || r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Common attributes https://github.com/open-telemetry/semantic-conventions/blob/v1.26.0/docs/http/http-spans.md#common-attributes
|
||||
span.SetAttributes(semconv.HTTPRequestMethodKey.String(r.Method))
|
||||
span.SetAttributes(semconv.NetworkProtocolVersion(proto(r.Proto)))
|
||||
|
||||
// Client attributes https://github.com/open-telemetry/semantic-conventions/blob/v1.26.0/docs/http/http-spans.md#http-client
|
||||
sURL := t.safeURL(r.URL)
|
||||
span.SetAttributes(semconv.URLFull(sURL.String()))
|
||||
span.SetAttributes(semconv.URLScheme(sURL.Scheme))
|
||||
span.SetAttributes(semconv.UserAgentOriginal(r.UserAgent()))
|
||||
|
||||
host, port, err := net.SplitHostPort(sURL.Host)
|
||||
if err != nil {
|
||||
span.SetAttributes(semconv.NetworkPeerAddress(host))
|
||||
span.SetAttributes(semconv.ServerAddress(sURL.Host))
|
||||
switch sURL.Scheme {
|
||||
case "http":
|
||||
span.SetAttributes(semconv.NetworkPeerPort(80))
|
||||
span.SetAttributes(semconv.ServerPort(80))
|
||||
case "https":
|
||||
span.SetAttributes(semconv.NetworkPeerPort(443))
|
||||
span.SetAttributes(semconv.ServerPort(443))
|
||||
}
|
||||
} else {
|
||||
span.SetAttributes(semconv.NetworkPeerAddress(host))
|
||||
intPort, _ := strconv.Atoi(port)
|
||||
span.SetAttributes(semconv.NetworkPeerPort(intPort))
|
||||
span.SetAttributes(semconv.ServerAddress(host))
|
||||
span.SetAttributes(semconv.ServerPort(intPort))
|
||||
}
|
||||
|
||||
for _, header := range t.capturedRequestHeaders {
|
||||
// User-agent is already part of the semantic convention as a recommended attribute.
|
||||
if strings.EqualFold(header, "User-Agent") {
|
||||
continue
|
||||
}
|
||||
|
||||
if value := r.Header[header]; value != nil {
|
||||
span.SetAttributes(attribute.StringSlice(fmt.Sprintf("http.request.header.%s", strings.ToLower(header)), value))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// CaptureServerRequest used to add span attributes from the request as a Server.
|
||||
func (t *Tracer) CaptureServerRequest(span trace.Span, r *http.Request) {
|
||||
if t == nil || span == nil || r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Common attributes https://github.com/open-telemetry/semantic-conventions/blob/v1.26.0/docs/http/http-spans.md#common-attributes
|
||||
span.SetAttributes(semconv.HTTPRequestMethodKey.String(r.Method))
|
||||
span.SetAttributes(semconv.NetworkProtocolVersion(proto(r.Proto)))
|
||||
|
||||
sURL := t.safeURL(r.URL)
|
||||
// Server attributes https://github.com/open-telemetry/semantic-conventions/blob/v1.26.0/docs/http/http-spans.md#http-server-semantic-conventions
|
||||
span.SetAttributes(semconv.HTTPRequestBodySize(int(r.ContentLength)))
|
||||
span.SetAttributes(semconv.URLPath(sURL.Path))
|
||||
span.SetAttributes(semconv.URLQuery(sURL.RawQuery))
|
||||
span.SetAttributes(semconv.URLScheme(r.Header.Get("X-Forwarded-Proto")))
|
||||
span.SetAttributes(semconv.UserAgentOriginal(r.UserAgent()))
|
||||
span.SetAttributes(semconv.ServerAddress(r.Host))
|
||||
|
||||
host, port, err := net.SplitHostPort(r.RemoteAddr)
|
||||
if err != nil {
|
||||
span.SetAttributes(semconv.ClientAddress(r.RemoteAddr))
|
||||
span.SetAttributes(semconv.NetworkPeerAddress(r.Host))
|
||||
} else {
|
||||
span.SetAttributes(semconv.NetworkPeerAddress(host))
|
||||
span.SetAttributes(semconv.ClientAddress(host))
|
||||
intPort, _ := strconv.Atoi(port)
|
||||
span.SetAttributes(semconv.ClientPort(intPort))
|
||||
span.SetAttributes(semconv.NetworkPeerPort(intPort))
|
||||
}
|
||||
|
||||
for _, header := range t.capturedRequestHeaders {
|
||||
// User-agent is already part of the semantic convention as a recommended attribute.
|
||||
if strings.EqualFold(header, "User-Agent") {
|
||||
continue
|
||||
}
|
||||
|
||||
if value := r.Header[header]; value != nil {
|
||||
span.SetAttributes(attribute.StringSlice(fmt.Sprintf("http.request.header.%s", strings.ToLower(header)), value))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// CaptureResponse captures the response attributes to the span.
|
||||
func (t *Tracer) CaptureResponse(span trace.Span, responseHeaders http.Header, code int, spanKind trace.SpanKind) {
|
||||
if t == nil || span == nil {
|
||||
return
|
||||
}
|
||||
|
||||
var status codes.Code
|
||||
var desc string
|
||||
switch spanKind {
|
||||
case trace.SpanKindServer:
|
||||
status, desc = serverStatus(code)
|
||||
case trace.SpanKindClient:
|
||||
status, desc = clientStatus(code)
|
||||
default:
|
||||
status, desc = defaultStatus(code)
|
||||
}
|
||||
span.SetStatus(status, desc)
|
||||
if code > 0 {
|
||||
span.SetAttributes(semconv.HTTPResponseStatusCode(code))
|
||||
}
|
||||
|
||||
for _, header := range t.capturedResponseHeaders {
|
||||
if value := responseHeaders[header]; value != nil {
|
||||
span.SetAttributes(attribute.StringSlice(fmt.Sprintf("http.response.header.%s", strings.ToLower(header)), value))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tracer) safeURL(originalURL *url.URL) *url.URL {
|
||||
if originalURL == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
redactedURL := *originalURL
|
||||
|
||||
// Redact password if exists.
|
||||
if redactedURL.User != nil {
|
||||
redactedURL.User = url.UserPassword("REDACTED", "REDACTED")
|
||||
}
|
||||
|
||||
// Redact query parameters.
|
||||
query := redactedURL.Query()
|
||||
for k := range query {
|
||||
if slices.Contains(t.safeQueryParams, k) {
|
||||
continue
|
||||
}
|
||||
|
||||
query.Set(k, "REDACTED")
|
||||
}
|
||||
redactedURL.RawQuery = query.Encode()
|
||||
|
||||
return &redactedURL
|
||||
}
|
||||
|
||||
// proto converts an HTTP protocol string such as "HTTP/1.1" into the bare version
// ("1.1") recorded as the network protocol version attribute.
// Both the short ("HTTP/2") and the major.minor ("HTTP/2.0") spellings are
// accepted, as net/http reports HTTP/2 requests with Proto "HTTP/2.0".
// Unrecognized values are returned unchanged.
func proto(proto string) string {
	switch proto {
	case "HTTP/1.0":
		return "1.0"
	case "HTTP/1.1":
		return "1.1"
	case "HTTP/2", "HTTP/2.0":
		return "2"
	case "HTTP/3", "HTTP/3.0":
		return "3"
	default:
		return proto
	}
}
|
||||
|
||||
// serverStatus returns a span status code and message for an HTTP status code
|
||||
// value returned by a server. Status codes in the 400-499 range are not
|
||||
// returned as errors.
|
||||
func serverStatus(code int) (codes.Code, string) {
|
||||
if code < 100 || code >= 600 {
|
||||
return codes.Error, fmt.Sprintf("Invalid HTTP status code %d", code)
|
||||
}
|
||||
if code >= 500 {
|
||||
return codes.Error, ""
|
||||
}
|
||||
return codes.Unset, ""
|
||||
}
|
||||
|
||||
// clientStatus returns a span status code and message for an HTTP status code
|
||||
// value returned by a server. Status codes in the 400-499 range are not
|
||||
// returned as errors.
|
||||
func clientStatus(code int) (codes.Code, string) {
|
||||
if code < 100 || code >= 600 {
|
||||
return codes.Error, fmt.Sprintf("Invalid HTTP status code %d", code)
|
||||
}
|
||||
if code >= 400 {
|
||||
return codes.Error, ""
|
||||
}
|
||||
return codes.Unset, ""
|
||||
}
|
||||
|
||||
// defaultStatus returns a span status code and message for an HTTP status code
|
||||
// value generated internally.
|
||||
func defaultStatus(code int) (codes.Code, string) {
|
||||
if code < 100 || code >= 600 {
|
||||
return codes.Error, fmt.Sprintf("Invalid HTTP status code %d", code)
|
||||
}
|
||||
if code >= 500 {
|
||||
return codes.Error, ""
|
||||
}
|
||||
return codes.Unset, ""
|
||||
}
|
||||
|
||||
// canonicalizeHeaders returns a new slice holding the canonical form, as given
// by http.CanonicalHeaderKey, of every header key in headers, in the same order.
// A nil input yields nil; an empty input yields an empty, non-nil slice.
func canonicalizeHeaders(headers []string) []string {
	if headers == nil {
		return nil
	}

	out := make([]string, 0, len(headers))
	for _, key := range headers {
		out = append(out, http.CanonicalHeaderKey(key))
	}

	return out
}
|
||||
504
pkg/observability/tracing/tracing_test.go
Normal file
504
pkg/observability/tracing/tracing_test.go
Normal file
|
|
@ -0,0 +1,504 @@
|
|||
package tracing
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containous/alice"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/traefik/traefik/v3/pkg/config/static"
|
||||
otypes "github.com/traefik/traefik/v3/pkg/observability/types"
|
||||
"go.opentelemetry.io/collector/pdata/pcommon"
|
||||
"go.opentelemetry.io/collector/pdata/ptrace"
|
||||
"go.opentelemetry.io/collector/pdata/ptrace/ptraceotlp"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"go.opentelemetry.io/otel/trace/noop"
|
||||
)
|
||||
|
||||
func Test_safeFullURL(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
safeQueryParams []string
|
||||
originalURL *url.URL
|
||||
expectedURL *url.URL
|
||||
}{
|
||||
{
|
||||
desc: "Nil URL",
|
||||
originalURL: nil,
|
||||
expectedURL: nil,
|
||||
},
|
||||
{
|
||||
desc: "No query parameters",
|
||||
originalURL: &url.URL{Scheme: "https", Host: "example.com"},
|
||||
expectedURL: &url.URL{Scheme: "https", Host: "example.com"},
|
||||
},
|
||||
{
|
||||
desc: "All query parameters redacted",
|
||||
originalURL: &url.URL{Scheme: "https", Host: "example.com", RawQuery: "foo=bar&baz=qux"},
|
||||
expectedURL: &url.URL{Scheme: "https", Host: "example.com", RawQuery: "baz=REDACTED&foo=REDACTED"},
|
||||
},
|
||||
{
|
||||
desc: "Some query parameters unredacted",
|
||||
safeQueryParams: []string{"foo"},
|
||||
originalURL: &url.URL{Scheme: "https", Host: "example.com", RawQuery: "foo=bar&baz=qux"},
|
||||
expectedURL: &url.URL{Scheme: "https", Host: "example.com", RawQuery: "baz=REDACTED&foo=bar"},
|
||||
},
|
||||
{
|
||||
desc: "User info and some query parameters redacted",
|
||||
safeQueryParams: []string{"foo"},
|
||||
originalURL: &url.URL{Scheme: "https", Host: "example.com", User: url.UserPassword("username", "password"), RawQuery: "foo=bar&baz=qux"},
|
||||
expectedURL: &url.URL{Scheme: "https", Host: "example.com", User: url.UserPassword("REDACTED", "REDACTED"), RawQuery: "baz=REDACTED&foo=bar"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tr := NewTracer(nil, nil, nil, test.safeQueryParams)
|
||||
|
||||
gotURL := tr.safeURL(test.originalURL)
|
||||
|
||||
assert.Equal(t, test.expectedURL, gotURL)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTracing(t *testing.T) {
|
||||
tests := []struct {
|
||||
desc string
|
||||
propagators string
|
||||
headers map[string]string
|
||||
resourceAttributes map[string]string
|
||||
wantServiceHeadersFn func(t *testing.T, headers http.Header)
|
||||
assertFn func(*testing.T, ptrace.Traces)
|
||||
}{
|
||||
{
|
||||
desc: "service name and version",
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
attributes := resourceAttributes(traces)
|
||||
assert.Equal(t, "traefik", attributes["service.name"])
|
||||
assert.Equal(t, "dev", attributes["service.version"])
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "resource attributes must be propagated",
|
||||
resourceAttributes: map[string]string{
|
||||
"service.environment": "custom",
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
attributes := resourceAttributes(traces)
|
||||
assert.Equal(t, "custom", attributes["service.environment"])
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "TraceContext propagation",
|
||||
propagators: "tracecontext",
|
||||
headers: map[string]string{
|
||||
"traceparent": "00-00000000000000000000000000000001-0000000000000001-01",
|
||||
"tracestate": "foo=bar",
|
||||
},
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Regexp(t, `(00-00000000000000000000000000000001-\w{16}-01)`, headers["Traceparent"][0])
|
||||
assert.Equal(t, []string{"foo=bar"}, headers["Tracestate"])
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Equal(t, "00000000000000000000000000000001", span.TraceID().String())
|
||||
assert.Equal(t, "0000000000000001", span.ParentSpanID().String())
|
||||
assert.Equal(t, "foo=bar", span.TraceState().AsRaw())
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "root span TraceContext propagation",
|
||||
propagators: "tracecontext",
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Regexp(t, `(00-\w{32}-\w{16}-01)`, headers["Traceparent"][0])
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Len(t, span.TraceID().String(), 32)
|
||||
assert.Empty(t, span.ParentSpanID().String())
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "B3 propagation",
|
||||
propagators: "b3",
|
||||
headers: map[string]string{
|
||||
"b3": "00000000000000000000000000000001-0000000000000002-1-0000000000000001",
|
||||
},
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Regexp(t, `(00000000000000000000000000000001-\w{16}-1)`, headers["B3"][0])
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Equal(t, "00000000000000000000000000000001", span.TraceID().String())
|
||||
assert.Equal(t, "0000000000000002", span.ParentSpanID().String())
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "root span B3 propagation",
|
||||
propagators: "b3",
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Regexp(t, `(\w{32}-\w{16}-1)`, headers["B3"][0])
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Len(t, span.TraceID().String(), 32)
|
||||
assert.Empty(t, span.ParentSpanID().String())
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "B3 propagation Multiple Headers",
|
||||
propagators: "b3multi",
|
||||
headers: map[string]string{
|
||||
"x-b3-traceid": "00000000000000000000000000000001",
|
||||
"x-b3-parentspanid": "0000000000000001",
|
||||
"x-b3-spanid": "0000000000000002",
|
||||
"x-b3-sampled": "1",
|
||||
},
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Equal(t, "00000000000000000000000000000001", headers["X-B3-Traceid"][0])
|
||||
assert.Equal(t, "0000000000000001", headers["X-B3-Parentspanid"][0])
|
||||
assert.Equal(t, "1", headers["X-B3-Sampled"][0])
|
||||
assert.Len(t, headers["X-B3-Spanid"][0], 16)
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Equal(t, "00000000000000000000000000000001", span.TraceID().String())
|
||||
assert.Equal(t, "0000000000000002", span.ParentSpanID().String())
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "root span B3 propagation Multiple Headers",
|
||||
propagators: "b3multi",
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Regexp(t, `(\w{32})`, headers["X-B3-Traceid"][0])
|
||||
assert.Equal(t, "1", headers["X-B3-Sampled"][0])
|
||||
assert.Regexp(t, `(\w{16})`, headers["X-B3-Spanid"][0])
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Len(t, span.TraceID().String(), 32)
|
||||
assert.Empty(t, span.ParentSpanID().String())
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "Baggage propagation",
|
||||
propagators: "baggage",
|
||||
headers: map[string]string{
|
||||
"baggage": "userId=id",
|
||||
},
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Equal(t, []string{"userId=id"}, headers["Baggage"])
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "Jaeger propagation",
|
||||
propagators: "jaeger",
|
||||
headers: map[string]string{
|
||||
"uber-trace-id": "00000000000000000000000000000001:0000000000000002:0000000000000001:1",
|
||||
},
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Regexp(t, `(00000000000000000000000000000001:\w{16}:0:1)`, headers["Uber-Trace-Id"][0])
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Equal(t, "00000000000000000000000000000001", span.TraceID().String())
|
||||
assert.Len(t, span.ParentSpanID().String(), 16)
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "root span Jaeger propagation",
|
||||
propagators: "jaeger",
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Regexp(t, `(\w{32}:\w{16}:0:1)`, headers["Uber-Trace-Id"][0])
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Len(t, span.TraceID().String(), 32)
|
||||
assert.Empty(t, span.ParentSpanID().String())
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "XRay propagation",
|
||||
propagators: "xray",
|
||||
headers: map[string]string{
|
||||
"X-Amzn-Trace-Id": "Root=1-5759e988-bd862e3fe1be46a994272793;Parent=53995c3f42cd8ad8;Sampled=1",
|
||||
},
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Regexp(t, `(Root=1-5759e988-bd862e3fe1be46a994272793;Parent=\w{16};Sampled=1)`, headers["X-Amzn-Trace-Id"][0])
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Equal(t, "5759e988bd862e3fe1be46a994272793", span.TraceID().String())
|
||||
assert.Len(t, span.ParentSpanID().String(), 16)
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "root span XRay propagation",
|
||||
propagators: "xray",
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Regexp(t, `(Root=1-\w{8}-\w{24};Parent=\w{16};Sampled=1)`, headers["X-Amzn-Trace-Id"][0])
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Len(t, span.TraceID().String(), 32)
|
||||
assert.Empty(t, span.ParentSpanID().String())
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "no propagation",
|
||||
propagators: "none",
|
||||
wantServiceHeadersFn: func(t *testing.T, headers http.Header) {
|
||||
t.Helper()
|
||||
|
||||
assert.Empty(t, headers)
|
||||
},
|
||||
assertFn: func(t *testing.T, traces ptrace.Traces) {
|
||||
t.Helper()
|
||||
|
||||
span := mainSpan(traces)
|
||||
assert.Len(t, span.TraceID().String(), 32)
|
||||
assert.Empty(t, span.ParentSpanID().String())
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
traceCh := make(chan ptrace.Traces)
|
||||
collector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gzr, err := gzip.NewReader(r.Body)
|
||||
require.NoError(t, err)
|
||||
|
||||
body, err := io.ReadAll(gzr)
|
||||
require.NoError(t, err)
|
||||
|
||||
req := ptraceotlp.NewExportRequest()
|
||||
err = req.UnmarshalProto(body)
|
||||
require.NoError(t, err)
|
||||
|
||||
traceCh <- req.Traces()
|
||||
}))
|
||||
t.Cleanup(collector.Close)
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
t.Setenv("OTEL_PROPAGATORS", test.propagators)
|
||||
|
||||
service := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
tracer := TracerFromContext(r.Context())
|
||||
ctx, span := tracer.Start(r.Context(), "service")
|
||||
defer span.End()
|
||||
|
||||
r = r.WithContext(ctx)
|
||||
|
||||
InjectContextIntoCarrier(r)
|
||||
|
||||
if test.wantServiceHeadersFn != nil {
|
||||
test.wantServiceHeadersFn(t, r.Header)
|
||||
}
|
||||
})
|
||||
|
||||
tracingConfig := &static.Tracing{
|
||||
ServiceName: "traefik",
|
||||
SampleRate: 1.0,
|
||||
ResourceAttributes: test.resourceAttributes,
|
||||
OTLP: &otypes.OTelTracing{
|
||||
HTTP: &otypes.OTelHTTP{
|
||||
Endpoint: collector.URL,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
tracer, closer, err := NewTracing(t.Context(), tracingConfig)
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
_ = closer.Close()
|
||||
})
|
||||
|
||||
chain := alice.New(func(next http.Handler) (http.Handler, error) {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
tracingCtx := ExtractCarrierIntoContext(r.Context(), r.Header)
|
||||
start := time.Now()
|
||||
tracingCtx, span := tracer.Start(tracingCtx, "test", trace.WithSpanKind(trace.SpanKindServer), trace.WithTimestamp(start))
|
||||
end := time.Now()
|
||||
span.End(trace.WithTimestamp(end))
|
||||
next.ServeHTTP(w, r.WithContext(tracingCtx))
|
||||
}), nil
|
||||
})
|
||||
|
||||
epHandler, err := chain.Then(service)
|
||||
require.NoError(t, err)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "http://www.test.com", nil)
|
||||
for k, v := range test.headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
|
||||
rw := httptest.NewRecorder()
|
||||
|
||||
epHandler.ServeHTTP(rw, req)
|
||||
|
||||
select {
|
||||
case <-time.After(10 * time.Second):
|
||||
t.Error("Trace not exported")
|
||||
|
||||
case traces := <-traceCh:
|
||||
assert.Equal(t, http.StatusOK, rw.Code)
|
||||
if test.assertFn != nil {
|
||||
test.assertFn(t, traces)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestTracerProvider ensures that Tracer returns a valid TracerProvider
|
||||
// when using the default Traefik Tracer and a custom one.
|
||||
func TestTracerProvider(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
otlpConfig := &otypes.OTelTracing{}
|
||||
otlpConfig.SetDefaults()
|
||||
|
||||
config := &static.Tracing{OTLP: otlpConfig}
|
||||
tracer, closer, err := NewTracing(t.Context(), config)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
closer.Close()
|
||||
|
||||
_, span := tracer.Start(t.Context(), "test")
|
||||
defer span.End()
|
||||
|
||||
span.TracerProvider().Tracer("github.com/traefik/traefik")
|
||||
span.TracerProvider().Tracer("other")
|
||||
}
|
||||
|
||||
// TestNewTracer_HeadersCanonicalization tests that NewTracer properly canonicalizes headers.
|
||||
func TestNewTracer_HeadersCanonicalization(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
inputHeaders []string
|
||||
expectedCanonicalHeaders []string
|
||||
}{
|
||||
{
|
||||
desc: "Empty headers",
|
||||
inputHeaders: []string{},
|
||||
expectedCanonicalHeaders: []string{},
|
||||
},
|
||||
{
|
||||
desc: "Already canonical headers",
|
||||
inputHeaders: []string{"Content-Type", "User-Agent", "Accept-Encoding"},
|
||||
expectedCanonicalHeaders: []string{"Content-Type", "User-Agent", "Accept-Encoding"},
|
||||
},
|
||||
{
|
||||
desc: "Lowercase headers",
|
||||
inputHeaders: []string{"content-type", "user-agent", "accept-encoding"},
|
||||
expectedCanonicalHeaders: []string{"Content-Type", "User-Agent", "Accept-Encoding"},
|
||||
},
|
||||
{
|
||||
desc: "Mixed case headers",
|
||||
inputHeaders: []string{"CoNtEnT-tYpE", "uSeR-aGeNt", "aCcEpT-eNcOdInG"},
|
||||
expectedCanonicalHeaders: []string{"Content-Type", "User-Agent", "Accept-Encoding"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// Create a mock tracer using a no-op tracer from OpenTelemetry
|
||||
mockTracer := noop.NewTracerProvider().Tracer("test")
|
||||
|
||||
// Test capturedRequestHeaders
|
||||
tracer := NewTracer(mockTracer, test.inputHeaders, nil, nil)
|
||||
assert.Equal(t, test.expectedCanonicalHeaders, tracer.capturedRequestHeaders)
|
||||
assert.Nil(t, tracer.capturedResponseHeaders)
|
||||
|
||||
// Test capturedResponseHeaders
|
||||
tracer = NewTracer(mockTracer, nil, test.inputHeaders, nil)
|
||||
assert.Equal(t, test.expectedCanonicalHeaders, tracer.capturedResponseHeaders)
|
||||
assert.Nil(t, tracer.capturedRequestHeaders)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// resourceAttributes extracts resource attributes as a map.
|
||||
func resourceAttributes(traces ptrace.Traces) map[string]string {
|
||||
attributes := make(map[string]string)
|
||||
if traces.ResourceSpans().Len() > 0 {
|
||||
resource := traces.ResourceSpans().At(0).Resource()
|
||||
resource.Attributes().Range(func(k string, v pcommon.Value) bool {
|
||||
if v.Type() == pcommon.ValueTypeStr {
|
||||
attributes[k] = v.Str()
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
return attributes
|
||||
}
|
||||
|
||||
// mainSpan gets the main span from traces (assumes single span for testing).
|
||||
func mainSpan(traces ptrace.Traces) ptrace.Span {
|
||||
for _, resourceSpans := range traces.ResourceSpans().All() {
|
||||
for _, scopeSpans := range resourceSpans.ScopeSpans().All() {
|
||||
if scopeSpans.Spans().Len() > 0 {
|
||||
return scopeSpans.Spans().At(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
return ptrace.NewSpan()
|
||||
}
|
||||
276
pkg/observability/types/logs.go
Normal file
276
pkg/observability/types/logs.go
Normal file
|
|
@ -0,0 +1,276 @@
|
|||
package types
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/url"
|
||||
|
||||
"github.com/traefik/paerser/types"
|
||||
ttypes "github.com/traefik/traefik/v3/pkg/types"
|
||||
"github.com/traefik/traefik/v3/pkg/version"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp"
|
||||
otelsdk "go.opentelemetry.io/otel/sdk/log"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.37.0"
|
||||
"google.golang.org/grpc/credentials"
|
||||
"google.golang.org/grpc/encoding/gzip"
|
||||
)
|
||||
|
||||
// Modes that can be applied to an access log field or header.
const (
	// AccessLogKeep is the mode value that keeps a field or header.
	AccessLogKeep = "keep"
	// AccessLogDrop is the mode value that drops a field or header.
	AccessLogDrop = "drop"
	// AccessLogRedact is the mode value that redacts a header.
	AccessLogRedact = "redact"
)

const (
	// CommonFormat is the name of the common logging format (CLF).
	CommonFormat string = "common"
)

// OTelTraefikServiceName is the service name used by default for OpenTelemetry.
const OTelTraefikServiceName = "traefik"
|
||||
|
||||
// TraefikLog holds the configuration settings for the traefik logger.
|
||||
type TraefikLog struct {
|
||||
Level string `description:"Log level set to traefik logs." json:"level,omitempty" toml:"level,omitempty" yaml:"level,omitempty" export:"true"`
|
||||
Format string `description:"Traefik log format: json | common" json:"format,omitempty" toml:"format,omitempty" yaml:"format,omitempty" export:"true"`
|
||||
NoColor bool `description:"When using the 'common' format, disables the colorized output." json:"noColor,omitempty" toml:"noColor,omitempty" yaml:"noColor,omitempty" export:"true"`
|
||||
|
||||
FilePath string `description:"Traefik log file path. Stdout is used when omitted or empty." json:"filePath,omitempty" toml:"filePath,omitempty" yaml:"filePath,omitempty"`
|
||||
MaxSize int `description:"Maximum size in megabytes of the log file before it gets rotated." json:"maxSize,omitempty" toml:"maxSize,omitempty" yaml:"maxSize,omitempty" export:"true"`
|
||||
MaxAge int `description:"Maximum number of days to retain old log files based on the timestamp encoded in their filename." json:"maxAge,omitempty" toml:"maxAge,omitempty" yaml:"maxAge,omitempty" export:"true"`
|
||||
MaxBackups int `description:"Maximum number of old log files to retain." json:"maxBackups,omitempty" toml:"maxBackups,omitempty" yaml:"maxBackups,omitempty" export:"true"`
|
||||
Compress bool `description:"Determines if the rotated log files should be compressed using gzip." json:"compress,omitempty" toml:"compress,omitempty" yaml:"compress,omitempty" export:"true"`
|
||||
|
||||
OTLP *OTelLog `description:"Settings for OpenTelemetry." json:"otlp,omitempty" toml:"otlp,omitempty" yaml:"otlp,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (l *TraefikLog) SetDefaults() {
|
||||
l.Format = CommonFormat
|
||||
l.Level = "ERROR"
|
||||
}
|
||||
|
||||
// AccessLog holds the configuration settings for the access logger (middlewares/accesslog).
|
||||
type AccessLog struct {
|
||||
FilePath string `description:"Access log file path. Stdout is used when omitted or empty." json:"filePath,omitempty" toml:"filePath,omitempty" yaml:"filePath,omitempty"`
|
||||
Format string `description:"Access log format: json, common, or genericCLF" json:"format,omitempty" toml:"format,omitempty" yaml:"format,omitempty" export:"true"`
|
||||
Filters *AccessLogFilters `description:"Access log filters, used to keep only specific access logs." json:"filters,omitempty" toml:"filters,omitempty" yaml:"filters,omitempty" export:"true"`
|
||||
Fields *AccessLogFields `description:"AccessLogFields." json:"fields,omitempty" toml:"fields,omitempty" yaml:"fields,omitempty" export:"true"`
|
||||
BufferingSize int64 `description:"Number of access log lines to process in a buffered way." json:"bufferingSize,omitempty" toml:"bufferingSize,omitempty" yaml:"bufferingSize,omitempty" export:"true"`
|
||||
AddInternals bool `description:"Enables access log for internal services (ping, dashboard, etc...)." json:"addInternals,omitempty" toml:"addInternals,omitempty" yaml:"addInternals,omitempty" export:"true"`
|
||||
|
||||
OTLP *OTelLog `description:"Settings for OpenTelemetry." json:"otlp,omitempty" toml:"otlp,omitempty" yaml:"otlp,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (l *AccessLog) SetDefaults() {
|
||||
l.Format = CommonFormat
|
||||
l.FilePath = ""
|
||||
l.Filters = &AccessLogFilters{}
|
||||
l.Fields = &AccessLogFields{}
|
||||
l.Fields.SetDefaults()
|
||||
}
|
||||
|
||||
// AccessLogFilters holds filters configuration.
|
||||
type AccessLogFilters struct {
|
||||
StatusCodes []string `description:"Keep access logs with status codes in the specified range." json:"statusCodes,omitempty" toml:"statusCodes,omitempty" yaml:"statusCodes,omitempty" export:"true"`
|
||||
RetryAttempts bool `description:"Keep access logs when at least one retry happened." json:"retryAttempts,omitempty" toml:"retryAttempts,omitempty" yaml:"retryAttempts,omitempty" export:"true"`
|
||||
MinDuration types.Duration `description:"Keep access logs when request took longer than the specified duration." json:"minDuration,omitempty" toml:"minDuration,omitempty" yaml:"minDuration,omitempty" export:"true"`
|
||||
}
|
||||
|
||||
// FieldHeaders holds the access log configuration for headers: a default
// mode plus per-header overrides.
type FieldHeaders struct {
	DefaultMode string            `description:"Default mode for fields: keep | drop | redact" json:"defaultMode,omitempty" toml:"defaultMode,omitempty" yaml:"defaultMode,omitempty" export:"true"`
	Names       map[string]string `description:"Override mode for headers" json:"names,omitempty" toml:"names,omitempty" yaml:"names,omitempty" export:"true"`
}
|
||||
|
||||
// AccessLogFields holds configuration for access log fields.
|
||||
type AccessLogFields struct {
|
||||
DefaultMode string `description:"Default mode for fields: keep | drop" json:"defaultMode,omitempty" toml:"defaultMode,omitempty" yaml:"defaultMode,omitempty" export:"true"`
|
||||
Names map[string]string `description:"Override mode for fields" json:"names,omitempty" toml:"names,omitempty" yaml:"names,omitempty" export:"true"`
|
||||
Headers *FieldHeaders `description:"Headers to keep, drop or redact" json:"headers,omitempty" toml:"headers,omitempty" yaml:"headers,omitempty" export:"true"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (f *AccessLogFields) SetDefaults() {
|
||||
f.DefaultMode = AccessLogKeep
|
||||
f.Headers = &FieldHeaders{
|
||||
DefaultMode: AccessLogDrop,
|
||||
}
|
||||
}
|
||||
|
||||
// Keep check if the field need to be kept or dropped.
|
||||
func (f *AccessLogFields) Keep(field string) bool {
|
||||
defaultKeep := true
|
||||
if f != nil {
|
||||
defaultKeep = checkFieldValue(f.DefaultMode, defaultKeep)
|
||||
|
||||
if v, ok := f.Names[field]; ok {
|
||||
return checkFieldValue(v, defaultKeep)
|
||||
}
|
||||
}
|
||||
return defaultKeep
|
||||
}
|
||||
|
||||
// KeepHeader checks if the headers need to be kept, dropped or redacted and returns the status.
|
||||
func (f *AccessLogFields) KeepHeader(header string) string {
|
||||
defaultValue := AccessLogKeep
|
||||
if f != nil && f.Headers != nil {
|
||||
defaultValue = checkFieldHeaderValue(f.Headers.DefaultMode, defaultValue)
|
||||
|
||||
if v, ok := f.Headers.Names[header]; ok {
|
||||
return checkFieldHeaderValue(v, defaultValue)
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func checkFieldValue(value string, defaultKeep bool) bool {
|
||||
switch value {
|
||||
case AccessLogKeep:
|
||||
return true
|
||||
case AccessLogDrop:
|
||||
return false
|
||||
default:
|
||||
return defaultKeep
|
||||
}
|
||||
}
|
||||
|
||||
func checkFieldHeaderValue(value, defaultValue string) string {
|
||||
if value == AccessLogKeep || value == AccessLogDrop || value == AccessLogRedact {
|
||||
return value
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
// OTelLog provides configuration settings for the open-telemetry logger.
|
||||
type OTelLog struct {
|
||||
ServiceName string `description:"Defines the service name resource attribute." json:"serviceName,omitempty" toml:"serviceName,omitempty" yaml:"serviceName,omitempty" export:"true"`
|
||||
ResourceAttributes map[string]string `description:"Defines additional resource attributes (key:value)." json:"resourceAttributes,omitempty" toml:"resourceAttributes,omitempty" yaml:"resourceAttributes,omitempty"`
|
||||
GRPC *OTelGRPC `description:"gRPC configuration for the OpenTelemetry collector." json:"grpc,omitempty" toml:"grpc,omitempty" yaml:"grpc,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
HTTP *OTelHTTP `description:"HTTP configuration for the OpenTelemetry collector." json:"http,omitempty" toml:"http,omitempty" yaml:"http,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (o *OTelLog) SetDefaults() {
|
||||
o.ServiceName = OTelTraefikServiceName
|
||||
o.HTTP = &OTelHTTP{}
|
||||
o.HTTP.SetDefaults()
|
||||
}
|
||||
|
||||
// NewLoggerProvider creates a new OpenTelemetry logger provider.
|
||||
func (o *OTelLog) NewLoggerProvider(ctx context.Context) (*otelsdk.LoggerProvider, error) {
|
||||
var (
|
||||
err error
|
||||
exporter otelsdk.Exporter
|
||||
)
|
||||
if o.GRPC != nil {
|
||||
exporter, err = o.buildGRPCExporter()
|
||||
} else {
|
||||
exporter, err = o.buildHTTPExporter()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("setting up exporter: %w", err)
|
||||
}
|
||||
|
||||
var resAttrs []attribute.KeyValue
|
||||
for k, v := range o.ResourceAttributes {
|
||||
resAttrs = append(resAttrs, attribute.String(k, v))
|
||||
}
|
||||
|
||||
res, err := resource.New(ctx,
|
||||
resource.WithContainer(),
|
||||
resource.WithHost(),
|
||||
resource.WithOS(),
|
||||
resource.WithProcess(),
|
||||
resource.WithTelemetrySDK(),
|
||||
resource.WithDetectors(ttypes.K8sAttributesDetector{}),
|
||||
// The following order allows the user to override the service name and version,
|
||||
// as well as any other attributes set by the above detectors.
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceName(o.ServiceName),
|
||||
semconv.ServiceVersion(version.Version),
|
||||
),
|
||||
resource.WithAttributes(resAttrs...),
|
||||
// Use the environment variables to allow overriding above resource attributes.
|
||||
resource.WithFromEnv(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("building resource: %w", err)
|
||||
}
|
||||
|
||||
// Register the trace provider to allow the global logger to access it.
|
||||
bp := otelsdk.NewBatchProcessor(exporter)
|
||||
loggerProvider := otelsdk.NewLoggerProvider(
|
||||
otelsdk.WithResource(res),
|
||||
otelsdk.WithProcessor(bp),
|
||||
)
|
||||
|
||||
return loggerProvider, nil
|
||||
}
|
||||
|
||||
func (o *OTelLog) buildHTTPExporter() (*otlploghttp.Exporter, error) {
|
||||
endpoint, err := url.Parse(o.HTTP.Endpoint)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid collector endpoint %q: %w", o.HTTP.Endpoint, err)
|
||||
}
|
||||
|
||||
opts := []otlploghttp.Option{
|
||||
otlploghttp.WithEndpoint(endpoint.Host),
|
||||
otlploghttp.WithHeaders(o.HTTP.Headers),
|
||||
otlploghttp.WithCompression(otlploghttp.GzipCompression),
|
||||
}
|
||||
|
||||
if endpoint.Scheme == "http" {
|
||||
opts = append(opts, otlploghttp.WithInsecure())
|
||||
}
|
||||
|
||||
if endpoint.Path != "" {
|
||||
opts = append(opts, otlploghttp.WithURLPath(endpoint.Path))
|
||||
}
|
||||
|
||||
if o.HTTP.TLS != nil {
|
||||
tlsConfig, err := o.HTTP.TLS.CreateTLSConfig(context.Background())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating TLS client config: %w", err)
|
||||
}
|
||||
|
||||
opts = append(opts, otlploghttp.WithTLSClientConfig(tlsConfig))
|
||||
}
|
||||
|
||||
return otlploghttp.New(context.Background(), opts...)
|
||||
}
|
||||
|
||||
func (o *OTelLog) buildGRPCExporter() (*otlploggrpc.Exporter, error) {
|
||||
host, port, err := net.SplitHostPort(o.GRPC.Endpoint)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid collector endpoint %q: %w", o.GRPC.Endpoint, err)
|
||||
}
|
||||
|
||||
opts := []otlploggrpc.Option{
|
||||
otlploggrpc.WithEndpoint(fmt.Sprintf("%s:%s", host, port)),
|
||||
otlploggrpc.WithHeaders(o.GRPC.Headers),
|
||||
otlploggrpc.WithCompressor(gzip.Name),
|
||||
}
|
||||
|
||||
if o.GRPC.Insecure {
|
||||
opts = append(opts, otlploggrpc.WithInsecure())
|
||||
}
|
||||
|
||||
if o.GRPC.TLS != nil {
|
||||
tlsConfig, err := o.GRPC.TLS.CreateTLSConfig(context.Background())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating TLS client config: %w", err)
|
||||
}
|
||||
|
||||
opts = append(opts, otlploggrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig)))
|
||||
}
|
||||
|
||||
return otlploggrpc.New(context.Background(), opts...)
|
||||
}
|
||||
143
pkg/observability/types/metrics.go
Normal file
143
pkg/observability/types/metrics.go
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
package types
|
||||
|
||||
import (
|
||||
"net"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/traefik/paerser/types"
|
||||
)
|
||||
|
||||
// Metrics provides options to expose and send Traefik metrics to different third party monitoring systems.
|
||||
type Metrics struct {
|
||||
AddInternals bool `description:"Enables metrics for internal services (ping, dashboard, etc...)." json:"addInternals,omitempty" toml:"addInternals,omitempty" yaml:"addInternals,omitempty" export:"true"`
|
||||
|
||||
Prometheus *Prometheus `description:"Prometheus metrics exporter type." json:"prometheus,omitempty" toml:"prometheus,omitempty" yaml:"prometheus,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
Datadog *Datadog `description:"Datadog metrics exporter type." json:"datadog,omitempty" toml:"datadog,omitempty" yaml:"datadog,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
StatsD *Statsd `description:"StatsD metrics exporter type." json:"statsD,omitempty" toml:"statsD,omitempty" yaml:"statsD,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
InfluxDB2 *InfluxDB2 `description:"InfluxDB v2 metrics exporter type." json:"influxDB2,omitempty" toml:"influxDB2,omitempty" yaml:"influxDB2,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
OTLP *OTLP `description:"OpenTelemetry metrics exporter type." json:"otlp,omitempty" toml:"otlp,omitempty" yaml:"otlp,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
}
|
||||
|
||||
// Prometheus holds the configuration specific to the Prometheus metrics exporter.
type Prometheus struct {
	Buckets              []float64         `description:"Buckets for latency metrics." json:"buckets,omitempty" toml:"buckets,omitempty" yaml:"buckets,omitempty" export:"true"`
	AddEntryPointsLabels bool              `description:"Enable metrics on entry points." json:"addEntryPointsLabels,omitempty" toml:"addEntryPointsLabels,omitempty" yaml:"addEntryPointsLabels,omitempty" export:"true"`
	AddRoutersLabels     bool              `description:"Enable metrics on routers." json:"addRoutersLabels,omitempty" toml:"addRoutersLabels,omitempty" yaml:"addRoutersLabels,omitempty" export:"true"`
	AddServicesLabels    bool              `description:"Enable metrics on services." json:"addServicesLabels,omitempty" toml:"addServicesLabels,omitempty" yaml:"addServicesLabels,omitempty" export:"true"`
	EntryPoint           string            `description:"EntryPoint" json:"entryPoint,omitempty" toml:"entryPoint,omitempty" yaml:"entryPoint,omitempty" export:"true"`
	ManualRouting        bool              `description:"Manual routing" json:"manualRouting,omitempty" toml:"manualRouting,omitempty" yaml:"manualRouting,omitempty" export:"true"`
	HeaderLabels         map[string]string `description:"Defines the extra labels for the requests_total metrics, and for each of them, the request header containing the value for this label." json:"headerLabels,omitempty" toml:"headerLabels,omitempty" yaml:"headerLabels,omitempty" export:"true"`
}

// SetDefaults sets the default values: latency buckets, entry point and
// service labels enabled, and the "traefik" entry point.
func (p *Prometheus) SetDefaults() {
	p.EntryPoint = "traefik"
	p.AddEntryPointsLabels, p.AddServicesLabels = true, true
	p.Buckets = []float64{0.1, 0.3, 1.2, 5}
}
|
||||
|
||||
// Datadog contains address and metrics pushing interval configuration.
|
||||
type Datadog struct {
|
||||
Address string `description:"Datadog's address." json:"address,omitempty" toml:"address,omitempty" yaml:"address,omitempty"`
|
||||
PushInterval types.Duration `description:"Datadog push interval." json:"pushInterval,omitempty" toml:"pushInterval,omitempty" yaml:"pushInterval,omitempty" export:"true"`
|
||||
AddEntryPointsLabels bool `description:"Enable metrics on entry points." json:"addEntryPointsLabels,omitempty" toml:"addEntryPointsLabels,omitempty" yaml:"addEntryPointsLabels,omitempty" export:"true"`
|
||||
AddRoutersLabels bool `description:"Enable metrics on routers." json:"addRoutersLabels,omitempty" toml:"addRoutersLabels,omitempty" yaml:"addRoutersLabels,omitempty" export:"true"`
|
||||
AddServicesLabels bool `description:"Enable metrics on services." json:"addServicesLabels,omitempty" toml:"addServicesLabels,omitempty" yaml:"addServicesLabels,omitempty" export:"true"`
|
||||
Prefix string `description:"Prefix to use for metrics collection." json:"prefix,omitempty" toml:"prefix,omitempty" yaml:"prefix,omitempty" export:"true"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (d *Datadog) SetDefaults() {
|
||||
host, ok := os.LookupEnv("DD_AGENT_HOST")
|
||||
if !ok {
|
||||
host = "localhost"
|
||||
}
|
||||
|
||||
port, ok := os.LookupEnv("DD_DOGSTATSD_PORT")
|
||||
if !ok {
|
||||
port = "8125"
|
||||
}
|
||||
d.Address = net.JoinHostPort(host, port)
|
||||
d.PushInterval = types.Duration(10 * time.Second)
|
||||
d.AddEntryPointsLabels = true
|
||||
d.AddServicesLabels = true
|
||||
d.Prefix = "traefik"
|
||||
}
|
||||
|
||||
// Statsd contains address and metrics pushing interval configuration.
|
||||
type Statsd struct {
|
||||
Address string `description:"StatsD address." json:"address,omitempty" toml:"address,omitempty" yaml:"address,omitempty"`
|
||||
PushInterval types.Duration `description:"StatsD push interval." json:"pushInterval,omitempty" toml:"pushInterval,omitempty" yaml:"pushInterval,omitempty" export:"true"`
|
||||
AddEntryPointsLabels bool `description:"Enable metrics on entry points." json:"addEntryPointsLabels,omitempty" toml:"addEntryPointsLabels,omitempty" yaml:"addEntryPointsLabels,omitempty" export:"true"`
|
||||
AddRoutersLabels bool `description:"Enable metrics on routers." json:"addRoutersLabels,omitempty" toml:"addRoutersLabels,omitempty" yaml:"addRoutersLabels,omitempty" export:"true"`
|
||||
AddServicesLabels bool `description:"Enable metrics on services." json:"addServicesLabels,omitempty" toml:"addServicesLabels,omitempty" yaml:"addServicesLabels,omitempty" export:"true"`
|
||||
Prefix string `description:"Prefix to use for metrics collection." json:"prefix,omitempty" toml:"prefix,omitempty" yaml:"prefix,omitempty" export:"true"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (s *Statsd) SetDefaults() {
|
||||
s.Address = "localhost:8125"
|
||||
s.PushInterval = types.Duration(10 * time.Second)
|
||||
s.AddEntryPointsLabels = true
|
||||
s.AddServicesLabels = true
|
||||
s.Prefix = "traefik"
|
||||
}
|
||||
|
||||
// InfluxDB2 contains address, token and metrics pushing interval configuration.
|
||||
type InfluxDB2 struct {
|
||||
Address string `description:"InfluxDB v2 address." json:"address,omitempty" toml:"address,omitempty" yaml:"address,omitempty"`
|
||||
Token string `description:"InfluxDB v2 access token." json:"token,omitempty" toml:"token,omitempty" yaml:"token,omitempty" loggable:"false"`
|
||||
PushInterval types.Duration `description:"InfluxDB v2 push interval." json:"pushInterval,omitempty" toml:"pushInterval,omitempty" yaml:"pushInterval,omitempty" export:"true"`
|
||||
Org string `description:"InfluxDB v2 org ID." json:"org,omitempty" toml:"org,omitempty" yaml:"org,omitempty" export:"true"`
|
||||
Bucket string `description:"InfluxDB v2 bucket ID." json:"bucket,omitempty" toml:"bucket,omitempty" yaml:"bucket,omitempty" export:"true"`
|
||||
AddEntryPointsLabels bool `description:"Enable metrics on entry points." json:"addEntryPointsLabels,omitempty" toml:"addEntryPointsLabels,omitempty" yaml:"addEntryPointsLabels,omitempty" export:"true"`
|
||||
AddRoutersLabels bool `description:"Enable metrics on routers." json:"addRoutersLabels,omitempty" toml:"addRoutersLabels,omitempty" yaml:"addRoutersLabels,omitempty" export:"true"`
|
||||
AddServicesLabels bool `description:"Enable metrics on services." json:"addServicesLabels,omitempty" toml:"addServicesLabels,omitempty" yaml:"addServicesLabels,omitempty" export:"true"`
|
||||
AdditionalLabels map[string]string `description:"Additional labels (influxdb tags) on all metrics" json:"additionalLabels,omitempty" toml:"additionalLabels,omitempty" yaml:"additionalLabels,omitempty" export:"true"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (i *InfluxDB2) SetDefaults() {
|
||||
i.Address = "http://localhost:8086"
|
||||
i.PushInterval = types.Duration(10 * time.Second)
|
||||
i.AddEntryPointsLabels = true
|
||||
i.AddServicesLabels = true
|
||||
}
|
||||
|
||||
// OTLP contains specific configuration used by the OpenTelemetry Metrics exporter.
|
||||
type OTLP struct {
|
||||
GRPC *OTelGRPC `description:"gRPC configuration for the OpenTelemetry collector." json:"grpc,omitempty" toml:"grpc,omitempty" yaml:"grpc,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
HTTP *OTelHTTP `description:"HTTP configuration for the OpenTelemetry collector." json:"http,omitempty" toml:"http,omitempty" yaml:"http,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
|
||||
AddEntryPointsLabels bool `description:"Enable metrics on entry points." json:"addEntryPointsLabels,omitempty" toml:"addEntryPointsLabels,omitempty" yaml:"addEntryPointsLabels,omitempty" export:"true"`
|
||||
AddRoutersLabels bool `description:"Enable metrics on routers." json:"addRoutersLabels,omitempty" toml:"addRoutersLabels,omitempty" yaml:"addRoutersLabels,omitempty" export:"true"`
|
||||
AddServicesLabels bool `description:"Enable metrics on services." json:"addServicesLabels,omitempty" toml:"addServicesLabels,omitempty" yaml:"addServicesLabels,omitempty" export:"true"`
|
||||
ExplicitBoundaries []float64 `description:"Boundaries for latency metrics." json:"explicitBoundaries,omitempty" toml:"explicitBoundaries,omitempty" yaml:"explicitBoundaries,omitempty" export:"true"`
|
||||
PushInterval types.Duration `description:"Period between calls to collect a checkpoint." json:"pushInterval,omitempty" toml:"pushInterval,omitempty" yaml:"pushInterval,omitempty" export:"true"`
|
||||
ServiceName string `description:"Defines the service name resource attribute." json:"serviceName,omitempty" toml:"serviceName,omitempty" yaml:"serviceName,omitempty" export:"true"`
|
||||
ResourceAttributes map[string]string `description:"Defines additional resource attributes (key:value)." json:"resourceAttributes,omitempty" toml:"resourceAttributes,omitempty" yaml:"resourceAttributes,omitempty" export:"true"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (o *OTLP) SetDefaults() {
|
||||
o.HTTP = &OTelHTTP{}
|
||||
o.HTTP.SetDefaults()
|
||||
|
||||
o.AddEntryPointsLabels = true
|
||||
o.AddServicesLabels = true
|
||||
o.ExplicitBoundaries = []float64{.005, .01, .025, .05, .075, .1, .25, .5, .75, 1, 2.5, 5, 7.5, 10}
|
||||
o.PushInterval = types.Duration(10 * time.Second)
|
||||
o.ServiceName = OTelTraefikServiceName
|
||||
}
|
||||
|
||||
// Statistics provides the options for monitoring request and response stats.
type Statistics struct {
	RecentErrors int `description:"Number of recent errors logged." json:"recentErrors,omitempty" toml:"recentErrors,omitempty" yaml:"recentErrors,omitempty" export:"true"`
}

// SetDefaults sets the default values: 10 recent errors.
func (s *Statistics) SetDefaults() {
	const defaultRecentErrors = 10

	s.RecentErrors = defaultRecentErrors
}
|
||||
28
pkg/observability/types/otel.go
Normal file
28
pkg/observability/types/otel.go
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
package types
|
||||
|
||||
import "github.com/traefik/traefik/v3/pkg/types"
|
||||
|
||||
// OTelGRPC provides configuration settings for the gRPC open-telemetry.
|
||||
type OTelGRPC struct {
|
||||
Endpoint string `description:"Sets the gRPC endpoint (host:port) of the collector." json:"endpoint,omitempty" toml:"endpoint,omitempty" yaml:"endpoint,omitempty"`
|
||||
Insecure bool `description:"Disables client transport security for the exporter." json:"insecure,omitempty" toml:"insecure,omitempty" yaml:"insecure,omitempty" export:"true"`
|
||||
TLS *types.ClientTLS `description:"Defines client transport security parameters." json:"tls,omitempty" toml:"tls,omitempty" yaml:"tls,omitempty" export:"true"`
|
||||
Headers map[string]string `description:"Headers sent with payload." json:"headers,omitempty" toml:"headers,omitempty" yaml:"headers,omitempty"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (o *OTelGRPC) SetDefaults() {
|
||||
o.Endpoint = "localhost:4317"
|
||||
}
|
||||
|
||||
// OTelHTTP provides configuration settings for the HTTP open-telemetry.
|
||||
type OTelHTTP struct {
|
||||
Endpoint string `description:"Sets the HTTP endpoint (scheme://host:port/path) of the collector." json:"endpoint,omitempty" toml:"endpoint,omitempty" yaml:"endpoint,omitempty"`
|
||||
TLS *types.ClientTLS `description:"Defines client transport security parameters." json:"tls,omitempty" toml:"tls,omitempty" yaml:"tls,omitempty" export:"true"`
|
||||
Headers map[string]string `description:"Headers sent with payload." json:"headers,omitempty" toml:"headers,omitempty" yaml:"headers,omitempty"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (o *OTelHTTP) SetDefaults() {
|
||||
o.Endpoint = "https://localhost:4318"
|
||||
}
|
||||
186
pkg/observability/types/tracing.go
Normal file
186
pkg/observability/types/tracing.go
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
package types
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/url"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
ttypes "github.com/traefik/traefik/v3/pkg/types"
|
||||
"github.com/traefik/traefik/v3/pkg/version"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.37.0"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"google.golang.org/grpc/credentials"
|
||||
"google.golang.org/grpc/encoding/gzip"
|
||||
)
|
||||
|
||||
// TracingVerbosity is the name of a tracing verbosity level.
type TracingVerbosity string

// Known tracing verbosity levels.
const (
	MinimalVerbosity  TracingVerbosity = "minimal"
	DetailedVerbosity TracingVerbosity = "detailed"
)

// Allows reports whether the level v covers the requested verbosity.
//
// The minimal verbosity is allowed by every level, including unknown ones.
// The detailed verbosity is allowed only by the detailed level. Any other
// requested verbosity is never allowed.
func (v TracingVerbosity) Allows(verbosity TracingVerbosity) bool {
	if verbosity == MinimalVerbosity {
		return true
	}

	return v == DetailedVerbosity && verbosity == DetailedVerbosity
}
|
||||
|
||||
// OTelTracing provides configuration settings for the open-telemetry tracer.
|
||||
type OTelTracing struct {
|
||||
GRPC *OTelGRPC `description:"gRPC configuration for the OpenTelemetry collector." json:"grpc,omitempty" toml:"grpc,omitempty" yaml:"grpc,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
HTTP *OTelHTTP `description:"HTTP configuration for the OpenTelemetry collector." json:"http,omitempty" toml:"http,omitempty" yaml:"http,omitempty" label:"allowEmpty" file:"allowEmpty" export:"true"`
|
||||
}
|
||||
|
||||
// SetDefaults sets the default values.
|
||||
func (c *OTelTracing) SetDefaults() {
|
||||
c.HTTP = &OTelHTTP{}
|
||||
c.HTTP.SetDefaults()
|
||||
}
|
||||
|
||||
// Setup sets up the tracer.
|
||||
func (c *OTelTracing) Setup(ctx context.Context, serviceName string, sampleRate float64, resourceAttributes map[string]string) (trace.Tracer, io.Closer, error) {
|
||||
var (
|
||||
err error
|
||||
exporter *otlptrace.Exporter
|
||||
)
|
||||
if c.GRPC != nil {
|
||||
exporter, err = c.setupGRPCExporter()
|
||||
} else {
|
||||
exporter, err = c.setupHTTPExporter()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("setting up exporter: %w", err)
|
||||
}
|
||||
|
||||
var resAttrs []attribute.KeyValue
|
||||
for k, v := range resourceAttributes {
|
||||
resAttrs = append(resAttrs, attribute.String(k, v))
|
||||
}
|
||||
|
||||
res, err := resource.New(ctx,
|
||||
resource.WithContainer(),
|
||||
resource.WithHost(),
|
||||
resource.WithOS(),
|
||||
resource.WithProcess(),
|
||||
resource.WithTelemetrySDK(),
|
||||
resource.WithDetectors(ttypes.K8sAttributesDetector{}),
|
||||
// The following order allows the user to override the service name and version,
|
||||
// as well as any other attributes set by the above detectors.
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceName(serviceName),
|
||||
semconv.ServiceVersion(version.Version),
|
||||
),
|
||||
resource.WithAttributes(resAttrs...),
|
||||
// Use the environment variables to allow overriding above resource attributes.
|
||||
resource.WithFromEnv(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("building resource: %w", err)
|
||||
}
|
||||
|
||||
// Register the trace exporter with a TracerProvider, using a batch
|
||||
// span processor to aggregate spans before export.
|
||||
bsp := sdktrace.NewBatchSpanProcessor(exporter)
|
||||
tracerProvider := sdktrace.NewTracerProvider(
|
||||
sdktrace.WithSampler(sdktrace.TraceIDRatioBased(sampleRate)),
|
||||
sdktrace.WithResource(res),
|
||||
sdktrace.WithSpanProcessor(bsp),
|
||||
)
|
||||
|
||||
otel.SetTracerProvider(tracerProvider)
|
||||
|
||||
log.Debug().Msg("OpenTelemetry tracer configured")
|
||||
|
||||
return tracerProvider.Tracer("github.com/traefik/traefik"), &tpCloser{provider: tracerProvider}, err
|
||||
}
|
||||
|
||||
func (c *OTelTracing) setupHTTPExporter() (*otlptrace.Exporter, error) {
|
||||
endpoint, err := url.Parse(c.HTTP.Endpoint)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid collector endpoint %q: %w", c.HTTP.Endpoint, err)
|
||||
}
|
||||
|
||||
opts := []otlptracehttp.Option{
|
||||
otlptracehttp.WithEndpoint(endpoint.Host),
|
||||
otlptracehttp.WithHeaders(c.HTTP.Headers),
|
||||
otlptracehttp.WithCompression(otlptracehttp.GzipCompression),
|
||||
}
|
||||
|
||||
if endpoint.Scheme == "http" {
|
||||
opts = append(opts, otlptracehttp.WithInsecure())
|
||||
}
|
||||
|
||||
if endpoint.Path != "" {
|
||||
opts = append(opts, otlptracehttp.WithURLPath(endpoint.Path))
|
||||
}
|
||||
|
||||
if c.HTTP.TLS != nil {
|
||||
tlsConfig, err := c.HTTP.TLS.CreateTLSConfig(context.Background())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating TLS client config: %w", err)
|
||||
}
|
||||
|
||||
opts = append(opts, otlptracehttp.WithTLSClientConfig(tlsConfig))
|
||||
}
|
||||
|
||||
return otlptrace.New(context.Background(), otlptracehttp.NewClient(opts...))
|
||||
}
|
||||
|
||||
func (c *OTelTracing) setupGRPCExporter() (*otlptrace.Exporter, error) {
|
||||
host, port, err := net.SplitHostPort(c.GRPC.Endpoint)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid collector endpoint %q: %w", c.GRPC.Endpoint, err)
|
||||
}
|
||||
|
||||
opts := []otlptracegrpc.Option{
|
||||
otlptracegrpc.WithEndpoint(fmt.Sprintf("%s:%s", host, port)),
|
||||
otlptracegrpc.WithHeaders(c.GRPC.Headers),
|
||||
otlptracegrpc.WithCompressor(gzip.Name),
|
||||
}
|
||||
|
||||
if c.GRPC.Insecure {
|
||||
opts = append(opts, otlptracegrpc.WithInsecure())
|
||||
}
|
||||
|
||||
if c.GRPC.TLS != nil {
|
||||
tlsConfig, err := c.GRPC.TLS.CreateTLSConfig(context.Background())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating TLS client config: %w", err)
|
||||
}
|
||||
|
||||
opts = append(opts, otlptracegrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig)))
|
||||
}
|
||||
|
||||
return otlptrace.New(context.Background(), otlptracegrpc.NewClient(opts...))
|
||||
}
|
||||
|
||||
// tpCloser converts a TraceProvider into an io.Closer.
|
||||
type tpCloser struct {
|
||||
provider *sdktrace.TracerProvider
|
||||
}
|
||||
|
||||
func (t *tpCloser) Close() error {
|
||||
if t == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(5*time.Second))
|
||||
defer cancel()
|
||||
|
||||
return t.provider.Shutdown(ctx)
|
||||
}
|
||||
72
pkg/observability/types/tracing_test.go
Normal file
72
pkg/observability/types/tracing_test.go
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
package types
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestTracingVerbosity_Allows(t *testing.T) {
|
||||
tests := []struct {
|
||||
desc string
|
||||
from TracingVerbosity
|
||||
to TracingVerbosity
|
||||
allows bool
|
||||
}{
|
||||
{
|
||||
desc: "minimal vs minimal",
|
||||
from: MinimalVerbosity,
|
||||
to: MinimalVerbosity,
|
||||
allows: true,
|
||||
},
|
||||
{
|
||||
desc: "minimal vs detailed",
|
||||
from: MinimalVerbosity,
|
||||
to: DetailedVerbosity,
|
||||
allows: false,
|
||||
},
|
||||
{
|
||||
desc: "detailed vs minimal",
|
||||
from: DetailedVerbosity,
|
||||
to: MinimalVerbosity,
|
||||
allows: true,
|
||||
},
|
||||
{
|
||||
desc: "detailed vs detailed",
|
||||
from: DetailedVerbosity,
|
||||
to: DetailedVerbosity,
|
||||
allows: true,
|
||||
},
|
||||
{
|
||||
desc: "unknown vs minimal",
|
||||
from: TracingVerbosity("unknown"),
|
||||
to: MinimalVerbosity,
|
||||
allows: true,
|
||||
},
|
||||
{
|
||||
desc: "unknown vs detailed",
|
||||
from: TracingVerbosity("unknown"),
|
||||
to: DetailedVerbosity,
|
||||
allows: false,
|
||||
},
|
||||
{
|
||||
desc: "minimal vs unknown",
|
||||
from: MinimalVerbosity,
|
||||
to: TracingVerbosity("unknown"),
|
||||
allows: false,
|
||||
},
|
||||
{
|
||||
desc: "detailed vs unknown",
|
||||
from: DetailedVerbosity,
|
||||
to: TracingVerbosity("unknown"),
|
||||
allows: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
require.Equal(t, test.allows, test.from.Allows(test.to))
|
||||
})
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue