Add Metrics

2019-07-18 21:36:05 +02:00 · 2019-07-18 21:36:05 +02:00 · 8e97af8dc3
commit 8e97af8dc3
parent 4dc448056c
121 changed files with 8364 additions and 3811 deletions
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ddtrace.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ddtrace.go
@ -119,3 +119,9 @@ type StartSpanConfig struct {
 	// then this will also set the TraceID to the same value.
 	SpanID uint64
 }
+
+// Logger implementations are able to log given messages that the tracer might output.
+type Logger interface {
+	// Log prints the given message.
+	Log(msg string)
+}
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/errors.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/errors.go
@ -1,69 +0,0 @@
-package tracer
-
-import (
-	"fmt"
-	"log"
-	"strconv"
-)
-
-var errorPrefix = fmt.Sprintf("Datadog Tracer Error (%s): ", tracerVersion)
-
-type traceEncodingError struct{ context error }
-
-func (e *traceEncodingError) Error() string {
-	return fmt.Sprintf("error encoding trace: %s", e.context)
-}
-
-type spanBufferFullError struct{}
-
-func (e *spanBufferFullError) Error() string {
-	return fmt.Sprintf("trace span cap (%d) reached, dropping trace", traceMaxSize)
-}
-
-type dataLossError struct {
-	count   int   // number of items lost
-	context error // any context error, if available
-}
-
-func (e *dataLossError) Error() string {
-	return fmt.Sprintf("lost traces (count: %d), error: %v", e.count, e.context)
-}
-
-type errorSummary struct {
-	Count   int
-	Example string
-}
-
-func aggregateErrors(errChan <-chan error) map[string]errorSummary {
-	errs := make(map[string]errorSummary, len(errChan))
-	for {
-		select {
-		case err := <-errChan:
-			if err == nil {
-				break
-			}
-			key := fmt.Sprintf("%T", err)
-			summary := errs[key]
-			summary.Count++
-			summary.Example = err.Error()
-			errs[key] = summary
-		default: // stop when there's no more data
-			return errs
-		}
-	}
-}
-
-// logErrors logs the errors, preventing log file flooding, when there
-// are many messages, it caps them and shows a quick summary.
-// As of today it only logs using standard golang log package, but
-// later we could send those stats to agent // TODO(ufoot).
-func logErrors(errChan <-chan error) {
-	errs := aggregateErrors(errChan)
-	for _, v := range errs {
-		var repeat string
-		if v.Count > 1 {
-			repeat = " (repeated " + strconv.Itoa(v.Count) + " times)"
-		}
-		log.Println(errorPrefix + v.Example + repeat)
-	}
-}
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/option.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/option.go
@ -1,7 +1,6 @@
 package tracer

 import (
-	"log"
 	"net/http"
 	"os"
 	"path/filepath"
@ -10,6 +9,7 @@ import (
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext"
 	"gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig"
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
 )

 // config holds the tracer configuration.
@ -43,6 +43,10 @@ type config struct {
 	// hostname is automatically assigned when the DD_TRACE_REPORT_HOSTNAME is set to true,
 	// and is added as a special tag to the root span of traces.
 	hostname string
+
+	// logger specifies the logger to use when printing errors. If not specified, the "log" package
+	// will be used.
+	logger ddtrace.Logger
 }

 // StartOption represents a function that can be provided as a parameter to Start.
@ -58,11 +62,18 @@ func defaults(c *config) {
 		var err error
 		c.hostname, err = os.Hostname()
 		if err != nil {
-			log.Printf("%sunable to look up hostname: %v\n", errorPrefix, err)
+			log.Warn("unable to look up hostname: %v", err)
 		}
 	}
 }

+// WithLogger sets logger as the tracer's error printer.
+func WithLogger(logger ddtrace.Logger) StartOption {
+	return func(c *config) {
+		c.logger = logger
+	}
+}
+
 // WithPrioritySampling is deprecated, and priority sampling is enabled by default.
 // When using distributed tracing, the priority sampling value is propagated in order to
 // get all the parts of a distributed trace sampled.
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/rand.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/rand.go
@ -2,12 +2,13 @@ package tracer

 import (
 	cryptorand "crypto/rand"
-	"log"
 	"math"
 	"math/big"
 	"math/rand"
 	"sync"
 	"time"
+
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
 )

 // random holds a thread-safe source of random numbers.
@ -19,7 +20,7 @@ func init() {
 	if err == nil {
 		seed = n.Int64()
 	} else {
-		log.Printf("%scannot generate random seed: %v; using current time\n", errorPrefix, err)
+		log.Warn("cannot generate random seed: %v; using current time", err)
 		seed = time.Now().UnixNano()
 	}
 	random = rand.New(&safeSource{
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/spancontext.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/spancontext.go
@ -5,6 +5,7 @@ import (

 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal"
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
 )

 var _ ddtrace.SpanContext = (*spanContext)(nil)
@ -199,10 +200,7 @@ func (t *trace) push(sp *span) {
 		// capacity is reached, we will not be able to complete this trace.
 		t.full = true
 		t.spans = nil // GC
-		if tr, ok := internal.GetGlobalTracer().(*tracer); ok {
-			// we have a tracer we can submit errors too.
-			tr.pushError(&spanBufferFullError{})
-		}
+		log.Error("trace buffer full (%d), dropping trace", traceMaxSize)
 		return
 	}
 	if v, ok := sp.Metrics[keySamplingPriority]; ok {
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/time_windows.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/time_windows.go
@ -1,10 +1,11 @@
 package tracer

 import (
-	"log"
 	"time"

 	"golang.org/x/sys/windows"
+
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
 )

 // This method is more precise than the go1.8 time.Now on Windows
@ -26,10 +27,9 @@ var now func() int64
 // precise implementation based on time.Now()
 func init() {
 	if err := windows.LoadGetSystemTimePreciseAsFileTime(); err != nil {
-		log.Printf("Unable to load high precison timer, defaulting to time.Now()")
+		log.Warn("Unable to load high precison timer, defaulting to time.Now()")
 		now = lowPrecisionNow
 	} else {
-		log.Printf("Using high precision timer")
 		now = highPrecisionNow
 	}
 }
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/tracer.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/tracer.go
@ -1,8 +1,6 @@
 package tracer

 import (
-	"errors"
-	"log"
 	"os"
 	"strconv"
 	"time"
@ -10,6 +8,7 @@ import (
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal"
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
 )

 var _ ddtrace.Tracer = (*tracer)(nil)
@ -28,11 +27,9 @@ type tracer struct {

 	flushAllReq    chan chan<- struct{}
 	flushTracesReq chan struct{}
-	flushErrorsReq chan struct{}
 	exitReq        chan struct{}

 	payloadQueue chan []*span
-	errorBuffer  chan error

 	// stopped is a channel that will be closed when the worker has exited.
 	stopped chan struct{}
@ -75,6 +72,7 @@ func Start(opts ...StartOption) {
 // Stop stops the started tracer. Subsequent calls are valid but become no-op.
 func Stop() {
 	internal.SetGlobalTracer(&internal.NoopTracer{})
+	log.Flush()
 }

 // Span is an alias for ddtrace.Span. It is here to allow godoc to group methods returning
@ -102,13 +100,8 @@ func Inject(ctx ddtrace.SpanContext, carrier interface{}) error {
 	return internal.GetGlobalTracer().Inject(ctx, carrier)
 }

-const (
-	// payloadQueueSize is the buffer size of the trace channel.
-	payloadQueueSize = 1000
-
-	// errorBufferSize is the buffer size of the error channel.
-	errorBufferSize = 200
-)
+// payloadQueueSize is the buffer size of the trace channel.
+const payloadQueueSize = 1000

 func newTracer(opts ...StartOption) *tracer {
 	c := new(config)
@ -122,15 +115,19 @@ func newTracer(opts ...StartOption) *tracer {
 	if c.propagator == nil {
 		c.propagator = NewPropagator(nil)
 	}
+	if c.logger != nil {
+		log.UseLogger(c.logger)
+	}
+	if c.debug {
+		log.SetLevel(log.LevelDebug)
+	}
 	t := &tracer{
 		config:           c,
 		payload:          newPayload(),
 		flushAllReq:      make(chan chan<- struct{}),
 		flushTracesReq:   make(chan struct{}, 1),
-		flushErrorsReq:   make(chan struct{}, 1),
 		exitReq:          make(chan struct{}),
 		payloadQueue:     make(chan []*span, payloadQueueSize),
-		errorBuffer:      make(chan error, errorBufferSize),
 		stopped:          make(chan struct{}),
 		prioritySampling: newPrioritySampler(),
 		pid:              strconv.Itoa(os.Getpid()),
@ -161,10 +158,7 @@ func (t *tracer) worker() {
 			done <- struct{}{}

 		case <-t.flushTracesReq:
-			t.flushTraces()
-
-		case <-t.flushErrorsReq:
-			t.flushErrors()
+			t.flush()

 		case <-t.exitReq:
 			t.flush()
@ -182,10 +176,7 @@ func (t *tracer) pushTrace(trace []*span) {
 	select {
 	case t.payloadQueue <- trace:
 	default:
-		t.pushError(&dataLossError{
-			context: errors.New("payload queue full, dropping trace"),
-			count:   len(trace),
-		})
+		log.Error("payload queue full, dropping %d traces", len(trace))
 	}
 	if t.syncPush != nil {
 		// only in tests
@ -193,28 +184,6 @@ func (t *tracer) pushTrace(trace []*span) {
 	}
 }

-func (t *tracer) pushError(err error) {
-	select {
-	case <-t.stopped:
-		return
-	default:
-	}
-	if len(t.errorBuffer) >= cap(t.errorBuffer)/2 { // starts being full, anticipate, try and flush soon
-		select {
-		case t.flushErrorsReq <- struct{}{}:
-		default: // a flush was already requested, skip
-		}
-	}
-	select {
-	case t.errorBuffer <- err:
-	default:
-		// OK, if we get this, our error error buffer is full,
-		// we can assume it is filled with meaningful messages which
-		// are going to be logged and hopefully read, nothing better
-		// we can do, blocking would make things worse.
-	}
-}
-
 // StartSpan creates, starts, and returns a new Span with the given `operationName`.
 func (t *tracer) StartSpan(operationName string, options ...ddtrace.StartSpanOption) ddtrace.Span {
 	var opts ddtrace.StartSpanConfig
@ -313,18 +282,16 @@ func (t *tracer) Extract(carrier interface{}) (ddtrace.SpanContext, error) {
 	return t.config.propagator.Extract(carrier)
 }

-// flushTraces will push any currently buffered traces to the server.
-func (t *tracer) flushTraces() {
+// flush will push any currently buffered traces to the server.
+func (t *tracer) flush() {
 	if t.payload.itemCount() == 0 {
 		return
 	}
 	size, count := t.payload.size(), t.payload.itemCount()
-	if t.config.debug {
-		log.Printf("Sending payload: size: %d traces: %d\n", size, count)
-	}
+	log.Debug("Sending payload: size: %d traces: %d\n", size, count)
 	rc, err := t.config.transport.send(t.payload)
 	if err != nil {
-		t.pushError(&dataLossError{context: err, count: count})
+		log.Error("lost %d traces: %v", count, err)
 	}
 	if err == nil {
 		t.prioritySampling.readRatesJSON(rc) // TODO: handle error?
@ -332,16 +299,6 @@ func (t *tracer) flushTraces() {
 	t.payload.reset()
 }

-// flushErrors will process log messages that were queued
-func (t *tracer) flushErrors() {
-	logErrors(t.errorBuffer)
-}
-
-func (t *tracer) flush() {
-	t.flushTraces()
-	t.flushErrors()
-}
-
 // forceFlush forces a flush of data (traces and services) to the agent.
 // Flushes are done by a background task on a regular basis, so you never
 // need to call this manually, mostly useful for testing and debugging.
@ -355,7 +312,7 @@ func (t *tracer) forceFlush() {
 // larger than the threshold as a result, it sends a flush request.
 func (t *tracer) pushPayload(trace []*span) {
 	if err := t.payload.push(trace); err != nil {
-		t.pushError(&traceEncodingError{context: err})
+		log.Error("error encoding msgpack: %v", err)
 	}
 	if t.payload.size() > payloadSizeLimit {
 		// getting large
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/transport.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer/transport.go
@ -10,13 +10,11 @@ import (
 	"strconv"
 	"strings"
 	"time"
+
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/version"
 )

 var (
-	// TODO(gbbr): find a more effective way to keep this up to date,
-	// e.g. via `go generate`
-	tracerVersion = "v1.13.1"
-
 	// We copy the transport to avoid using the default one, as it might be
 	// augmented with tracing and we don't want these calls to be recorded.
 	// See https://golang.org/pkg/net/http/#DefaultTransport .
@ -84,7 +82,7 @@ func newHTTPTransport(addr string, roundTripper http.RoundTripper) *httpTranspor
 		"Datadog-Meta-Lang":             "go",
 		"Datadog-Meta-Lang-Version":     strings.TrimPrefix(runtime.Version(), "go"),
 		"Datadog-Meta-Lang-Interpreter": runtime.Compiler + "-" + runtime.GOARCH + "-" + runtime.GOOS,
-		"Datadog-Meta-Tracer-Version":   tracerVersion,
+		"Datadog-Meta-Tracer-Version":   version.Tag,
 		"Content-Type":                  "application/msgpack",
 	}
 	return &httpTransport{
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/internal/log/log.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/internal/log/log.go
@ -0,0 +1,163 @@
+// Package log provides logging utilities for the tracer.
+package log
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"strconv"
+	"sync"
+	"time"
+
+	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace"
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/version"
+)
+
+// Level specifies the logging level that the log package prints at.
+type Level int
+
+const (
+	// LevelDebug represents debug level messages.
+	LevelDebug Level = iota
+	// LevelWarn represents warning and errors.
+	LevelWarn
+)
+
+var prefixMsg = fmt.Sprintf("Datadog Tracer %s", version.Tag)
+
+var (
+	mu     sync.RWMutex   // guards below fields
+	level                 = LevelWarn
+	logger ddtrace.Logger = &defaultLogger{l: log.New(os.Stderr, "", log.LstdFlags)}
+)
+
+// UseLogger sets l as the active logger.
+func UseLogger(l ddtrace.Logger) {
+	mu.Lock()
+	defer mu.Unlock()
+	logger = l
+}
+
+// SetLevel sets the given lvl for logging.
+func SetLevel(lvl Level) {
+	mu.Lock()
+	defer mu.Unlock()
+	level = lvl
+}
+
+// Debug prints the given message if the level is LevelDebug.
+func Debug(fmt string, a ...interface{}) {
+	mu.RLock()
+	lvl := level
+	mu.RUnlock()
+	if lvl != LevelDebug {
+		return
+	}
+	printMsg("DEBUG", fmt, a...)
+}
+
+// Warn prints a warning message.
+func Warn(fmt string, a ...interface{}) {
+	printMsg("WARN", fmt, a...)
+}
+
+var (
+	errmu   sync.RWMutex                // guards below fields
+	erragg  = map[string]*errorReport{} // aggregated errors
+	errrate = time.Minute               // the rate at which errors are reported
+	erron   bool                        // true if errors are being aggregated
+)
+
+func init() {
+	if v := os.Getenv("DD_LOGGING_RATE"); v != "" {
+		if sec, err := strconv.ParseUint(v, 10, 64); err != nil {
+			Warn("Invalid value for DD_LOGGING_RATE: %v", err)
+		} else {
+			errrate = time.Duration(sec) * time.Second
+		}
+	}
+}
+
+type errorReport struct {
+	first time.Time // time when first error occurred
+	err   error
+	count uint64
+}
+
+// Error reports an error. Errors get aggregated and logged periodically. The
+// default is once per minute or once every DD_LOGGING_RATE number of seconds.
+func Error(format string, a ...interface{}) {
+	key := format // format should 99.9% of the time be constant
+	if reachedLimit(key) {
+		// avoid too much lock contention on spammy errors
+		return
+	}
+	errmu.Lock()
+	defer errmu.Unlock()
+	report, ok := erragg[key]
+	if !ok {
+		erragg[key] = &errorReport{
+			err:   fmt.Errorf(format, a...),
+			first: time.Now(),
+		}
+		report = erragg[key]
+	}
+	report.count++
+	if errrate == 0 {
+		flushLocked()
+		return
+	}
+	if !erron {
+		erron = true
+		time.AfterFunc(errrate, Flush)
+	}
+}
+
+// defaultErrorLimit specifies the maximum number of errors gathered in a report.
+const defaultErrorLimit = 200
+
+// reachedLimit reports whether the maximum count has been reached for this key.
+func reachedLimit(key string) bool {
+	errmu.RLock()
+	e, ok := erragg[key]
+	confirm := ok && e.count > defaultErrorLimit
+	errmu.RUnlock()
+	return confirm
+}
+
+// Flush flushes and resets all aggregated errors to the logger.
+func Flush() {
+	errmu.Lock()
+	defer errmu.Unlock()
+	flushLocked()
+}
+
+func flushLocked() {
+	for _, report := range erragg {
+		msg := fmt.Sprintf("%v", report.err)
+		if report.count > defaultErrorLimit {
+			msg += fmt.Sprintf(", %d+ additional messages skipped (first occurrence: %s)", defaultErrorLimit, report.first.Format(time.RFC822))
+		} else if report.count > 1 {
+			msg += fmt.Sprintf(", %d additional messages skipped (first occurrence: %s)", report.count-1, report.first.Format(time.RFC822))
+		} else {
+			msg += fmt.Sprintf(" (occurred: %s)", report.first.Format(time.RFC822))
+		}
+		printMsg("ERROR", msg)
+	}
+	for k := range erragg {
+		// compiler-optimized map-clearing post go1.11 (golang/go#20138)
+		delete(erragg, k)
+	}
+	erron = false
+}
+
+func printMsg(lvl, format string, a ...interface{}) {
+	msg := fmt.Sprintf("%s %s: %s", prefixMsg, lvl, fmt.Sprintf(format, a...))
+	mu.RLock()
+	logger.Log(msg)
+	mu.RUnlock()
+}
+
+type defaultLogger struct{ l *log.Logger }
+
+func (p *defaultLogger) Log(msg string) { p.l.Print(msg) }
--- a/vendor/gopkg.in/DataDog/dd-trace-go.v1/internal/version/version.go
+++ b/vendor/gopkg.in/DataDog/dd-trace-go.v1/internal/version/version.go
@ -0,0 +1,6 @@
+package version
+
+// Tag specifies the current release tag. It needs to be manually
+// updated. A test checks that the value of Tag never points to a
+// git tag that is older than HEAD.
+const Tag = "v1.15.0"