1
0
Fork 0

Add Metrics

This commit is contained in:
Michael 2019-07-18 21:36:05 +02:00 committed by Traefiker Bot
parent 4dc448056c
commit 8e97af8dc3
121 changed files with 8364 additions and 3811 deletions

View file

@ -119,3 +119,9 @@ type StartSpanConfig struct {
// then this will also set the TraceID to the same value.
SpanID uint64
}
// Logger implementations are able to log given messages that the tracer might output.
type Logger interface {
// Log prints the given message.
Log(msg string)
}

View file

@ -1,69 +0,0 @@
package tracer
import (
"fmt"
"log"
"strconv"
)
var errorPrefix = fmt.Sprintf("Datadog Tracer Error (%s): ", tracerVersion)
type traceEncodingError struct{ context error }
func (e *traceEncodingError) Error() string {
return fmt.Sprintf("error encoding trace: %s", e.context)
}
type spanBufferFullError struct{}
func (e *spanBufferFullError) Error() string {
return fmt.Sprintf("trace span cap (%d) reached, dropping trace", traceMaxSize)
}
type dataLossError struct {
count int // number of items lost
context error // any context error, if available
}
func (e *dataLossError) Error() string {
return fmt.Sprintf("lost traces (count: %d), error: %v", e.count, e.context)
}
type errorSummary struct {
Count int
Example string
}
func aggregateErrors(errChan <-chan error) map[string]errorSummary {
errs := make(map[string]errorSummary, len(errChan))
for {
select {
case err := <-errChan:
if err == nil {
break
}
key := fmt.Sprintf("%T", err)
summary := errs[key]
summary.Count++
summary.Example = err.Error()
errs[key] = summary
default: // stop when there's no more data
return errs
}
}
}
// logErrors logs the errors, preventing log file flooding, when there
// are many messages, it caps them and shows a quick summary.
// As of today it only logs using standard golang log package, but
// later we could send those stats to agent // TODO(ufoot).
func logErrors(errChan <-chan error) {
errs := aggregateErrors(errChan)
for _, v := range errs {
var repeat string
if v.Count > 1 {
repeat = " (repeated " + strconv.Itoa(v.Count) + " times)"
}
log.Println(errorPrefix + v.Example + repeat)
}
}

View file

@ -1,7 +1,6 @@
package tracer
import (
"log"
"net/http"
"os"
"path/filepath"
@ -10,6 +9,7 @@ import (
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace"
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext"
"gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig"
"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
)
// config holds the tracer configuration.
@ -43,6 +43,10 @@ type config struct {
// hostname is automatically assigned when the DD_TRACE_REPORT_HOSTNAME is set to true,
// and is added as a special tag to the root span of traces.
hostname string
// logger specifies the logger to use when printing errors. If not specified, the "log" package
// will be used.
logger ddtrace.Logger
}
// StartOption represents a function that can be provided as a parameter to Start.
@ -58,11 +62,18 @@ func defaults(c *config) {
var err error
c.hostname, err = os.Hostname()
if err != nil {
log.Printf("%sunable to look up hostname: %v\n", errorPrefix, err)
log.Warn("unable to look up hostname: %v", err)
}
}
}
// WithLogger sets logger as the tracer's error printer.
func WithLogger(logger ddtrace.Logger) StartOption {
return func(c *config) {
c.logger = logger
}
}
// WithPrioritySampling is deprecated, and priority sampling is enabled by default.
// When using distributed tracing, the priority sampling value is propagated in order to
// get all the parts of a distributed trace sampled.

View file

@ -2,12 +2,13 @@ package tracer
import (
cryptorand "crypto/rand"
"log"
"math"
"math/big"
"math/rand"
"sync"
"time"
"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
)
// random holds a thread-safe source of random numbers.
@ -19,7 +20,7 @@ func init() {
if err == nil {
seed = n.Int64()
} else {
log.Printf("%scannot generate random seed: %v; using current time\n", errorPrefix, err)
log.Warn("cannot generate random seed: %v; using current time", err)
seed = time.Now().UnixNano()
}
random = rand.New(&safeSource{

View file

@ -5,6 +5,7 @@ import (
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace"
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal"
"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
)
var _ ddtrace.SpanContext = (*spanContext)(nil)
@ -199,10 +200,7 @@ func (t *trace) push(sp *span) {
// capacity is reached, we will not be able to complete this trace.
t.full = true
t.spans = nil // GC
if tr, ok := internal.GetGlobalTracer().(*tracer); ok {
// we have a tracer we can submit errors too.
tr.pushError(&spanBufferFullError{})
}
log.Error("trace buffer full (%d), dropping trace", traceMaxSize)
return
}
if v, ok := sp.Metrics[keySamplingPriority]; ok {

View file

@ -1,10 +1,11 @@
package tracer
import (
"log"
"time"
"golang.org/x/sys/windows"
"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
)
// This method is more precise than the go1.8 time.Now on Windows
@ -26,10 +27,9 @@ var now func() int64
// precise implementation based on time.Now()
func init() {
if err := windows.LoadGetSystemTimePreciseAsFileTime(); err != nil {
log.Printf("Unable to load high precison timer, defaulting to time.Now()")
log.Warn("Unable to load high precison timer, defaulting to time.Now()")
now = lowPrecisionNow
} else {
log.Printf("Using high precision timer")
now = highPrecisionNow
}
}

View file

@ -1,8 +1,6 @@
package tracer
import (
"errors"
"log"
"os"
"strconv"
"time"
@ -10,6 +8,7 @@ import (
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace"
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext"
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal"
"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
)
var _ ddtrace.Tracer = (*tracer)(nil)
@ -28,11 +27,9 @@ type tracer struct {
flushAllReq chan chan<- struct{}
flushTracesReq chan struct{}
flushErrorsReq chan struct{}
exitReq chan struct{}
payloadQueue chan []*span
errorBuffer chan error
// stopped is a channel that will be closed when the worker has exited.
stopped chan struct{}
@ -75,6 +72,7 @@ func Start(opts ...StartOption) {
// Stop stops the started tracer. Subsequent calls are valid but become no-op.
func Stop() {
internal.SetGlobalTracer(&internal.NoopTracer{})
log.Flush()
}
// Span is an alias for ddtrace.Span. It is here to allow godoc to group methods returning
@ -102,13 +100,8 @@ func Inject(ctx ddtrace.SpanContext, carrier interface{}) error {
return internal.GetGlobalTracer().Inject(ctx, carrier)
}
const (
// payloadQueueSize is the buffer size of the trace channel.
payloadQueueSize = 1000
// errorBufferSize is the buffer size of the error channel.
errorBufferSize = 200
)
// payloadQueueSize is the buffer size of the trace channel.
const payloadQueueSize = 1000
func newTracer(opts ...StartOption) *tracer {
c := new(config)
@ -122,15 +115,19 @@ func newTracer(opts ...StartOption) *tracer {
if c.propagator == nil {
c.propagator = NewPropagator(nil)
}
if c.logger != nil {
log.UseLogger(c.logger)
}
if c.debug {
log.SetLevel(log.LevelDebug)
}
t := &tracer{
config: c,
payload: newPayload(),
flushAllReq: make(chan chan<- struct{}),
flushTracesReq: make(chan struct{}, 1),
flushErrorsReq: make(chan struct{}, 1),
exitReq: make(chan struct{}),
payloadQueue: make(chan []*span, payloadQueueSize),
errorBuffer: make(chan error, errorBufferSize),
stopped: make(chan struct{}),
prioritySampling: newPrioritySampler(),
pid: strconv.Itoa(os.Getpid()),
@ -161,10 +158,7 @@ func (t *tracer) worker() {
done <- struct{}{}
case <-t.flushTracesReq:
t.flushTraces()
case <-t.flushErrorsReq:
t.flushErrors()
t.flush()
case <-t.exitReq:
t.flush()
@ -182,10 +176,7 @@ func (t *tracer) pushTrace(trace []*span) {
select {
case t.payloadQueue <- trace:
default:
t.pushError(&dataLossError{
context: errors.New("payload queue full, dropping trace"),
count: len(trace),
})
log.Error("payload queue full, dropping %d traces", len(trace))
}
if t.syncPush != nil {
// only in tests
@ -193,28 +184,6 @@ func (t *tracer) pushTrace(trace []*span) {
}
}
func (t *tracer) pushError(err error) {
select {
case <-t.stopped:
return
default:
}
if len(t.errorBuffer) >= cap(t.errorBuffer)/2 { // starts being full, anticipate, try and flush soon
select {
case t.flushErrorsReq <- struct{}{}:
default: // a flush was already requested, skip
}
}
select {
case t.errorBuffer <- err:
default:
// OK, if we get this, our error error buffer is full,
// we can assume it is filled with meaningful messages which
// are going to be logged and hopefully read, nothing better
// we can do, blocking would make things worse.
}
}
// StartSpan creates, starts, and returns a new Span with the given `operationName`.
func (t *tracer) StartSpan(operationName string, options ...ddtrace.StartSpanOption) ddtrace.Span {
var opts ddtrace.StartSpanConfig
@ -313,18 +282,16 @@ func (t *tracer) Extract(carrier interface{}) (ddtrace.SpanContext, error) {
return t.config.propagator.Extract(carrier)
}
// flushTraces will push any currently buffered traces to the server.
func (t *tracer) flushTraces() {
// flush will push any currently buffered traces to the server.
func (t *tracer) flush() {
if t.payload.itemCount() == 0 {
return
}
size, count := t.payload.size(), t.payload.itemCount()
if t.config.debug {
log.Printf("Sending payload: size: %d traces: %d\n", size, count)
}
log.Debug("Sending payload: size: %d traces: %d\n", size, count)
rc, err := t.config.transport.send(t.payload)
if err != nil {
t.pushError(&dataLossError{context: err, count: count})
log.Error("lost %d traces: %v", count, err)
}
if err == nil {
t.prioritySampling.readRatesJSON(rc) // TODO: handle error?
@ -332,16 +299,6 @@ func (t *tracer) flushTraces() {
t.payload.reset()
}
// flushErrors will process log messages that were queued
func (t *tracer) flushErrors() {
logErrors(t.errorBuffer)
}
func (t *tracer) flush() {
t.flushTraces()
t.flushErrors()
}
// forceFlush forces a flush of data (traces and services) to the agent.
// Flushes are done by a background task on a regular basis, so you never
// need to call this manually, mostly useful for testing and debugging.
@ -355,7 +312,7 @@ func (t *tracer) forceFlush() {
// larger than the threshold as a result, it sends a flush request.
func (t *tracer) pushPayload(trace []*span) {
if err := t.payload.push(trace); err != nil {
t.pushError(&traceEncodingError{context: err})
log.Error("error encoding msgpack: %v", err)
}
if t.payload.size() > payloadSizeLimit {
// getting large

View file

@ -10,13 +10,11 @@ import (
"strconv"
"strings"
"time"
"gopkg.in/DataDog/dd-trace-go.v1/internal/version"
)
var (
// TODO(gbbr): find a more effective way to keep this up to date,
// e.g. via `go generate`
tracerVersion = "v1.13.1"
// We copy the transport to avoid using the default one, as it might be
// augmented with tracing and we don't want these calls to be recorded.
// See https://golang.org/pkg/net/http/#DefaultTransport .
@ -84,7 +82,7 @@ func newHTTPTransport(addr string, roundTripper http.RoundTripper) *httpTranspor
"Datadog-Meta-Lang": "go",
"Datadog-Meta-Lang-Version": strings.TrimPrefix(runtime.Version(), "go"),
"Datadog-Meta-Lang-Interpreter": runtime.Compiler + "-" + runtime.GOARCH + "-" + runtime.GOOS,
"Datadog-Meta-Tracer-Version": tracerVersion,
"Datadog-Meta-Tracer-Version": version.Tag,
"Content-Type": "application/msgpack",
}
return &httpTransport{

View file

@ -0,0 +1,163 @@
// Package log provides logging utilities for the tracer.
package log
import (
"fmt"
"log"
"os"
"strconv"
"sync"
"time"
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace"
"gopkg.in/DataDog/dd-trace-go.v1/internal/version"
)
// Level specifies the logging level that the log package prints at.
type Level int
const (
// LevelDebug represents debug level messages.
LevelDebug Level = iota
// LevelWarn represents warning and errors.
LevelWarn
)
var prefixMsg = fmt.Sprintf("Datadog Tracer %s", version.Tag)
var (
mu sync.RWMutex // guards below fields
level = LevelWarn
logger ddtrace.Logger = &defaultLogger{l: log.New(os.Stderr, "", log.LstdFlags)}
)
// UseLogger sets l as the active logger.
func UseLogger(l ddtrace.Logger) {
mu.Lock()
defer mu.Unlock()
logger = l
}
// SetLevel sets the given lvl for logging.
func SetLevel(lvl Level) {
mu.Lock()
defer mu.Unlock()
level = lvl
}
// Debug prints the given message if the level is LevelDebug.
func Debug(fmt string, a ...interface{}) {
mu.RLock()
lvl := level
mu.RUnlock()
if lvl != LevelDebug {
return
}
printMsg("DEBUG", fmt, a...)
}
// Warn prints a warning message.
func Warn(fmt string, a ...interface{}) {
printMsg("WARN", fmt, a...)
}
var (
errmu sync.RWMutex // guards below fields
erragg = map[string]*errorReport{} // aggregated errors
errrate = time.Minute // the rate at which errors are reported
erron bool // true if errors are being aggregated
)
func init() {
if v := os.Getenv("DD_LOGGING_RATE"); v != "" {
if sec, err := strconv.ParseUint(v, 10, 64); err != nil {
Warn("Invalid value for DD_LOGGING_RATE: %v", err)
} else {
errrate = time.Duration(sec) * time.Second
}
}
}
type errorReport struct {
first time.Time // time when first error occurred
err error
count uint64
}
// Error reports an error. Errors get aggregated and logged periodically. The
// default is once per minute or once every DD_LOGGING_RATE number of seconds.
func Error(format string, a ...interface{}) {
key := format // format should 99.9% of the time be constant
if reachedLimit(key) {
// avoid too much lock contention on spammy errors
return
}
errmu.Lock()
defer errmu.Unlock()
report, ok := erragg[key]
if !ok {
erragg[key] = &errorReport{
err: fmt.Errorf(format, a...),
first: time.Now(),
}
report = erragg[key]
}
report.count++
if errrate == 0 {
flushLocked()
return
}
if !erron {
erron = true
time.AfterFunc(errrate, Flush)
}
}
// defaultErrorLimit specifies the maximum number of errors gathered in a report.
const defaultErrorLimit = 200
// reachedLimit reports whether the maximum count has been reached for this key.
func reachedLimit(key string) bool {
errmu.RLock()
e, ok := erragg[key]
confirm := ok && e.count > defaultErrorLimit
errmu.RUnlock()
return confirm
}
// Flush flushes and resets all aggregated errors to the logger.
func Flush() {
errmu.Lock()
defer errmu.Unlock()
flushLocked()
}
func flushLocked() {
for _, report := range erragg {
msg := fmt.Sprintf("%v", report.err)
if report.count > defaultErrorLimit {
msg += fmt.Sprintf(", %d+ additional messages skipped (first occurrence: %s)", defaultErrorLimit, report.first.Format(time.RFC822))
} else if report.count > 1 {
msg += fmt.Sprintf(", %d additional messages skipped (first occurrence: %s)", report.count-1, report.first.Format(time.RFC822))
} else {
msg += fmt.Sprintf(" (occurred: %s)", report.first.Format(time.RFC822))
}
printMsg("ERROR", msg)
}
for k := range erragg {
// compiler-optimized map-clearing post go1.11 (golang/go#20138)
delete(erragg, k)
}
erron = false
}
func printMsg(lvl, format string, a ...interface{}) {
msg := fmt.Sprintf("%s %s: %s", prefixMsg, lvl, fmt.Sprintf(format, a...))
mu.RLock()
logger.Log(msg)
mu.RUnlock()
}
type defaultLogger struct{ l *log.Logger }
func (p *defaultLogger) Log(msg string) { p.l.Print(msg) }

View file

@ -0,0 +1,6 @@
package version
// Tag specifies the current release tag. It needs to be manually
// updated. A test checks that the value of Tag never points to a
// git tag that is older than HEAD.
const Tag = "v1.15.0"