1
0
Fork 0

Add TLS certs expiration metric

This commit is contained in:
Sylvain Rabot 2020-12-18 18:44:03 +01:00 committed by GitHub
parent 3140a4e0cd
commit a3327c4430
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 291 additions and 62 deletions

View file

@ -20,18 +20,19 @@ var datadogTicker *time.Ticker
// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
const (
ddMetricsServiceReqsName = "service.request.total"
ddMetricsServiceLatencyName = "service.request.duration"
ddRetriesTotalName = "service.retries.total"
ddConfigReloadsName = "config.reload.total"
ddConfigReloadsFailureTagName = "failure"
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
ddLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
ddEntryPointReqsName = "entrypoint.request.total"
ddEntryPointReqDurationName = "entrypoint.request.duration"
ddEntryPointOpenConnsName = "entrypoint.connections.open"
ddOpenConnsName = "service.connections.open"
ddServerUpName = "service.server.up"
ddMetricsServiceReqsName = "service.request.total"
ddMetricsServiceLatencyName = "service.request.duration"
ddRetriesTotalName = "service.retries.total"
ddConfigReloadsName = "config.reload.total"
ddConfigReloadsFailureTagName = "failure"
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
ddLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
ddEntryPointReqsName = "entrypoint.request.total"
ddEntryPointReqDurationName = "entrypoint.request.duration"
ddEntryPointOpenConnsName = "entrypoint.connections.open"
ddOpenConnsName = "service.connections.open"
ddServerUpName = "service.server.up"
ddTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"
)
// RegisterDatadog registers the metrics pusher if this didn't happen yet and creates a datadog Registry instance.
@ -41,10 +42,11 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
}
registry := &standardRegistry{
configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
configReloadsFailureCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0).With(ddConfigReloadsFailureTagName, "true"),
lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: datadogClient.NewGauge(ddLastConfigReloadFailureName),
configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
configReloadsFailureCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0).With(ddConfigReloadsFailureTagName, "true"),
lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: datadogClient.NewGauge(ddLastConfigReloadFailureName),
tlsCertsNotAfterTimestampGauge: datadogClient.NewGauge(ddTLSCertsNotAfterTimestampName),
}
if config.AddEntryPointsLabels {

View file

@ -36,6 +36,7 @@ func TestDatadog(t *testing.T) {
"traefik.entrypoint.request.duration:10000.000000|h|#entrypoint:test\n",
"traefik.entrypoint.connections.open:1.000000|g|#entrypoint:test\n",
"traefik.service.server.up:1.000000|g|#service:test,url:http://127.0.0.1,one:two\n",
"traefik.tls.certs.notAfterTimestamp:1.000000|g|#key:value\n",
}
udp.ShouldReceiveAll(t, expected, func() {
@ -50,5 +51,6 @@ func TestDatadog(t *testing.T) {
datadogRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
datadogRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
datadogRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1", "one", "two").Set(1)
datadogRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
})
}

View file

@ -26,18 +26,19 @@ type influxDBWriter struct {
var influxDBTicker *time.Ticker
const (
influxDBMetricsServiceReqsName = "traefik.service.requests.total"
influxDBMetricsServiceLatencyName = "traefik.service.request.duration"
influxDBRetriesTotalName = "traefik.service.retries.total"
influxDBConfigReloadsName = "traefik.config.reload.total"
influxDBConfigReloadsFailureName = influxDBConfigReloadsName + ".failure"
influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp"
influxDBLastConfigReloadFailureName = "traefik.config.reload.lastFailureTimestamp"
influxDBEntryPointReqsName = "traefik.entrypoint.requests.total"
influxDBEntryPointReqDurationName = "traefik.entrypoint.request.duration"
influxDBEntryPointOpenConnsName = "traefik.entrypoint.connections.open"
influxDBOpenConnsName = "traefik.service.connections.open"
influxDBServerUpName = "traefik.service.server.up"
influxDBMetricsServiceReqsName = "traefik.service.requests.total"
influxDBMetricsServiceLatencyName = "traefik.service.request.duration"
influxDBRetriesTotalName = "traefik.service.retries.total"
influxDBConfigReloadsName = "traefik.config.reload.total"
influxDBConfigReloadsFailureName = influxDBConfigReloadsName + ".failure"
influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp"
influxDBLastConfigReloadFailureName = "traefik.config.reload.lastFailureTimestamp"
influxDBEntryPointReqsName = "traefik.entrypoint.requests.total"
influxDBEntryPointReqDurationName = "traefik.entrypoint.request.duration"
influxDBEntryPointOpenConnsName = "traefik.entrypoint.connections.open"
influxDBOpenConnsName = "traefik.service.connections.open"
influxDBServerUpName = "traefik.service.server.up"
influxDBTLSCertsNotAfterTimestampName = "traefik.tls.certs.notAfterTimestamp"
)
const (
@ -55,10 +56,11 @@ func RegisterInfluxDB(ctx context.Context, config *types.InfluxDB) Registry {
}
registry := &standardRegistry{
configReloadsCounter: influxDBClient.NewCounter(influxDBConfigReloadsName),
configReloadsFailureCounter: influxDBClient.NewCounter(influxDBConfigReloadsFailureName),
lastConfigReloadSuccessGauge: influxDBClient.NewGauge(influxDBLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: influxDBClient.NewGauge(influxDBLastConfigReloadFailureName),
configReloadsCounter: influxDBClient.NewCounter(influxDBConfigReloadsName),
configReloadsFailureCounter: influxDBClient.NewCounter(influxDBConfigReloadsFailureName),
lastConfigReloadSuccessGauge: influxDBClient.NewGauge(influxDBLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: influxDBClient.NewGauge(influxDBLastConfigReloadFailureName),
tlsCertsNotAfterTimestampGauge: influxDBClient.NewGauge(influxDBTLSCertsNotAfterTimestampName),
}
if config.AddEntryPointsLabels {

View file

@ -64,6 +64,16 @@ func TestInfluxDB(t *testing.T) {
})
assertMessage(t, msgEntrypoint, expectedEntrypoint)
expectedTLS := []string{
`(traefik\.tls\.certs\.notAfterTimestamp,key=value value=1) [\d]{19}`,
}
msgTLS := udp.ReceiveString(t, func() {
influxDBRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
})
assertMessage(t, msgTLS, expectedTLS)
}
func TestInfluxDBHTTP(t *testing.T) {
@ -121,6 +131,15 @@ func TestInfluxDBHTTP(t *testing.T) {
msgEntrypoint := <-c
assertMessage(t, *msgEntrypoint, expectedEntrypoint)
expectedTLS := []string{
`(traefik\.tls\.certs\.notAfterTimestamp,key=value value=1) [\d]{19}`,
}
influxDBRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
msgTLS := <-c
assertMessage(t, *msgTLS, expectedTLS)
}
func assertMessage(t *testing.T, msg string, patterns []string) {

View file

@ -21,6 +21,9 @@ type Registry interface {
LastConfigReloadSuccessGauge() metrics.Gauge
LastConfigReloadFailureGauge() metrics.Gauge
// TLS
TLSCertsNotAfterTimestampGauge() metrics.Gauge
// entry point metrics
EntryPointReqsCounter() metrics.Counter
EntryPointReqsTLSCounter() metrics.Counter
@ -50,6 +53,7 @@ func NewMultiRegistry(registries []Registry) Registry {
var configReloadsFailureCounter []metrics.Counter
var lastConfigReloadSuccessGauge []metrics.Gauge
var lastConfigReloadFailureGauge []metrics.Gauge
var tlsCertsNotAfterTimestampGauge []metrics.Gauge
var entryPointReqsCounter []metrics.Counter
var entryPointReqsTLSCounter []metrics.Counter
var entryPointReqDurationHistogram []ScalableHistogram
@ -74,6 +78,9 @@ func NewMultiRegistry(registries []Registry) Registry {
if r.LastConfigReloadFailureGauge() != nil {
lastConfigReloadFailureGauge = append(lastConfigReloadFailureGauge, r.LastConfigReloadFailureGauge())
}
if r.TLSCertsNotAfterTimestampGauge() != nil {
tlsCertsNotAfterTimestampGauge = append(tlsCertsNotAfterTimestampGauge, r.TLSCertsNotAfterTimestampGauge())
}
if r.EntryPointReqsCounter() != nil {
entryPointReqsCounter = append(entryPointReqsCounter, r.EntryPointReqsCounter())
}
@ -113,6 +120,7 @@ func NewMultiRegistry(registries []Registry) Registry {
configReloadsFailureCounter: multi.NewCounter(configReloadsFailureCounter...),
lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...),
tlsCertsNotAfterTimestampGauge: multi.NewGauge(tlsCertsNotAfterTimestampGauge...),
entryPointReqsCounter: multi.NewCounter(entryPointReqsCounter...),
entryPointReqsTLSCounter: multi.NewCounter(entryPointReqsTLSCounter...),
entryPointReqDurationHistogram: NewMultiHistogram(entryPointReqDurationHistogram...),
@ -133,6 +141,7 @@ type standardRegistry struct {
configReloadsFailureCounter metrics.Counter
lastConfigReloadSuccessGauge metrics.Gauge
lastConfigReloadFailureGauge metrics.Gauge
tlsCertsNotAfterTimestampGauge metrics.Gauge
entryPointReqsCounter metrics.Counter
entryPointReqsTLSCounter metrics.Counter
entryPointReqDurationHistogram ScalableHistogram
@ -169,6 +178,10 @@ func (r *standardRegistry) LastConfigReloadFailureGauge() metrics.Gauge {
return r.lastConfigReloadFailureGauge
}
// TLSCertsNotAfterTimestampGauge returns the gauge stored in the registry's
// tlsCertsNotAfterTimestampGauge field, which backends register as the TLS
// certificate not-after (expiration) timestamp metric.
func (r *standardRegistry) TLSCertsNotAfterTimestampGauge() metrics.Gauge {
return r.tlsCertsNotAfterTimestampGauge
}
// EntryPointReqsCounter returns the counter stored in the registry's
// entryPointReqsCounter field.
// NOTE(review): the field appears to be left unset (nil) when entry point
// labels are disabled; callers such as NewMultiRegistry nil-check the result.
func (r *standardRegistry) EntryPointReqsCounter() metrics.Counter {
return r.entryPointReqsCounter
}

View file

@ -29,6 +29,10 @@ const (
configLastReloadSuccessName = metricConfigPrefix + "last_reload_success"
configLastReloadFailureName = metricConfigPrefix + "last_reload_failure"
// TLS.
metricsTLSPrefix = MetricNamePrefix + "tls_"
tlsCertsNotAfterTimestamp = metricsTLSPrefix + "certs_not_after"
// entry point.
metricEntryPointPrefix = MetricNamePrefix + "entrypoint_"
entryPointReqsTotalName = metricEntryPointPrefix + "requests_total"
@ -121,21 +125,27 @@ func initStandardRegistry(config *types.Prometheus) Registry {
Name: configLastReloadFailureName,
Help: "Last config reload failure",
}, []string{})
tlsCertsNotAfterTimesptamp := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
Name: tlsCertsNotAfterTimestamp,
Help: "Certificate expiration timestamp",
}, []string{"cn", "serial", "sans"})
promState.describers = []func(chan<- *stdprometheus.Desc){
configReloads.cv.Describe,
configReloadsFailures.cv.Describe,
lastConfigReloadSuccess.gv.Describe,
lastConfigReloadFailure.gv.Describe,
tlsCertsNotAfterTimesptamp.gv.Describe,
}
reg := &standardRegistry{
epEnabled: config.AddEntryPointsLabels,
svcEnabled: config.AddServicesLabels,
configReloadsCounter: configReloads,
configReloadsFailureCounter: configReloadsFailures,
lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
lastConfigReloadFailureGauge: lastConfigReloadFailure,
epEnabled: config.AddEntryPointsLabels,
svcEnabled: config.AddServicesLabels,
configReloadsCounter: configReloads,
configReloadsFailureCounter: configReloadsFailures,
lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
lastConfigReloadFailureGauge: lastConfigReloadFailure,
tlsCertsNotAfterTimestampGauge: tlsCertsNotAfterTimesptamp,
}
if config.AddEntryPointsLabels {
@ -163,11 +173,13 @@ func initStandardRegistry(config *types.Prometheus) Registry {
entryPointReqDurations.hv.Describe,
entryPointOpenConns.gv.Describe,
}...)
reg.entryPointReqsCounter = entryPointReqs
reg.entryPointReqsTLSCounter = entryPointReqsTLS
reg.entryPointReqDurationHistogram, _ = NewHistogramWithScale(entryPointReqDurations, time.Second)
reg.entryPointOpenConnsGauge = entryPointOpenConns
}
if config.AddServicesLabels {
serviceReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
Name: serviceReqsTotalName,

View file

@ -116,6 +116,11 @@ func TestPrometheus(t *testing.T) {
prometheusRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
prometheusRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))
prometheusRegistry.
TLSCertsNotAfterTimestampGauge().
With("cn", "value", "serial", "value", "sans", "value").
Set(float64(time.Now().Unix()))
prometheusRegistry.
EntryPointReqsCounter().
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
@ -175,6 +180,15 @@ func TestPrometheus(t *testing.T) {
name: configLastReloadFailureName,
assert: buildTimestampAssert(t, configLastReloadFailureName),
},
{
name: tlsCertsNotAfterTimestamp,
labels: map[string]string{
"cn": "value",
"serial": "value",
"sans": "value",
},
assert: buildTimestampAssert(t, tlsCertsNotAfterTimestamp),
},
{
name: entryPointReqsTotalName,
labels: map[string]string{

View file

@ -17,18 +17,19 @@ var (
)
const (
statsdMetricsServiceReqsName = "service.request.total"
statsdMetricsServiceLatencyName = "service.request.duration"
statsdRetriesTotalName = "service.retries.total"
statsdConfigReloadsName = "config.reload.total"
statsdConfigReloadsFailureName = statsdConfigReloadsName + ".failure"
statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
statsdLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
statsdEntryPointReqsName = "entrypoint.request.total"
statsdEntryPointReqDurationName = "entrypoint.request.duration"
statsdEntryPointOpenConnsName = "entrypoint.connections.open"
statsdOpenConnsName = "service.connections.open"
statsdServerUpName = "service.server.up"
statsdMetricsServiceReqsName = "service.request.total"
statsdMetricsServiceLatencyName = "service.request.duration"
statsdRetriesTotalName = "service.retries.total"
statsdConfigReloadsName = "config.reload.total"
statsdConfigReloadsFailureName = statsdConfigReloadsName + ".failure"
statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
statsdLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
statsdEntryPointReqsName = "entrypoint.request.total"
statsdEntryPointReqDurationName = "entrypoint.request.duration"
statsdEntryPointOpenConnsName = "entrypoint.connections.open"
statsdOpenConnsName = "service.connections.open"
statsdServerUpName = "service.server.up"
statsdTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"
)
// RegisterStatsd registers the metrics pusher if this didn't happen yet and creates a statsd Registry instance.
@ -48,10 +49,11 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry {
}
registry := &standardRegistry{
configReloadsCounter: statsdClient.NewCounter(statsdConfigReloadsName, 1.0),
configReloadsFailureCounter: statsdClient.NewCounter(statsdConfigReloadsFailureName, 1.0),
lastConfigReloadSuccessGauge: statsdClient.NewGauge(statsdLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: statsdClient.NewGauge(statsdLastConfigReloadFailureName),
configReloadsCounter: statsdClient.NewCounter(statsdConfigReloadsName, 1.0),
configReloadsFailureCounter: statsdClient.NewCounter(statsdConfigReloadsFailureName, 1.0),
lastConfigReloadSuccessGauge: statsdClient.NewGauge(statsdLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: statsdClient.NewGauge(statsdLastConfigReloadFailureName),
tlsCertsNotAfterTimestampGauge: statsdClient.NewGauge(statsdTLSCertsNotAfterTimestampName),
}
if config.AddEntryPointsLabels {

View file

@ -35,6 +35,7 @@ func TestStatsD(t *testing.T) {
"traefik.entrypoint.request.duration:10000.000000|ms",
"traefik.entrypoint.connections.open:1.000000|g\n",
"traefik.service.server.up:1.000000|g\n",
"tls.certs.notAfterTimestamp:1.000000|g\n",
}
udp.ShouldReceiveAll(t, expected, func() {
@ -49,6 +50,7 @@ func TestStatsD(t *testing.T) {
statsdRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
statsdRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
statsdRegistry.ServiceServerUpGauge().With("service:test", "url", "http://127.0.0.1").Set(1)
statsdRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
})
}
@ -75,6 +77,7 @@ func TestStatsDWithPrefix(t *testing.T) {
"testPrefix.entrypoint.request.duration:10000.000000|ms",
"testPrefix.entrypoint.connections.open:1.000000|g\n",
"testPrefix.service.server.up:1.000000|g\n",
"tls.certs.notAfterTimestamp:1.000000|g\n",
}
udp.ShouldReceiveAll(t, expected, func() {
@ -89,5 +92,6 @@ func TestStatsDWithPrefix(t *testing.T) {
statsdRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
statsdRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
statsdRegistry.ServiceServerUpGauge().With("service:test", "url", "http://127.0.0.1").Set(1)
statsdRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
})
}

View file

@ -56,15 +56,16 @@ func (c CertificateStore) getDefaultCertificateDomains() []string {
// GetAllDomains returns a slice with all the certificate domains.
func (c CertificateStore) GetAllDomains() []string {
allCerts := c.getDefaultCertificateDomains()
allDomains := c.getDefaultCertificateDomains()
// Get dynamic certificates
if c.DynamicCerts != nil && c.DynamicCerts.Get() != nil {
for domains := range c.DynamicCerts.Get().(map[string]*tls.Certificate) {
allCerts = append(allCerts, domains)
for domain := range c.DynamicCerts.Get().(map[string]*tls.Certificate) {
allDomains = append(allDomains, domain)
}
}
return allCerts
return allDomains
}
// GetBestCertificate returns the best match certificate, and caches the response.

View file

@ -131,6 +131,27 @@ func (m *Manager) Get(storeName, configName string) (*tls.Config, error) {
return tlsConfig, err
}
// GetCertificates returns the parsed first (leaf) certificate of every dynamic
// certificate found in the manager's stores.
//
// Stores without dynamic certificates are ignored. Entries that are nil, that
// carry an empty certificate chain, or whose first chain element cannot be
// parsed as X.509 are skipped, so the result may hold fewer elements than the
// stores do. The result is nil when nothing could be collected, and its order
// is unspecified because it is built from map iteration.
func (m *Manager) GetCertificates() []*x509.Certificate {
	var certificates []*x509.Certificate

	for _, store := range m.stores {
		if store.DynamicCerts == nil {
			continue
		}

		// Read the stored value once so the nil check and the iteration
		// below are guaranteed to look at the same value.
		dynamicCerts := store.DynamicCerts.Get()
		if dynamicCerts == nil {
			continue
		}

		for _, cert := range dynamicCerts.(map[string]*tls.Certificate) {
			// Indexing Certificate[0] on a nil entry or an empty chain
			// would panic, so such entries are skipped.
			if cert == nil || len(cert.Certificate) == 0 {
				continue
			}

			x509Cert, err := x509.ParseCertificate(cert.Certificate[0])
			if err != nil {
				// Best effort: one unparsable certificate must not
				// prevent the others from being returned.
				continue
			}

			certificates = append(certificates, x509Cert)
		}
	}

	return certificates
}
func (m *Manager) getStore(storeName string) *CertificateStore {
_, ok := m.stores[storeName]
if !ok {