Add Tailscale certificate resolver
Co-authored-by: Mathieu Lonjaret <mathieu.lonjaret@gmail.com>
This commit is contained in:
parent
033fccccc7
commit
38d7011487
13 changed files with 957 additions and 48 deletions
366
pkg/provider/tailscale/provider.go
Normal file
366
pkg/provider/tailscale/provider.go
Normal file
|
@ -0,0 +1,366 @@
|
|||
package tailscale
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/tailscale/tscert"
|
||||
"github.com/traefik/traefik/v2/pkg/config/dynamic"
|
||||
"github.com/traefik/traefik/v2/pkg/log"
|
||||
"github.com/traefik/traefik/v2/pkg/muxer/http"
|
||||
"github.com/traefik/traefik/v2/pkg/muxer/tcp"
|
||||
"github.com/traefik/traefik/v2/pkg/safe"
|
||||
traefiktls "github.com/traefik/traefik/v2/pkg/tls"
|
||||
)
|
||||
|
||||
// Provider is the Tailscale certificates provider implementation. It receives
|
||||
// configuration updates (e.g. new router, with new domain) from Traefik core,
|
||||
// fetches the corresponding TLS certificates from the Tailscale daemon, and
|
||||
// sends back to Traefik core a configuration updated with the certificates.
|
||||
type Provider struct {
|
||||
ResolverName string
|
||||
|
||||
dynConfigs chan dynamic.Configuration // updates from Traefik core
|
||||
dynMessages chan<- dynamic.Message // update to Traefik core
|
||||
|
||||
certByDomainMu sync.RWMutex
|
||||
certByDomain map[string]traefiktls.Certificate
|
||||
}
|
||||
|
||||
// ThrottleDuration implements the aggregator.throttled interface, in order to
|
||||
// ensure that this provider is unthrottled.
|
||||
func (p *Provider) ThrottleDuration() time.Duration {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Init implements the provider.Provider interface.
|
||||
func (p *Provider) Init() error {
|
||||
p.dynConfigs = make(chan dynamic.Configuration)
|
||||
p.certByDomain = make(map[string]traefiktls.Certificate)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// HandleConfigUpdate hands out a configuration update to the provider.
|
||||
func (p *Provider) HandleConfigUpdate(cfg dynamic.Configuration) {
|
||||
p.dynConfigs <- cfg
|
||||
}
|
||||
|
||||
// Provide starts the provider, which will henceforth send configuration
|
||||
// updates on dynMessages.
|
||||
func (p *Provider) Provide(dynMessages chan<- dynamic.Message, pool *safe.Pool) error {
|
||||
p.dynMessages = dynMessages
|
||||
|
||||
fields := log.Str(log.ProviderName, p.ResolverName+".tailscale")
|
||||
|
||||
pool.GoCtx(func(ctx context.Context) {
|
||||
p.watchDomains(log.With(ctx, fields))
|
||||
})
|
||||
|
||||
pool.GoCtx(func(ctx context.Context) {
|
||||
p.renewCertificates(log.With(ctx, fields))
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// watchDomains watches for Tailscale domain certificates that should be
|
||||
// fetched from the Tailscale daemon.
|
||||
func (p *Provider) watchDomains(ctx context.Context) {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
|
||||
case cfg := <-p.dynConfigs:
|
||||
domains := p.findDomains(ctx, cfg)
|
||||
newDomains := p.findNewDomains(domains)
|
||||
purged := p.purgeUnusedCerts(domains)
|
||||
|
||||
if len(newDomains) == 0 && !purged {
|
||||
continue
|
||||
}
|
||||
|
||||
// TODO: what should we do if the fetched certificate is going to expire before the next refresh tick?
|
||||
p.fetchCerts(ctx, newDomains)
|
||||
p.sendDynamicConfig()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// renewCertificates routinely renews previously resolved Tailscale
|
||||
// certificates before they expire.
|
||||
func (p *Provider) renewCertificates(ctx context.Context) {
|
||||
ticker := time.NewTicker(24 * time.Hour)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
|
||||
case <-ticker.C:
|
||||
p.certByDomainMu.RLock()
|
||||
var domainsToRenew []string
|
||||
for domain, cert := range p.certByDomain {
|
||||
tlsCert, err := cert.GetCertificateFromBytes()
|
||||
if err != nil {
|
||||
log.FromContext(ctx).
|
||||
WithError(err).
|
||||
Errorf("Unable to get certificate for domain %s", domain)
|
||||
continue
|
||||
}
|
||||
|
||||
// Tailscale tries to renew certificates 14 days before its expiration date.
|
||||
// See https://github.com/tailscale/tailscale/blob/d9efbd97cbf369151e31453749f6692df7413709/ipn/localapi/cert.go#L116
|
||||
if isValidCert(tlsCert, domain, time.Now().AddDate(0, 0, 14)) {
|
||||
continue
|
||||
}
|
||||
|
||||
domainsToRenew = append(domainsToRenew, domain)
|
||||
}
|
||||
p.certByDomainMu.RUnlock()
|
||||
|
||||
if len(domainsToRenew) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
p.fetchCerts(ctx, domainsToRenew)
|
||||
p.sendDynamicConfig()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// findDomains goes through the given dynamic.Configuration and returns all
|
||||
// Tailscale-specific domains found.
|
||||
func (p *Provider) findDomains(ctx context.Context, cfg dynamic.Configuration) []string {
|
||||
logger := log.FromContext(ctx)
|
||||
|
||||
var domains []string
|
||||
|
||||
if cfg.HTTP != nil {
|
||||
for _, router := range cfg.HTTP.Routers {
|
||||
if router.TLS == nil || router.TLS.CertResolver != p.ResolverName {
|
||||
continue
|
||||
}
|
||||
|
||||
// As a domain list is explicitly defined we are only using the
|
||||
// configured domains. Only the Main domain is considered as
|
||||
// Tailscale domain certificate does not support multiple SANs.
|
||||
if len(router.TLS.Domains) > 0 {
|
||||
for _, domain := range router.TLS.Domains {
|
||||
domains = append(domains, domain.Main)
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
parsedDomains, err := http.ParseDomains(router.Rule)
|
||||
if err != nil {
|
||||
logger.Errorf("Unable to parse HTTP router domains: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
domains = append(domains, parsedDomains...)
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.TCP != nil {
|
||||
for _, router := range cfg.TCP.Routers {
|
||||
if router.TLS == nil || router.TLS.CertResolver != p.ResolverName {
|
||||
continue
|
||||
}
|
||||
|
||||
// As a domain list is explicitly defined we are only using the
|
||||
// configured domains. Only the Main domain is considered as
|
||||
// Tailscale domain certificate does not support multiple SANs.
|
||||
if len(router.TLS.Domains) > 0 {
|
||||
for _, domain := range router.TLS.Domains {
|
||||
domains = append(domains, domain.Main)
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
parsedDomains, err := tcp.ParseHostSNI(router.Rule)
|
||||
if err != nil {
|
||||
logger.Errorf("Unable to parse TCP router domains: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
domains = append(domains, parsedDomains...)
|
||||
}
|
||||
}
|
||||
|
||||
return sanitizeDomains(ctx, domains)
|
||||
}
|
||||
|
||||
// findNewDomains returns the domains that have not already been fetched from
|
||||
// the Tailscale daemon.
|
||||
func (p *Provider) findNewDomains(domains []string) []string {
|
||||
p.certByDomainMu.RLock()
|
||||
defer p.certByDomainMu.RUnlock()
|
||||
|
||||
var newDomains []string
|
||||
for _, domain := range domains {
|
||||
if _, ok := p.certByDomain[domain]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
newDomains = append(newDomains, domain)
|
||||
}
|
||||
|
||||
return newDomains
|
||||
}
|
||||
|
||||
// purgeUnusedCerts purges the certByDomain map by removing unused certificates
|
||||
// and returns whether some certificates have been removed.
|
||||
func (p *Provider) purgeUnusedCerts(domains []string) bool {
|
||||
p.certByDomainMu.Lock()
|
||||
defer p.certByDomainMu.Unlock()
|
||||
|
||||
newCertByDomain := make(map[string]traefiktls.Certificate)
|
||||
for _, domain := range domains {
|
||||
if cert, ok := p.certByDomain[domain]; ok {
|
||||
newCertByDomain[domain] = cert
|
||||
}
|
||||
}
|
||||
|
||||
purged := len(p.certByDomain) > len(newCertByDomain)
|
||||
|
||||
p.certByDomain = newCertByDomain
|
||||
|
||||
return purged
|
||||
}
|
||||
|
||||
// fetchCerts fetches the certificates for the provided domains from the
|
||||
// Tailscale daemon.
|
||||
func (p *Provider) fetchCerts(ctx context.Context, domains []string) {
|
||||
logger := log.FromContext(ctx)
|
||||
|
||||
for _, domain := range domains {
|
||||
cert, key, err := tscert.CertPair(ctx, domain)
|
||||
if err != nil {
|
||||
logger.WithError(err).Errorf("Unable to fetch certificate for domain %q", domain)
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Debugf("Fetched certificate for domain %q", domain)
|
||||
|
||||
p.certByDomainMu.Lock()
|
||||
p.certByDomain[domain] = traefiktls.Certificate{
|
||||
CertFile: traefiktls.FileOrContent(cert),
|
||||
KeyFile: traefiktls.FileOrContent(key),
|
||||
}
|
||||
p.certByDomainMu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// sendDynamicConfig sends a dynamic.Message with the dynamic.Configuration
|
||||
// containing the newly generated (or renewed) Tailscale certs.
|
||||
func (p *Provider) sendDynamicConfig() {
|
||||
p.certByDomainMu.RLock()
|
||||
defer p.certByDomainMu.RUnlock()
|
||||
|
||||
// TODO: we always send back to traefik core the set of certificates
|
||||
// sorted, to make sure that two identical sets, that would be sorted
|
||||
// differently, do not trigger another configuration update because of the
|
||||
// mismatch. But in reality we should not end up sending a certificates
|
||||
// update if there was no new certs to generate or renew in the first
|
||||
// place, so this scenario should never happen, and the sorting might
|
||||
// actually not be needed.
|
||||
var sortedDomains []string
|
||||
for domain := range p.certByDomain {
|
||||
sortedDomains = append(sortedDomains, domain)
|
||||
}
|
||||
sort.Strings(sortedDomains)
|
||||
|
||||
var certs []*traefiktls.CertAndStores
|
||||
for _, domain := range sortedDomains {
|
||||
// Only the default store is supported.
|
||||
certs = append(certs, &traefiktls.CertAndStores{
|
||||
Stores: []string{traefiktls.DefaultTLSStoreName},
|
||||
Certificate: p.certByDomain[domain],
|
||||
})
|
||||
}
|
||||
|
||||
p.dynMessages <- dynamic.Message{
|
||||
ProviderName: p.ResolverName + ".tailscale",
|
||||
Configuration: &dynamic.Configuration{
|
||||
TLS: &dynamic.TLSConfiguration{Certificates: certs},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// sanitizeDomains removes duplicated and invalid Tailscale subdomains, from
|
||||
// the provided list.
|
||||
func sanitizeDomains(ctx context.Context, domains []string) []string {
|
||||
logger := log.FromContext(ctx)
|
||||
|
||||
seen := map[string]struct{}{}
|
||||
|
||||
var sanitizedDomains []string
|
||||
for _, domain := range domains {
|
||||
if _, ok := seen[domain]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if !isTailscaleDomain(domain) {
|
||||
logger.Errorf("Domain %s is not a valid Tailscale domain", domain)
|
||||
continue
|
||||
}
|
||||
|
||||
sanitizedDomains = append(sanitizedDomains, domain)
|
||||
seen[domain] = struct{}{}
|
||||
}
|
||||
return sanitizedDomains
|
||||
}
|
||||
|
||||
// isTailscaleDomain returns whether the given domain is a valid Tailscale
// domain. A valid Tailscale domain has the following form:
// machine-name.domains-alias.ts.net.
//
// The domain must be made of exactly four dot-separated labels, the last two
// being "ts" and "net", and the first two (machine name and domain alias)
// being non-empty. The comparison is case-sensitive.
func isTailscaleDomain(domain string) bool {
	// TODO: extra check, against the actual list of allowed domains names,
	// provided by the Tailscale daemon status?
	labels := strings.Split(domain, ".")
	if len(labels) != 4 {
		return false
	}

	// strings.Split yields empty labels for leading or consecutive dots
	// (e.g. "..ts.net"); such names cannot designate a machine.
	if labels[0] == "" || labels[1] == "" {
		return false
	}

	return labels[2] == "ts" && labels[3] == "net"
}
|
||||
|
||||
// isValidCert returns whether the given tls.Certificate is valid for the given
// domain at the given time.
//
// The first entry of cert.Certificate is verified as the leaf, and every
// following entry is offered as an intermediate. No root pool is supplied in
// the verification options. The function returns false when the chain is
// empty, when any entry cannot be parsed, or when the verification fails.
func isValidCert(cert tls.Certificate, domain string, now time.Time) bool {
	chain := cert.Certificate
	if len(chain) == 0 {
		return false
	}

	leaf, err := x509.ParseCertificate(chain[0])
	if err != nil {
		return false
	}

	pool := x509.NewCertPool()
	for _, raw := range chain[1:] {
		intermediate, parseErr := x509.ParseCertificate(raw)
		if parseErr != nil {
			return false
		}

		pool.AddCert(intermediate)
	}

	opts := x509.VerifyOptions{
		DNSName:       domain,
		Intermediates: pool,
		CurrentTime:   now,
	}
	if _, err := leaf.Verify(opts); err != nil {
		return false
	}

	return true
}
|
Loading…
Add table
Add a link
Reference in a new issue