Switch to golang/dep.

This commit is contained in:
Ludovic Fernandez 2018-01-09 21:46:04 +01:00 committed by Traefiker
parent d88554fa92
commit 044d87d96d
239 changed files with 42372 additions and 7011 deletions

View file

@ -1,14 +1,21 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
The MIT License (MIT)
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
Copyright (c) 2013 TOML authors
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View file

@ -775,7 +775,7 @@ func lexDatetime(lx *lexer) stateFn {
return lexDatetime
}
switch r {
case '-', 'T', ':', '.', 'Z':
case '-', 'T', ':', '.', 'Z', '+':
return lexDatetime
}

5
vendor/github.com/aws/aws-sdk-go/service/generate.go generated vendored Normal file
View file

@ -0,0 +1,5 @@
// Package service contains automatically generated AWS clients.
package service
//go:generate go run -tags codegen ../private/model/cli/gen-api/main.go -path=../service ../models/apis/*/*/api-2.json
//go:generate gofmt -s -w ../service

View file

@ -1,20 +0,0 @@
The MIT License (MIT)
Copyright (c) 2013 Ben Johnson
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -1,10 +0,0 @@
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View file

@ -1,10 +0,0 @@
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View file

@ -1,28 +0,0 @@
package bolt
import "unsafe"
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned bool
func init() {
// Simple check to see whether this arch handles unaligned load/stores
// correctly.
// ARM9 and older devices require load/stores to be from/to aligned
// addresses. If not, the lower 2 bits are cleared and that address is
// read in a jumbled up order.
// See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html
raw := [6]byte{0xfe, 0xef, 0x11, 0x22, 0x22, 0x11}
val := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&raw)) + 2))
brokenUnaligned = val != 0x11222211
}

View file

@ -1,12 +0,0 @@
// +build arm64
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View file

@ -1,10 +0,0 @@
package bolt
import (
"syscall"
)
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return syscall.Fdatasync(int(db.file.Fd()))
}

View file

@ -1,27 +0,0 @@
package bolt
import (
"syscall"
"unsafe"
)
const (
msAsync = 1 << iota // perform asynchronous writes
msSync // perform synchronous writes
msInvalidate // invalidate cached data
)
func msync(db *DB) error {
_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate)
if errno != 0 {
return errno
}
return nil
}
func fdatasync(db *DB) error {
if db.data != nil {
return msync(db)
}
return db.file.Sync()
}

View file

@ -1,9 +0,0 @@
// +build ppc
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

View file

@ -1,12 +0,0 @@
// +build ppc64
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View file

@ -1,12 +0,0 @@
// +build ppc64le
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View file

@ -1,12 +0,0 @@
// +build s390x
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View file

@ -1,89 +0,0 @@
// +build !windows,!plan9,!solaris
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
)
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
var t time.Time
for {
// If we're beyond our timeout then return an error.
// This can only occur after we've attempted a flock once.
if t.IsZero() {
t = time.Now()
} else if timeout > 0 && time.Since(t) > timeout {
return ErrTimeout
}
flag := syscall.LOCK_SH
if exclusive {
flag = syscall.LOCK_EX
}
// Otherwise attempt to obtain an exclusive lock.
err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB)
if err == nil {
return nil
} else if err != syscall.EWOULDBLOCK {
return err
}
// Wait for a bit and try again.
time.Sleep(50 * time.Millisecond)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN)
}
// mmap memory maps a DB's data file.
func mmap(db *DB, sz int) error {
// Map the data file to memory.
b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
if err != nil {
return err
}
// Advise the kernel that the mmap is accessed randomly.
if err := madvise(b, syscall.MADV_RANDOM); err != nil {
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
db.datasz = sz
return nil
}
// munmap unmaps a DB's data file from memory.
func munmap(db *DB) error {
// Ignore the unmap if we have no mapped data.
if db.dataref == nil {
return nil
}
// Unmap using the original byte slice.
err := syscall.Munmap(db.dataref)
db.dataref = nil
db.data = nil
db.datasz = 0
return err
}
// NOTE: This function is copied from stdlib because it is not available on darwin.
func madvise(b []byte, advice int) (err error) {
_, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
if e1 != 0 {
err = e1
}
return
}

View file

@ -1,90 +0,0 @@
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/unix"
)
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
var t time.Time
for {
// If we're beyond our timeout then return an error.
// This can only occur after we've attempted a flock once.
if t.IsZero() {
t = time.Now()
} else if timeout > 0 && time.Since(t) > timeout {
return ErrTimeout
}
var lock syscall.Flock_t
lock.Start = 0
lock.Len = 0
lock.Pid = 0
lock.Whence = 0
lock.Pid = 0
if exclusive {
lock.Type = syscall.F_WRLCK
} else {
lock.Type = syscall.F_RDLCK
}
err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock)
if err == nil {
return nil
} else if err != syscall.EAGAIN {
return err
}
// Wait for a bit and try again.
time.Sleep(50 * time.Millisecond)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
var lock syscall.Flock_t
lock.Start = 0
lock.Len = 0
lock.Type = syscall.F_UNLCK
lock.Whence = 0
return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
}
// mmap memory maps a DB's data file.
func mmap(db *DB, sz int) error {
// Map the data file to memory.
b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
if err != nil {
return err
}
// Advise the kernel that the mmap is accessed randomly.
if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil {
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
db.datasz = sz
return nil
}
// munmap unmaps a DB's data file from memory.
func munmap(db *DB) error {
// Ignore the unmap if we have no mapped data.
if db.dataref == nil {
return nil
}
// Unmap using the original byte slice.
err := unix.Munmap(db.dataref)
db.dataref = nil
db.data = nil
db.datasz = 0
return err
}

View file

@ -1,144 +0,0 @@
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
)
// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1
var (
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
procLockFileEx = modkernel32.NewProc("LockFileEx")
procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
)
const (
lockExt = ".lock"
// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
flagLockExclusive = 2
flagLockFailImmediately = 1
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
errLockViolation syscall.Errno = 0x21
)
func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)))
if r == 0 {
return err
}
return nil
}
func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0)
if r == 0 {
return err
}
return nil
}
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return db.file.Sync()
}
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
// Create a separate lock file on windows because a process
// cannot share an exclusive lock on the same file. This is
// needed during Tx.WriteTo().
f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode)
if err != nil {
return err
}
db.lockfile = f
var t time.Time
for {
// If we're beyond our timeout then return an error.
// This can only occur after we've attempted a flock once.
if t.IsZero() {
t = time.Now()
} else if timeout > 0 && time.Since(t) > timeout {
return ErrTimeout
}
var flag uint32 = flagLockFailImmediately
if exclusive {
flag |= flagLockExclusive
}
err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{})
if err == nil {
return nil
} else if err != errLockViolation {
return err
}
// Wait for a bit and try again.
time.Sleep(50 * time.Millisecond)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{})
db.lockfile.Close()
os.Remove(db.path + lockExt)
return err
}
// mmap memory maps a DB's data file.
// Based on: https://github.com/edsrzf/mmap-go
func mmap(db *DB, sz int) error {
if !db.readOnly {
// Truncate the database to the size of the mmap.
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("truncate: %s", err)
}
}
// Open a file mapping handle.
sizelo := uint32(sz >> 32)
sizehi := uint32(sz) & 0xffffffff
h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizelo, sizehi, nil)
if h == 0 {
return os.NewSyscallError("CreateFileMapping", errno)
}
// Create the memory map.
addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz))
if addr == 0 {
return os.NewSyscallError("MapViewOfFile", errno)
}
// Close mapping handle.
if err := syscall.CloseHandle(syscall.Handle(h)); err != nil {
return os.NewSyscallError("CloseHandle", err)
}
// Convert to a byte array.
db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr)))
db.datasz = sz
return nil
}
// munmap unmaps a pointer from a file.
// Based on: https://github.com/edsrzf/mmap-go
func munmap(db *DB) error {
if db.data == nil {
return nil
}
addr := (uintptr)(unsafe.Pointer(&db.data[0]))
if err := syscall.UnmapViewOfFile(addr); err != nil {
return os.NewSyscallError("UnmapViewOfFile", err)
}
return nil
}

View file

@ -1,8 +0,0 @@
// +build !windows,!plan9,!linux,!openbsd
package bolt
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return db.file.Sync()
}

View file

@ -1,777 +0,0 @@
package bolt
import (
"bytes"
"fmt"
"unsafe"
)
const (
// MaxKeySize is the maximum length of a key, in bytes.
MaxKeySize = 32768
// MaxValueSize is the maximum length of a value, in bytes.
MaxValueSize = (1 << 31) - 2
)
const (
maxUint = ^uint(0)
minUint = 0
maxInt = int(^uint(0) >> 1)
minInt = -maxInt - 1
)
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
const (
minFillPercent = 0.1
maxFillPercent = 1.0
)
// DefaultFillPercent is the percentage that split pages are filled.
// This value can be changed by setting Bucket.FillPercent.
const DefaultFillPercent = 0.5
// Bucket represents a collection of key/value pairs inside the database.
type Bucket struct {
*bucket
tx *Tx // the associated transaction
buckets map[string]*Bucket // subbucket cache
page *page // inline page reference
rootNode *node // materialized node for the root page.
nodes map[pgid]*node // node cache
// Sets the threshold for filling nodes when they split. By default,
// the bucket will fill to 50% but it can be useful to increase this
// amount if you know that your write workloads are mostly append-only.
//
// This is non-persisted across transactions so it must be set in every Tx.
FillPercent float64
}
// bucket represents the on-file representation of a bucket.
// This is stored as the "value" of a bucket key. If the bucket is small enough,
// then its root page can be stored inline in the "value", after the bucket
// header. In the case of inline buckets, the "root" will be 0.
type bucket struct {
root pgid // page id of the bucket's root-level page
sequence uint64 // monotonically incrementing, used by NextSequence()
}
// newBucket returns a new bucket associated with a transaction.
func newBucket(tx *Tx) Bucket {
var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
if tx.writable {
b.buckets = make(map[string]*Bucket)
b.nodes = make(map[pgid]*node)
}
return b
}
// Tx returns the tx of the bucket.
func (b *Bucket) Tx() *Tx {
return b.tx
}
// Root returns the root of the bucket.
func (b *Bucket) Root() pgid {
return b.root
}
// Writable returns whether the bucket is writable.
func (b *Bucket) Writable() bool {
return b.tx.writable
}
// Cursor creates a cursor associated with the bucket.
// The cursor is only valid as long as the transaction is open.
// Do not use a cursor after the transaction is closed.
func (b *Bucket) Cursor() *Cursor {
// Update transaction statistics.
b.tx.stats.CursorCount++
// Allocate and return a cursor.
return &Cursor{
bucket: b,
stack: make([]elemRef, 0),
}
}
// Bucket retrieves a nested bucket by name.
// Returns nil if the bucket does not exist.
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) Bucket(name []byte) *Bucket {
if b.buckets != nil {
if child := b.buckets[string(name)]; child != nil {
return child
}
}
// Move cursor to key.
c := b.Cursor()
k, v, flags := c.seek(name)
// Return nil if the key doesn't exist or it is not a bucket.
if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
return nil
}
// Otherwise create a bucket and cache it.
var child = b.openBucket(v)
if b.buckets != nil {
b.buckets[string(name)] = child
}
return child
}
// Helper method that re-interprets a sub-bucket value
// from a parent into a Bucket
func (b *Bucket) openBucket(value []byte) *Bucket {
var child = newBucket(b.tx)
// If unaligned load/stores are broken on this arch and value is
// unaligned simply clone to an aligned byte array.
unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0
if unaligned {
value = cloneBytes(value)
}
// If this is a writable transaction then we need to copy the bucket entry.
// Read-only transactions can point directly at the mmap entry.
if b.tx.writable && !unaligned {
child.bucket = &bucket{}
*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
} else {
child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
}
// Save a reference to the inline page if the bucket is inline.
if child.root == 0 {
child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
}
return &child
}
// CreateBucket creates a new bucket at the given key and returns the new bucket.
// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
if b.tx.db == nil {
return nil, ErrTxClosed
} else if !b.tx.writable {
return nil, ErrTxNotWritable
} else if len(key) == 0 {
return nil, ErrBucketNameRequired
}
// Move cursor to correct position.
c := b.Cursor()
k, _, flags := c.seek(key)
// Return an error if there is an existing key.
if bytes.Equal(key, k) {
if (flags & bucketLeafFlag) != 0 {
return nil, ErrBucketExists
}
return nil, ErrIncompatibleValue
}
// Create empty, inline bucket.
var bucket = Bucket{
bucket: &bucket{},
rootNode: &node{isLeaf: true},
FillPercent: DefaultFillPercent,
}
var value = bucket.write()
// Insert into node.
key = cloneBytes(key)
c.node().put(key, key, value, 0, bucketLeafFlag)
// Since subbuckets are not allowed on inline buckets, we need to
// dereference the inline page, if it exists. This will cause the bucket
// to be treated as a regular, non-inline bucket for the rest of the tx.
b.page = nil
return b.Bucket(key), nil
}
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
// Returns an error if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
child, err := b.CreateBucket(key)
if err == ErrBucketExists {
return b.Bucket(key), nil
} else if err != nil {
return nil, err
}
return child, nil
}
// DeleteBucket deletes a bucket at the given key.
// Returns an error if the bucket does not exists, or if the key represents a non-bucket value.
func (b *Bucket) DeleteBucket(key []byte) error {
if b.tx.db == nil {
return ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
}
// Move cursor to correct position.
c := b.Cursor()
k, _, flags := c.seek(key)
// Return an error if bucket doesn't exist or is not a bucket.
if !bytes.Equal(key, k) {
return ErrBucketNotFound
} else if (flags & bucketLeafFlag) == 0 {
return ErrIncompatibleValue
}
// Recursively delete all child buckets.
child := b.Bucket(key)
err := child.ForEach(func(k, v []byte) error {
if v == nil {
if err := child.DeleteBucket(k); err != nil {
return fmt.Errorf("delete bucket: %s", err)
}
}
return nil
})
if err != nil {
return err
}
// Remove cached copy.
delete(b.buckets, string(key))
// Release all bucket pages to freelist.
child.nodes = nil
child.rootNode = nil
child.free()
// Delete the node if we have a matching key.
c.node().del(key)
return nil
}
// Get retrieves the value for a key in the bucket.
// Returns a nil value if the key does not exist or if the key is a nested bucket.
// The returned value is only valid for the life of the transaction.
func (b *Bucket) Get(key []byte) []byte {
k, v, flags := b.Cursor().seek(key)
// Return nil if this is a bucket.
if (flags & bucketLeafFlag) != 0 {
return nil
}
// If our target node isn't the same key as what's passed in then return nil.
if !bytes.Equal(key, k) {
return nil
}
return v
}
// Put sets the value for a key in the bucket.
// If the key exist then its previous value will be overwritten.
// Supplied value must remain valid for the life of the transaction.
// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
func (b *Bucket) Put(key []byte, value []byte) error {
if b.tx.db == nil {
return ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
} else if len(key) == 0 {
return ErrKeyRequired
} else if len(key) > MaxKeySize {
return ErrKeyTooLarge
} else if int64(len(value)) > MaxValueSize {
return ErrValueTooLarge
}
// Move cursor to correct position.
c := b.Cursor()
k, _, flags := c.seek(key)
// Return an error if there is an existing key with a bucket value.
if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
return ErrIncompatibleValue
}
// Insert into node.
key = cloneBytes(key)
c.node().put(key, key, value, 0, 0)
return nil
}
// Delete removes a key from the bucket.
// If the key does not exist then nothing is done and a nil error is returned.
// Returns an error if the bucket was created from a read-only transaction.
func (b *Bucket) Delete(key []byte) error {
if b.tx.db == nil {
return ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
}
// Move cursor to correct position.
c := b.Cursor()
_, _, flags := c.seek(key)
// Return an error if there is already existing bucket value.
if (flags & bucketLeafFlag) != 0 {
return ErrIncompatibleValue
}
// Delete the node if we have a matching key.
c.node().del(key)
return nil
}
// Sequence returns the current integer for the bucket without incrementing it.
func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
// SetSequence updates the sequence number for the bucket.
func (b *Bucket) SetSequence(v uint64) error {
if b.tx.db == nil {
return ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
}
// Materialize the root node if it hasn't been already so that the
// bucket will be saved during commit.
if b.rootNode == nil {
_ = b.node(b.root, nil)
}
// Increment and return the sequence.
b.bucket.sequence = v
return nil
}
// NextSequence returns an autoincrementing integer for the bucket.
func (b *Bucket) NextSequence() (uint64, error) {
if b.tx.db == nil {
return 0, ErrTxClosed
} else if !b.Writable() {
return 0, ErrTxNotWritable
}
// Materialize the root node if it hasn't been already so that the
// bucket will be saved during commit.
if b.rootNode == nil {
_ = b.node(b.root, nil)
}
// Increment and return the sequence.
b.bucket.sequence++
return b.bucket.sequence, nil
}
// ForEach executes a function for each key/value pair in a bucket.
// If the provided function returns an error then the iteration is stopped and
// the error is returned to the caller. The provided function must not modify
// the bucket; this will result in undefined behavior.
func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
if b.tx.db == nil {
return ErrTxClosed
}
c := b.Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
if err := fn(k, v); err != nil {
return err
}
}
return nil
}
// Stat returns stats on a bucket.
func (b *Bucket) Stats() BucketStats {
var s, subStats BucketStats
pageSize := b.tx.db.pageSize
s.BucketN += 1
if b.root == 0 {
s.InlineBucketN += 1
}
b.forEachPage(func(p *page, depth int) {
if (p.flags & leafPageFlag) != 0 {
s.KeyN += int(p.count)
// used totals the used bytes for the page
used := pageHeaderSize
if p.count != 0 {
// If page has any elements, add all element headers.
used += leafPageElementSize * int(p.count-1)
// Add all element key, value sizes.
// The computation takes advantage of the fact that the position
// of the last element's key/value equals to the total of the sizes
// of all previous elements' keys and values.
// It also includes the last element's header.
lastElement := p.leafPageElement(p.count - 1)
used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
}
if b.root == 0 {
// For inlined bucket just update the inline stats
s.InlineBucketInuse += used
} else {
// For non-inlined bucket update all the leaf stats
s.LeafPageN++
s.LeafInuse += used
s.LeafOverflowN += int(p.overflow)
// Collect stats from sub-buckets.
// Do that by iterating over all element headers
// looking for the ones with the bucketLeafFlag.
for i := uint16(0); i < p.count; i++ {
e := p.leafPageElement(i)
if (e.flags & bucketLeafFlag) != 0 {
// For any bucket element, open the element value
// and recursively call Stats on the contained bucket.
subStats.Add(b.openBucket(e.value()).Stats())
}
}
}
} else if (p.flags & branchPageFlag) != 0 {
s.BranchPageN++
lastElement := p.branchPageElement(p.count - 1)
// used totals the used bytes for the page
// Add header and all element headers.
used := pageHeaderSize + (branchPageElementSize * int(p.count-1))
// Add size of all keys and values.
// Again, use the fact that last element's position equals to
// the total of key, value sizes of all previous elements.
used += int(lastElement.pos + lastElement.ksize)
s.BranchInuse += used
s.BranchOverflowN += int(p.overflow)
}
// Keep track of maximum page depth.
if depth+1 > s.Depth {
s.Depth = (depth + 1)
}
})
// Alloc stats can be computed from page counts and pageSize.
s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
// Add the max depth of sub-buckets to get total nested depth.
s.Depth += subStats.Depth
// Add the stats for all sub-buckets
s.Add(subStats)
return s
}
// forEachPage iterates over every page in a bucket, including inline pages.
func (b *Bucket) forEachPage(fn func(*page, int)) {
// If we have an inline page then just use that.
if b.page != nil {
fn(b.page, 0)
return
}
// Otherwise traverse the page hierarchy.
b.tx.forEachPage(b.root, 0, fn)
}
// forEachPageNode iterates over every page (or node) in a bucket.
// This also includes inline pages.
func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
// If we have an inline page or root node then just use that.
if b.page != nil {
fn(b.page, nil, 0)
return
}
b._forEachPageNode(b.root, 0, fn)
}
func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) {
var p, n = b.pageNode(pgid)
// Execute function.
fn(p, n, depth)
// Recursively loop over children.
if p != nil {
if (p.flags & branchPageFlag) != 0 {
for i := 0; i < int(p.count); i++ {
elem := p.branchPageElement(uint16(i))
b._forEachPageNode(elem.pgid, depth+1, fn)
}
}
} else {
if !n.isLeaf {
for _, inode := range n.inodes {
b._forEachPageNode(inode.pgid, depth+1, fn)
}
}
}
}
// spill writes all the nodes for this bucket to dirty pages.
func (b *Bucket) spill() error {
// Spill all child buckets first.
for name, child := range b.buckets {
// If the child bucket is small enough and it has no child buckets then
// write it inline into the parent bucket's page. Otherwise spill it
// like a normal bucket and make the parent value a pointer to the page.
var value []byte
if child.inlineable() {
child.free()
value = child.write()
} else {
if err := child.spill(); err != nil {
return err
}
// Update the child bucket header in this bucket.
value = make([]byte, unsafe.Sizeof(bucket{}))
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
*bucket = *child.bucket
}
// Skip writing the bucket if there are no materialized nodes.
if child.rootNode == nil {
continue
}
// Update parent node.
var c = b.Cursor()
k, _, flags := c.seek([]byte(name))
if !bytes.Equal([]byte(name), k) {
panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
}
if flags&bucketLeafFlag == 0 {
panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
}
c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
}
// Ignore if there's not a materialized root node.
if b.rootNode == nil {
return nil
}
// Spill nodes.
if err := b.rootNode.spill(); err != nil {
return err
}
b.rootNode = b.rootNode.root()
// Update the root node for this bucket.
if b.rootNode.pgid >= b.tx.meta.pgid {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
}
b.root = b.rootNode.pgid
return nil
}
// inlineable returns true if a bucket is small enough to be written inline
// and if it contains no subbuckets. Otherwise returns false.
func (b *Bucket) inlineable() bool {
var n = b.rootNode
// Bucket must only contain a single leaf node.
if n == nil || !n.isLeaf {
return false
}
// Bucket is not inlineable if it contains subbuckets or if it goes beyond
// our threshold for inline bucket size.
var size = pageHeaderSize
for _, inode := range n.inodes {
size += leafPageElementSize + len(inode.key) + len(inode.value)
if inode.flags&bucketLeafFlag != 0 {
return false
} else if size > b.maxInlineBucketSize() {
return false
}
}
return true
}
// Returns the maximum total size of a bucket to make it a candidate for inlining.
func (b *Bucket) maxInlineBucketSize() int {
return b.tx.db.pageSize / 4
}
// write allocates and writes a bucket to a byte slice.
func (b *Bucket) write() []byte {
// Allocate the appropriate size.
var n = b.rootNode
var value = make([]byte, bucketHeaderSize+n.size())
// Write a bucket header.
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
*bucket = *b.bucket
// Convert byte slice to a fake page and write the root node.
var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
n.write(p)
return value
}
// rebalance attempts to balance all nodes.
func (b *Bucket) rebalance() {
for _, n := range b.nodes {
n.rebalance()
}
for _, child := range b.buckets {
child.rebalance()
}
}
// node creates a node from a page and associates it with a given parent.
func (b *Bucket) node(pgid pgid, parent *node) *node {
_assert(b.nodes != nil, "nodes map expected")
// Retrieve node if it's already been created.
if n := b.nodes[pgid]; n != nil {
return n
}
// Otherwise create a node and cache it.
n := &node{bucket: b, parent: parent}
if parent == nil {
b.rootNode = n
} else {
parent.children = append(parent.children, n)
}
// Use the inline page if this is an inline bucket.
var p = b.page
if p == nil {
p = b.tx.page(pgid)
}
// Read the page into the node and cache it.
n.read(p)
b.nodes[pgid] = n
// Update statistics.
b.tx.stats.NodeCount++
return n
}
// free recursively frees all pages in the bucket.
func (b *Bucket) free() {
if b.root == 0 {
return
}
var tx = b.tx
b.forEachPageNode(func(p *page, n *node, _ int) {
if p != nil {
tx.db.freelist.free(tx.meta.txid, p)
} else {
n.free()
}
})
b.root = 0
}
// dereference removes all references to the old mmap.
func (b *Bucket) dereference() {
if b.rootNode != nil {
b.rootNode.root().dereference()
}
for _, child := range b.buckets {
child.dereference()
}
}
// pageNode returns the in-memory node, if it exists.
// Otherwise returns the underlying page.
func (b *Bucket) pageNode(id pgid) (*page, *node) {
// Inline buckets have a fake page embedded in their value so treat them
// differently. We'll return the rootNode (if available) or the fake page.
if b.root == 0 {
if id != 0 {
panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
}
if b.rootNode != nil {
return nil, b.rootNode
}
return b.page, nil
}
// Check the node cache for non-inline buckets.
if b.nodes != nil {
if n := b.nodes[id]; n != nil {
return nil, n
}
}
// Finally lookup the page from the transaction if no node is materialized.
return b.tx.page(id), nil
}
// BucketStats records statistics about resources used by a bucket.
type BucketStats struct {
// Page count statistics.
BranchPageN int // number of logical branch pages
BranchOverflowN int // number of physical branch overflow pages
LeafPageN int // number of logical leaf pages
LeafOverflowN int // number of physical leaf overflow pages
// Tree statistics.
KeyN int // number of keys/value pairs
Depth int // number of levels in B+tree
// Page size utilization.
BranchAlloc int // bytes allocated for physical branch pages
BranchInuse int // bytes actually used for branch data
LeafAlloc int // bytes allocated for physical leaf pages
LeafInuse int // bytes actually used for leaf data
// Bucket statistics
BucketN int // total number of buckets including the top bucket
InlineBucketN int // total number on inlined buckets
InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
}
func (s *BucketStats) Add(other BucketStats) {
s.BranchPageN += other.BranchPageN
s.BranchOverflowN += other.BranchOverflowN
s.LeafPageN += other.LeafPageN
s.LeafOverflowN += other.LeafOverflowN
s.KeyN += other.KeyN
if s.Depth < other.Depth {
s.Depth = other.Depth
}
s.BranchAlloc += other.BranchAlloc
s.BranchInuse += other.BranchInuse
s.LeafAlloc += other.LeafAlloc
s.LeafInuse += other.LeafInuse
s.BucketN += other.BucketN
s.InlineBucketN += other.InlineBucketN
s.InlineBucketInuse += other.InlineBucketInuse
}
// cloneBytes returns a copy of a given slice.
func cloneBytes(v []byte) []byte {
var clone = make([]byte, len(v))
copy(clone, v)
return clone
}

View file

@ -1,400 +0,0 @@
package bolt
import (
"bytes"
"fmt"
"sort"
)
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order.
// Cursors see nested buckets with value == nil.
// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
//
// Keys and values returned from the cursor are only valid for the life of the transaction.
//
// Changing data while traversing with a cursor may cause it to be invalidated
// and return unexpected keys and/or values. You must reposition your cursor
// after mutating data.
type Cursor struct {
bucket *Bucket
stack []elemRef
}
// Bucket returns the bucket that this cursor was created from.
func (c *Cursor) Bucket() *Bucket {
return c.bucket
}
// First moves the cursor to the first item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) First() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
c.stack = c.stack[:0]
p, n := c.bucket.pageNode(c.bucket.root)
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
c.first()
// If we land on an empty page then move to the next value.
// https://github.com/boltdb/bolt/issues/450
if c.stack[len(c.stack)-1].count() == 0 {
c.next()
}
k, v, flags := c.keyValue()
if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Last moves the cursor to the last item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Last() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
c.stack = c.stack[:0]
p, n := c.bucket.pageNode(c.bucket.root)
ref := elemRef{page: p, node: n}
ref.index = ref.count() - 1
c.stack = append(c.stack, ref)
c.last()
k, v, flags := c.keyValue()
if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Next moves the cursor to the next item in the bucket and returns its key and value.
// If the cursor is at the end of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Next() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.next()
if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Prev moves the cursor to the previous item in the bucket and returns its key and value.
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Prev() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
// Attempt to move back one element until we're successful.
// Move up the stack as we hit the beginning of each page in our stack.
for i := len(c.stack) - 1; i >= 0; i-- {
elem := &c.stack[i]
if elem.index > 0 {
elem.index--
break
}
c.stack = c.stack[:i]
}
// If we've hit the end then return nil.
if len(c.stack) == 0 {
return nil, nil
}
// Move down the stack to find the last element of the last leaf under this branch.
c.last()
k, v, flags := c.keyValue()
if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Seek moves the cursor to a given key and returns it.
// If the key does not exist then the next key is used. If no keys
// follow, a nil key is returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
k, v, flags := c.seek(seek)
// If we ended up after the last element of a page then move to the next one.
if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() {
k, v, flags = c.next()
}
if k == nil {
return nil, nil
} else if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Delete removes the current key/value under the cursor from the bucket.
// Delete fails if current key/value is a bucket or if the transaction is not writable.
func (c *Cursor) Delete() error {
if c.bucket.tx.db == nil {
return ErrTxClosed
} else if !c.bucket.Writable() {
return ErrTxNotWritable
}
key, _, flags := c.keyValue()
// Return an error if current value is a bucket.
if (flags & bucketLeafFlag) != 0 {
return ErrIncompatibleValue
}
c.node().del(key)
return nil
}
// seek moves the cursor to a given key and returns it.
// If the key does not exist then the next key is used.
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
_assert(c.bucket.tx.db != nil, "tx closed")
// Start from root page/node and traverse to correct page.
c.stack = c.stack[:0]
c.search(seek, c.bucket.root)
ref := &c.stack[len(c.stack)-1]
// If the cursor is pointing to the end of page/node then return nil.
if ref.index >= ref.count() {
return nil, nil, 0
}
// If this is a bucket then return a nil value.
return c.keyValue()
}
// first moves the cursor to the first leaf element under the last page in the stack.
func (c *Cursor) first() {
for {
// Exit when we hit a leaf page.
var ref = &c.stack[len(c.stack)-1]
if ref.isLeaf() {
break
}
// Keep adding pages pointing to the first element to the stack.
var pgid pgid
if ref.node != nil {
pgid = ref.node.inodes[ref.index].pgid
} else {
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
}
p, n := c.bucket.pageNode(pgid)
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
}
}
// last moves the cursor to the last leaf element under the last page in the stack.
func (c *Cursor) last() {
for {
// Exit when we hit a leaf page.
ref := &c.stack[len(c.stack)-1]
if ref.isLeaf() {
break
}
// Keep adding pages pointing to the last element in the stack.
var pgid pgid
if ref.node != nil {
pgid = ref.node.inodes[ref.index].pgid
} else {
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
}
p, n := c.bucket.pageNode(pgid)
var nextRef = elemRef{page: p, node: n}
nextRef.index = nextRef.count() - 1
c.stack = append(c.stack, nextRef)
}
}
// next moves to the next leaf element and returns the key and value.
// If the cursor is at the last leaf element then it stays there and returns nil.
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
for {
// Attempt to move over one element until we're successful.
// Move up the stack as we hit the end of each page in our stack.
var i int
for i = len(c.stack) - 1; i >= 0; i-- {
elem := &c.stack[i]
if elem.index < elem.count()-1 {
elem.index++
break
}
}
// If we've hit the root page then stop and return. This will leave the
// cursor on the last element of the last page.
if i == -1 {
return nil, nil, 0
}
// Otherwise start from where we left off in the stack and find the
// first element of the first leaf page.
c.stack = c.stack[:i+1]
c.first()
// If this is an empty page then restart and move back up the stack.
// https://github.com/boltdb/bolt/issues/450
if c.stack[len(c.stack)-1].count() == 0 {
continue
}
return c.keyValue()
}
}
// search recursively performs a binary search against a given page/node until it finds a given key.
func (c *Cursor) search(key []byte, pgid pgid) {
p, n := c.bucket.pageNode(pgid)
if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
}
e := elemRef{page: p, node: n}
c.stack = append(c.stack, e)
// If we're on a leaf page/node then find the specific node.
if e.isLeaf() {
c.nsearch(key)
return
}
if n != nil {
c.searchNode(key, n)
return
}
c.searchPage(key, p)
}
func (c *Cursor) searchNode(key []byte, n *node) {
var exact bool
index := sort.Search(len(n.inodes), func(i int) bool {
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
ret := bytes.Compare(n.inodes[i].key, key)
if ret == 0 {
exact = true
}
return ret != -1
})
if !exact && index > 0 {
index--
}
c.stack[len(c.stack)-1].index = index
// Recursively search to the next page.
c.search(key, n.inodes[index].pgid)
}
func (c *Cursor) searchPage(key []byte, p *page) {
// Binary search for the correct range.
inodes := p.branchPageElements()
var exact bool
index := sort.Search(int(p.count), func(i int) bool {
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
ret := bytes.Compare(inodes[i].key(), key)
if ret == 0 {
exact = true
}
return ret != -1
})
if !exact && index > 0 {
index--
}
c.stack[len(c.stack)-1].index = index
// Recursively search to the next page.
c.search(key, inodes[index].pgid)
}
// nsearch searches the leaf node on the top of the stack for a key.
func (c *Cursor) nsearch(key []byte) {
e := &c.stack[len(c.stack)-1]
p, n := e.page, e.node
// If we have a node then search its inodes.
if n != nil {
index := sort.Search(len(n.inodes), func(i int) bool {
return bytes.Compare(n.inodes[i].key, key) != -1
})
e.index = index
return
}
// If we have a page then search its leaf elements.
inodes := p.leafPageElements()
index := sort.Search(int(p.count), func(i int) bool {
return bytes.Compare(inodes[i].key(), key) != -1
})
e.index = index
}
// keyValue returns the key and value of the current leaf element.
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
ref := &c.stack[len(c.stack)-1]
if ref.count() == 0 || ref.index >= ref.count() {
return nil, nil, 0
}
// Retrieve value from node.
if ref.node != nil {
inode := &ref.node.inodes[ref.index]
return inode.key, inode.value, inode.flags
}
// Or retrieve value from page.
elem := ref.page.leafPageElement(uint16(ref.index))
return elem.key(), elem.value(), elem.flags
}
// node returns the node that the cursor is currently positioned on.
func (c *Cursor) node() *node {
_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
// If the top of the stack is a leaf node then just return it.
if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
return ref.node
}
// Start from root and traverse down the hierarchy.
var n = c.stack[0].node
if n == nil {
n = c.bucket.node(c.stack[0].page.id, nil)
}
for _, ref := range c.stack[:len(c.stack)-1] {
_assert(!n.isLeaf, "expected branch node")
n = n.childAt(int(ref.index))
}
_assert(n.isLeaf, "expected leaf node")
return n
}
// elemRef represents a reference to an element on a given page/node.
type elemRef struct {
page *page
node *node
index int
}
// isLeaf returns whether the ref is pointing at a leaf page/node.
func (r *elemRef) isLeaf() bool {
if r.node != nil {
return r.node.isLeaf
}
return (r.page.flags & leafPageFlag) != 0
}
// count returns the number of inodes or page elements.
func (r *elemRef) count() int {
if r.node != nil {
return len(r.node.inodes)
}
return int(r.page.count)
}

1039
vendor/github.com/boltdb/bolt/db.go generated vendored

File diff suppressed because it is too large Load diff

44
vendor/github.com/boltdb/bolt/doc.go generated vendored
View file

@ -1,44 +0,0 @@
/*
Package bolt implements a low-level key/value store in pure Go. It supports
fully serializable transactions, ACID semantics, and lock-free MVCC with
multiple readers and a single writer. Bolt can be used for projects that
want a simple data store without the need to add large dependencies such as
Postgres or MySQL.
Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is
optimized for fast read access and does not require recovery in the event of a
system crash. Transactions which have not finished committing will simply be
rolled back in the event of a crash.
The design of Bolt is based on Howard Chu's LMDB database project.
Bolt currently works on Windows, Mac OS X, and Linux.
Basics
There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is
a collection of buckets and is represented by a single file on disk. A bucket is
a collection of unique keys that are associated with values.
Transactions provide either read-only or read-write access to the database.
Read-only transactions can retrieve key/value pairs and can use Cursors to
iterate over the dataset sequentially. Read-write transactions can create and
delete buckets and can insert and remove keys. Only one read-write transaction
is allowed at a time.
Caveats
The database uses a read-only, memory-mapped data file to ensure that
applications cannot corrupt the database, however, this means that keys and
values returned from Bolt cannot be changed. Writing to a read-only byte slice
will cause Go to panic.
Keys and values retrieved from the database are only valid for the life of
the transaction. When used outside the transaction, these byte slices can
point to different data or can point to invalid memory which will cause a panic.
*/
package bolt

View file

@ -1,71 +0,0 @@
package bolt
import "errors"
// These errors can be returned when opening or calling methods on a DB.
var (
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
// is opened or after it is closed.
ErrDatabaseNotOpen = errors.New("database not open")
// ErrDatabaseOpen is returned when opening a database that is
// already open.
ErrDatabaseOpen = errors.New("database already open")
// ErrInvalid is returned when both meta pages on a database are invalid.
// This typically occurs when a file is not a bolt database.
ErrInvalid = errors.New("invalid database")
// ErrVersionMismatch is returned when the data file was created with a
// different version of Bolt.
ErrVersionMismatch = errors.New("version mismatch")
// ErrChecksum is returned when either meta page checksum does not match.
ErrChecksum = errors.New("checksum error")
// ErrTimeout is returned when a database cannot obtain an exclusive lock
// on the data file after the timeout passed to Open().
ErrTimeout = errors.New("timeout")
)
// These errors can occur when beginning or committing a Tx.
var (
// ErrTxNotWritable is returned when performing a write operation on a
// read-only transaction.
ErrTxNotWritable = errors.New("tx not writable")
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
ErrTxClosed = errors.New("tx closed")
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
)
// These errors can occur when putting or deleting a value or a bucket.
var (
// ErrBucketNotFound is returned when trying to access a bucket that has
// not been created yet.
ErrBucketNotFound = errors.New("bucket not found")
// ErrBucketExists is returned when creating a bucket that already exists.
ErrBucketExists = errors.New("bucket already exists")
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
ErrBucketNameRequired = errors.New("bucket name required")
// ErrKeyRequired is returned when inserting a zero-length key.
ErrKeyRequired = errors.New("key required")
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
ErrKeyTooLarge = errors.New("key too large")
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
ErrValueTooLarge = errors.New("value too large")
// ErrIncompatibleValue is returned when trying create or delete a bucket
// on an existing non-bucket key or when trying to create or delete a
// non-bucket key on an existing bucket key.
ErrIncompatibleValue = errors.New("incompatible value")
)

View file

@ -1,252 +0,0 @@
package bolt
import (
"fmt"
"sort"
"unsafe"
)
// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
ids []pgid // all free and available free page ids.
pending map[txid][]pgid // mapping of soon-to-be free page ids by tx.
cache map[pgid]bool // fast lookup of all free and pending page ids.
}
// newFreelist returns an empty, initialized freelist.
func newFreelist() *freelist {
return &freelist{
pending: make(map[txid][]pgid),
cache: make(map[pgid]bool),
}
}
// size returns the size of the page after serialization.
func (f *freelist) size() int {
n := f.count()
if n >= 0xFFFF {
// The first element will be used to store the count. See freelist.write.
n++
}
return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
}
// count returns count of pages on the freelist
func (f *freelist) count() int {
return f.free_count() + f.pending_count()
}
// free_count returns count of free pages
func (f *freelist) free_count() int {
return len(f.ids)
}
// pending_count returns count of pending pages
func (f *freelist) pending_count() int {
var count int
for _, list := range f.pending {
count += len(list)
}
return count
}
// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
// f.count returns the minimum length required for dst.
func (f *freelist) copyall(dst []pgid) {
m := make(pgids, 0, f.pending_count())
for _, list := range f.pending {
m = append(m, list...)
}
sort.Sort(m)
mergepgids(dst, f.ids, m)
}
// allocate returns the starting page id of a contiguous list of pages of a given size.
// If a contiguous block cannot be found then 0 is returned.
func (f *freelist) allocate(n int) pgid {
if len(f.ids) == 0 {
return 0
}
var initial, previd pgid
for i, id := range f.ids {
if id <= 1 {
panic(fmt.Sprintf("invalid page allocation: %d", id))
}
// Reset initial page if this is not contiguous.
if previd == 0 || id-previd != 1 {
initial = id
}
// If we found a contiguous block then remove it and return it.
if (id-initial)+1 == pgid(n) {
// If we're allocating off the beginning then take the fast path
// and just adjust the existing slice. This will use extra memory
// temporarily but the append() in free() will realloc the slice
// as is necessary.
if (i + 1) == n {
f.ids = f.ids[i+1:]
} else {
copy(f.ids[i-n+1:], f.ids[i+1:])
f.ids = f.ids[:len(f.ids)-n]
}
// Remove from the free cache.
for i := pgid(0); i < pgid(n); i++ {
delete(f.cache, initial+i)
}
return initial
}
previd = id
}
return 0
}
// free releases a page and its overflow for a given transaction id.
// If the page is already free then a panic will occur.
func (f *freelist) free(txid txid, p *page) {
if p.id <= 1 {
panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
}
// Free page and all its overflow pages.
var ids = f.pending[txid]
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
// Verify that page is not already free.
if f.cache[id] {
panic(fmt.Sprintf("page %d already freed", id))
}
// Add to the freelist and cache.
ids = append(ids, id)
f.cache[id] = true
}
f.pending[txid] = ids
}
// release moves all page ids for a transaction id (or older) to the freelist.
func (f *freelist) release(txid txid) {
m := make(pgids, 0)
for tid, ids := range f.pending {
if tid <= txid {
// Move transaction's pending pages to the available freelist.
// Don't remove from the cache since the page is still free.
m = append(m, ids...)
delete(f.pending, tid)
}
}
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
}
// rollback removes the pages from a given pending tx.
func (f *freelist) rollback(txid txid) {
// Remove page ids from cache.
for _, id := range f.pending[txid] {
delete(f.cache, id)
}
// Remove pages from pending list.
delete(f.pending, txid)
}
// freed returns whether a given page is in the free list.
func (f *freelist) freed(pgid pgid) bool {
return f.cache[pgid]
}
// read initializes the freelist from a freelist page.
func (f *freelist) read(p *page) {
// If the page.count is at the max uint16 value (64k) then it's considered
// an overflow and the size of the freelist is stored as the first element.
idx, count := 0, int(p.count)
if count == 0xFFFF {
idx = 1
count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
}
// Copy the list of page ids from the freelist.
if count == 0 {
f.ids = nil
} else {
ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx:count]
f.ids = make([]pgid, len(ids))
copy(f.ids, ids)
// Make sure they're sorted.
sort.Sort(pgids(f.ids))
}
// Rebuild the page cache.
f.reindex()
}
// write writes the page ids onto a freelist page. All free and pending ids are
// saved to disk since in the event of a program crash, all pending ids will
// become free.
func (f *freelist) write(p *page) error {
// Combine the old free pgids and pgids waiting on an open transaction.
// Update the header flag.
p.flags |= freelistPageFlag
// The page.count can only hold up to 64k elements so if we overflow that
// number then we handle it by putting the size in the first element.
lenids := f.count()
if lenids == 0 {
p.count = uint16(lenids)
} else if lenids < 0xFFFF {
p.count = uint16(lenids)
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
} else {
p.count = 0xFFFF
((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
}
return nil
}
// reload reads the freelist from a page and filters out pending items.
func (f *freelist) reload(p *page) {
f.read(p)
// Build a cache of only pending pages.
pcache := make(map[pgid]bool)
for _, pendingIDs := range f.pending {
for _, pendingID := range pendingIDs {
pcache[pendingID] = true
}
}
// Check each page in the freelist and build a new available freelist
// with any pages not in the pending lists.
var a []pgid
for _, id := range f.ids {
if !pcache[id] {
a = append(a, id)
}
}
f.ids = a
// Once the available list is rebuilt then rebuild the free cache so that
// it includes the available and pending free pages.
f.reindex()
}
// reindex rebuilds the free cache based on available and pending free lists.
func (f *freelist) reindex() {
f.cache = make(map[pgid]bool, len(f.ids))
for _, id := range f.ids {
f.cache[id] = true
}
for _, pendingIDs := range f.pending {
for _, pendingID := range pendingIDs {
f.cache[pendingID] = true
}
}
}

604
vendor/github.com/boltdb/bolt/node.go generated vendored
View file

@ -1,604 +0,0 @@
package bolt
import (
"bytes"
"fmt"
"sort"
"unsafe"
)
// node represents an in-memory, deserialized page.
type node struct {
bucket *Bucket
isLeaf bool
unbalanced bool
spilled bool
key []byte
pgid pgid
parent *node
children nodes
inodes inodes
}
// root returns the top-level node this node is attached to.
func (n *node) root() *node {
if n.parent == nil {
return n
}
return n.parent.root()
}
// minKeys returns the minimum number of inodes this node should have.
func (n *node) minKeys() int {
if n.isLeaf {
return 1
}
return 2
}
// size returns the size of the node after serialization.
func (n *node) size() int {
sz, elsz := pageHeaderSize, n.pageElementSize()
for i := 0; i < len(n.inodes); i++ {
item := &n.inodes[i]
sz += elsz + len(item.key) + len(item.value)
}
return sz
}
// sizeLessThan returns true if the node is less than a given size.
// This is an optimization to avoid calculating a large node when we only need
// to know if it fits inside a certain page size.
func (n *node) sizeLessThan(v int) bool {
sz, elsz := pageHeaderSize, n.pageElementSize()
for i := 0; i < len(n.inodes); i++ {
item := &n.inodes[i]
sz += elsz + len(item.key) + len(item.value)
if sz >= v {
return false
}
}
return true
}
// pageElementSize returns the size of each page element based on the type of node.
func (n *node) pageElementSize() int {
if n.isLeaf {
return leafPageElementSize
}
return branchPageElementSize
}
// childAt returns the child node at a given index.
func (n *node) childAt(index int) *node {
if n.isLeaf {
panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))
}
return n.bucket.node(n.inodes[index].pgid, n)
}
// childIndex returns the index of a given child node.
func (n *node) childIndex(child *node) int {
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })
return index
}
// numChildren returns the number of children.
func (n *node) numChildren() int {
return len(n.inodes)
}
// nextSibling returns the next node with the same parent.
func (n *node) nextSibling() *node {
if n.parent == nil {
return nil
}
index := n.parent.childIndex(n)
if index >= n.parent.numChildren()-1 {
return nil
}
return n.parent.childAt(index + 1)
}
// prevSibling returns the previous node with the same parent.
func (n *node) prevSibling() *node {
if n.parent == nil {
return nil
}
index := n.parent.childIndex(n)
if index == 0 {
return nil
}
return n.parent.childAt(index - 1)
}
// put inserts a key/value.
func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {
if pgid >= n.bucket.tx.meta.pgid {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))
} else if len(oldKey) <= 0 {
panic("put: zero-length old key")
} else if len(newKey) <= 0 {
panic("put: zero-length new key")
}
// Find insertion index.
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
// Add capacity and shift nodes if we don't have an exact match and need to insert.
exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
if !exact {
n.inodes = append(n.inodes, inode{})
copy(n.inodes[index+1:], n.inodes[index:])
}
inode := &n.inodes[index]
inode.flags = flags
inode.key = newKey
inode.value = value
inode.pgid = pgid
_assert(len(inode.key) > 0, "put: zero-length inode key")
}
// del removes a key from the node.
func (n *node) del(key []byte) {
// Find index of key.
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 })
// Exit if the key isn't found.
if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) {
return
}
// Delete inode from the node.
n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)
// Mark the node as needing rebalancing.
n.unbalanced = true
}
// read initializes the node from a page.
func (n *node) read(p *page) {
n.pgid = p.id
n.isLeaf = ((p.flags & leafPageFlag) != 0)
n.inodes = make(inodes, int(p.count))
for i := 0; i < int(p.count); i++ {
inode := &n.inodes[i]
if n.isLeaf {
elem := p.leafPageElement(uint16(i))
inode.flags = elem.flags
inode.key = elem.key()
inode.value = elem.value()
} else {
elem := p.branchPageElement(uint16(i))
inode.pgid = elem.pgid
inode.key = elem.key()
}
_assert(len(inode.key) > 0, "read: zero-length inode key")
}
// Save first key so we can find the node in the parent when we spill.
if len(n.inodes) > 0 {
n.key = n.inodes[0].key
_assert(len(n.key) > 0, "read: zero-length node key")
} else {
n.key = nil
}
}
// write writes the items onto one or more pages.
func (n *node) write(p *page) {
// Initialize page.
if n.isLeaf {
p.flags |= leafPageFlag
} else {
p.flags |= branchPageFlag
}
if len(n.inodes) >= 0xFFFF {
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
}
p.count = uint16(len(n.inodes))
// Stop here if there are no items to write.
if p.count == 0 {
return
}
// Loop over each item and write it to the page.
b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
for i, item := range n.inodes {
_assert(len(item.key) > 0, "write: zero-length inode key")
// Write the page element.
if n.isLeaf {
elem := p.leafPageElement(uint16(i))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.flags = item.flags
elem.ksize = uint32(len(item.key))
elem.vsize = uint32(len(item.value))
} else {
elem := p.branchPageElement(uint16(i))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.ksize = uint32(len(item.key))
elem.pgid = item.pgid
_assert(elem.pgid != p.id, "write: circular dependency occurred")
}
// If the length of key+value is larger than the max allocation size
// then we need to reallocate the byte array pointer.
//
// See: https://github.com/boltdb/bolt/pull/335
klen, vlen := len(item.key), len(item.value)
if len(b) < klen+vlen {
b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
}
// Write data for the element to the end of the page.
copy(b[0:], item.key)
b = b[klen:]
copy(b[0:], item.value)
b = b[vlen:]
}
// DEBUG ONLY: n.dump()
}
// split breaks up a node into multiple smaller nodes, if appropriate.
// This should only be called from the spill() function.
func (n *node) split(pageSize int) []*node {
var nodes []*node
node := n
for {
// Split node into two.
a, b := node.splitTwo(pageSize)
nodes = append(nodes, a)
// If we can't split then exit the loop.
if b == nil {
break
}
// Set node to b so it gets split on the next iteration.
node = b
}
return nodes
}
// splitTwo breaks up a node into two smaller nodes, if appropriate.
// This should only be called from the split() function.
func (n *node) splitTwo(pageSize int) (*node, *node) {
// Ignore the split if the page doesn't have at least enough nodes for
// two pages or if the nodes can fit in a single page.
if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
return n, nil
}
// Determine the threshold before starting a new node.
var fillPercent = n.bucket.FillPercent
if fillPercent < minFillPercent {
fillPercent = minFillPercent
} else if fillPercent > maxFillPercent {
fillPercent = maxFillPercent
}
threshold := int(float64(pageSize) * fillPercent)
// Determine split position and sizes of the two pages.
splitIndex, _ := n.splitIndex(threshold)
// Split node into two separate nodes.
// If there's no parent then we'll need to create one.
if n.parent == nil {
n.parent = &node{bucket: n.bucket, children: []*node{n}}
}
// Create a new node and add it to the parent.
next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
n.parent.children = append(n.parent.children, next)
// Split inodes across two nodes.
next.inodes = n.inodes[splitIndex:]
n.inodes = n.inodes[:splitIndex]
// Update the statistics.
n.bucket.tx.stats.Split++
return n, next
}
// splitIndex finds the position where a page will fill a given threshold.
// It returns the index as well as the size of the first page.
// This is only be called from split().
func (n *node) splitIndex(threshold int) (index, sz int) {
sz = pageHeaderSize
// Loop until we only have the minimum number of keys required for the second page.
for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
index = i
inode := n.inodes[i]
elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
// If we have at least the minimum number of keys and adding another
// node would put us over the threshold then exit and return.
if i >= minKeysPerPage && sz+elsize > threshold {
break
}
// Add the element size to the total size.
sz += elsize
}
return
}
// spill writes the nodes to dirty pages and splits nodes as it goes.
// Returns an error if dirty pages cannot be allocated.
func (n *node) spill() error {
var tx = n.bucket.tx
if n.spilled {
return nil
}
// Spill child nodes first. Child nodes can materialize sibling nodes in
// the case of split-merge so we cannot use a range loop. We have to check
// the children size on every loop iteration.
sort.Sort(n.children)
for i := 0; i < len(n.children); i++ {
if err := n.children[i].spill(); err != nil {
return err
}
}
// We no longer need the child list because it's only used for spill tracking.
n.children = nil
// Split nodes into appropriate sizes. The first node will always be n.
var nodes = n.split(tx.db.pageSize)
for _, node := range nodes {
// Add node's page to the freelist if it's not new.
if node.pgid > 0 {
tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
node.pgid = 0
}
// Allocate contiguous space for the node.
p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
if err != nil {
return err
}
// Write the node.
if p.id >= tx.meta.pgid {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
}
node.pgid = p.id
node.write(p)
node.spilled = true
// Insert into parent inodes.
if node.parent != nil {
var key = node.key
if key == nil {
key = node.inodes[0].key
}
node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
node.key = node.inodes[0].key
_assert(len(node.key) > 0, "spill: zero-length node key")
}
// Update the statistics.
tx.stats.Spill++
}
// If the root node split and created a new root then we need to spill that
// as well. We'll clear out the children to make sure it doesn't try to respill.
if n.parent != nil && n.parent.pgid == 0 {
n.children = nil
return n.parent.spill()
}
return nil
}
// rebalance attempts to combine the node with sibling nodes if the node fill
// size is below a threshold or if there are not enough keys.
func (n *node) rebalance() {
if !n.unbalanced {
return
}
n.unbalanced = false
// Update statistics.
n.bucket.tx.stats.Rebalance++
// Ignore if node is above threshold (25%) and has enough keys.
var threshold = n.bucket.tx.db.pageSize / 4
if n.size() > threshold && len(n.inodes) > n.minKeys() {
return
}
// Root node has special handling.
if n.parent == nil {
// If root node is a branch and only has one node then collapse it.
if !n.isLeaf && len(n.inodes) == 1 {
// Move root's child up.
child := n.bucket.node(n.inodes[0].pgid, n)
n.isLeaf = child.isLeaf
n.inodes = child.inodes[:]
n.children = child.children
// Reparent all child nodes being moved.
for _, inode := range n.inodes {
if child, ok := n.bucket.nodes[inode.pgid]; ok {
child.parent = n
}
}
// Remove old child.
child.parent = nil
delete(n.bucket.nodes, child.pgid)
child.free()
}
return
}
// If node has no keys then just remove it.
if n.numChildren() == 0 {
n.parent.del(n.key)
n.parent.removeChild(n)
delete(n.bucket.nodes, n.pgid)
n.free()
n.parent.rebalance()
return
}
_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
// Destination node is right sibling if idx == 0, otherwise left sibling.
var target *node
var useNextSibling = (n.parent.childIndex(n) == 0)
if useNextSibling {
target = n.nextSibling()
} else {
target = n.prevSibling()
}
// If both this node and the target node are too small then merge them.
if useNextSibling {
// Reparent all child nodes being moved.
for _, inode := range target.inodes {
if child, ok := n.bucket.nodes[inode.pgid]; ok {
child.parent.removeChild(child)
child.parent = n
child.parent.children = append(child.parent.children, child)
}
}
// Copy over inodes from target and remove target.
n.inodes = append(n.inodes, target.inodes...)
n.parent.del(target.key)
n.parent.removeChild(target)
delete(n.bucket.nodes, target.pgid)
target.free()
} else {
// Reparent all child nodes being moved.
for _, inode := range n.inodes {
if child, ok := n.bucket.nodes[inode.pgid]; ok {
child.parent.removeChild(child)
child.parent = target
child.parent.children = append(child.parent.children, child)
}
}
// Copy over inodes to target and remove node.
target.inodes = append(target.inodes, n.inodes...)
n.parent.del(n.key)
n.parent.removeChild(n)
delete(n.bucket.nodes, n.pgid)
n.free()
}
// Either this node or the target node was deleted from the parent so rebalance it.
n.parent.rebalance()
}
// removes a node from the list of in-memory children.
// This does not affect the inodes.
func (n *node) removeChild(target *node) {
for i, child := range n.children {
if child == target {
n.children = append(n.children[:i], n.children[i+1:]...)
return
}
}
}
// dereference causes the node to copy all its inode key/value references to heap memory.
// This is required when the mmap is reallocated so inodes are not pointing to stale data.
func (n *node) dereference() {
if n.key != nil {
key := make([]byte, len(n.key))
copy(key, n.key)
n.key = key
_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
}
for i := range n.inodes {
inode := &n.inodes[i]
key := make([]byte, len(inode.key))
copy(key, inode.key)
inode.key = key
_assert(len(inode.key) > 0, "dereference: zero-length inode key")
value := make([]byte, len(inode.value))
copy(value, inode.value)
inode.value = value
}
// Recursively dereference children.
for _, child := range n.children {
child.dereference()
}
// Update statistics.
n.bucket.tx.stats.NodeDeref++
}
// free adds the node's underlying page to the freelist.
func (n *node) free() {
if n.pgid != 0 {
n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
n.pgid = 0
}
}
// dump writes the contents of the node to STDERR for debugging purposes.
/*
func (n *node) dump() {
// Write node header.
var typ = "branch"
if n.isLeaf {
typ = "leaf"
}
warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes))
// Write out abbreviated version of each item.
for _, item := range n.inodes {
if n.isLeaf {
if item.flags&bucketLeafFlag != 0 {
bucket := (*bucket)(unsafe.Pointer(&item.value[0]))
warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root)
} else {
warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4))
}
} else {
warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid)
}
}
warn("")
}
*/
type nodes []*node
func (s nodes) Len() int { return len(s) }
func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
// inode represents an internal node inside of a node.
// It can be used to point to elements in a page or point
// to an element which hasn't been added to a page yet.
type inode struct {
flags uint32
pgid pgid
key []byte
value []byte
}
type inodes []inode

197
vendor/github.com/boltdb/bolt/page.go generated vendored
View file

@ -1,197 +0,0 @@
package bolt
import (
"fmt"
"os"
"sort"
"unsafe"
)
const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr))
const minKeysPerPage = 2
const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
const (
branchPageFlag = 0x01
leafPageFlag = 0x02
metaPageFlag = 0x04
freelistPageFlag = 0x10
)
const (
bucketLeafFlag = 0x01
)
type pgid uint64
type page struct {
id pgid
flags uint16
count uint16
overflow uint32
ptr uintptr
}
// typ returns a human readable page type string used for debugging.
func (p *page) typ() string {
if (p.flags & branchPageFlag) != 0 {
return "branch"
} else if (p.flags & leafPageFlag) != 0 {
return "leaf"
} else if (p.flags & metaPageFlag) != 0 {
return "meta"
} else if (p.flags & freelistPageFlag) != 0 {
return "freelist"
}
return fmt.Sprintf("unknown<%02x>", p.flags)
}
// meta returns a pointer to the metadata section of the page.
func (p *page) meta() *meta {
return (*meta)(unsafe.Pointer(&p.ptr))
}
// leafPageElement retrieves the leaf node by index
func (p *page) leafPageElement(index uint16) *leafPageElement {
n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index]
return n
}
// leafPageElements retrieves a list of leaf nodes.
func (p *page) leafPageElements() []leafPageElement {
if p.count == 0 {
return nil
}
return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
// branchPageElement retrieves the branch node by index
func (p *page) branchPageElement(index uint16) *branchPageElement {
return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
}
// branchPageElements retrieves a list of branch nodes.
func (p *page) branchPageElements() []branchPageElement {
if p.count == 0 {
return nil
}
return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
// dump writes n bytes of the page to STDERR as hex output.
func (p *page) hexdump(n int) {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n]
fmt.Fprintf(os.Stderr, "%x\n", buf)
}
type pages []*page
func (s pages) Len() int { return len(s) }
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
// branchPageElement represents a node on a branch page.
type branchPageElement struct {
pos uint32
ksize uint32
pgid pgid
}
// key returns a byte slice of the node key.
func (n *branchPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// leafPageElement represents a node on a leaf page.
type leafPageElement struct {
flags uint32
pos uint32
ksize uint32
vsize uint32
}
// key returns a byte slice of the node key.
func (n *leafPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize]
}
// value returns a byte slice of the node value.
func (n *leafPageElement) value() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize]
}
// PageInfo represents human readable information about a page.
type PageInfo struct {
ID int
Type string
Count int
OverflowCount int
}
type pgids []pgid
func (s pgids) Len() int { return len(s) }
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
// merge returns the sorted union of a and b.
func (a pgids) merge(b pgids) pgids {
// Return the opposite slice if one is nil.
if len(a) == 0 {
return b
}
if len(b) == 0 {
return a
}
merged := make(pgids, len(a)+len(b))
mergepgids(merged, a, b)
return merged
}
// mergepgids copies the sorted union of a and b into dst.
// If dst is too small, it panics.
func mergepgids(dst, a, b pgids) {
if len(dst) < len(a)+len(b) {
panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
}
// Copy in the opposite slice if one is nil.
if len(a) == 0 {
copy(dst, b)
return
}
if len(b) == 0 {
copy(dst, a)
return
}
// Merged will hold all elements from both lists.
merged := dst[:0]
// Assign lead to the slice with a lower starting value, follow to the higher value.
lead, follow := a, b
if b[0] < a[0] {
lead, follow = b, a
}
// Continue while there are elements in the lead.
for len(lead) > 0 {
// Merge largest prefix of lead that is ahead of follow[0].
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
merged = append(merged, lead[:n]...)
if n >= len(lead) {
break
}
// Swap lead and follow.
lead, follow = follow, lead[n:]
}
// Append what's left in follow.
_ = append(merged, follow...)
}

684
vendor/github.com/boltdb/bolt/tx.go generated vendored
View file

@ -1,684 +0,0 @@
package bolt
import (
"fmt"
"io"
"os"
"sort"
"strings"
"time"
"unsafe"
)
// txid represents the internal transaction identifier.
type txid uint64
// Tx represents a read-only or read/write transaction on the database.
// Read-only transactions can be used for retrieving values for keys and creating cursors.
// Read/write transactions can create and remove buckets and create and remove keys.
//
// IMPORTANT: You must commit or rollback transactions when you are done with
// them. Pages can not be reclaimed by the writer until no more transactions
// are using them. A long running read transaction can cause the database to
// quickly grow.
type Tx struct {
writable bool
managed bool
db *DB
meta *meta
root Bucket
pages map[pgid]*page
stats TxStats
commitHandlers []func()
// WriteFlag specifies the flag for write-related methods like WriteTo().
// Tx opens the database file with the specified flag to copy the data.
//
// By default, the flag is unset, which works well for mostly in-memory
// workloads. For databases that are much larger than available RAM,
// set the flag to syscall.O_DIRECT to avoid trashing the page cache.
WriteFlag int
}
// init initializes the transaction.
func (tx *Tx) init(db *DB) {
tx.db = db
tx.pages = nil
// Copy the meta page since it can be changed by the writer.
tx.meta = &meta{}
db.meta().copy(tx.meta)
// Copy over the root bucket.
tx.root = newBucket(tx)
tx.root.bucket = &bucket{}
*tx.root.bucket = tx.meta.root
// Increment the transaction id and add a page cache for writable transactions.
if tx.writable {
tx.pages = make(map[pgid]*page)
tx.meta.txid += txid(1)
}
}
// ID returns the transaction id.
func (tx *Tx) ID() int {
return int(tx.meta.txid)
}
// DB returns a reference to the database that created the transaction.
func (tx *Tx) DB() *DB {
return tx.db
}
// Size returns current database size in bytes as seen by this transaction.
func (tx *Tx) Size() int64 {
return int64(tx.meta.pgid) * int64(tx.db.pageSize)
}
// Writable returns whether the transaction can perform write operations.
func (tx *Tx) Writable() bool {
return tx.writable
}
// Cursor creates a cursor associated with the root bucket.
// All items in the cursor will return a nil value because all root bucket keys point to buckets.
// The cursor is only valid as long as the transaction is open.
// Do not use a cursor after the transaction is closed.
func (tx *Tx) Cursor() *Cursor {
return tx.root.Cursor()
}
// Stats retrieves a copy of the current transaction statistics.
func (tx *Tx) Stats() TxStats {
return tx.stats
}
// Bucket retrieves a bucket by name.
// Returns nil if the bucket does not exist.
// The bucket instance is only valid for the lifetime of the transaction.
func (tx *Tx) Bucket(name []byte) *Bucket {
return tx.root.Bucket(name)
}
// CreateBucket creates a new bucket.
// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
return tx.root.CreateBucket(name)
}
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
// Returns an error if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
return tx.root.CreateBucketIfNotExists(name)
}
// DeleteBucket deletes a bucket.
// Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
func (tx *Tx) DeleteBucket(name []byte) error {
return tx.root.DeleteBucket(name)
}
// ForEach executes a function for each bucket in the root.
// If the provided function returns an error then the iteration is stopped and
// the error is returned to the caller.
func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
return tx.root.ForEach(func(k, v []byte) error {
if err := fn(k, tx.root.Bucket(k)); err != nil {
return err
}
return nil
})
}
// OnCommit adds a handler function to be executed after the transaction successfully commits.
func (tx *Tx) OnCommit(fn func()) {
tx.commitHandlers = append(tx.commitHandlers, fn)
}
// Commit writes all changes to disk and updates the meta page.
// Returns an error if a disk write error occurs, or if Commit is
// called on a read-only transaction.
func (tx *Tx) Commit() error {
_assert(!tx.managed, "managed tx commit not allowed")
if tx.db == nil {
return ErrTxClosed
} else if !tx.writable {
return ErrTxNotWritable
}
// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
// Rebalance nodes which have had deletions.
var startTime = time.Now()
tx.root.rebalance()
if tx.stats.Rebalance > 0 {
tx.stats.RebalanceTime += time.Since(startTime)
}
// spill data onto dirty pages.
startTime = time.Now()
if err := tx.root.spill(); err != nil {
tx.rollback()
return err
}
tx.stats.SpillTime += time.Since(startTime)
// Free the old root bucket.
tx.meta.root.root = tx.root.root
opgid := tx.meta.pgid
// Free the freelist and allocate new pages for it. This will overestimate
// the size of the freelist but not underestimate the size (which would be bad).
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
if err != nil {
tx.rollback()
return err
}
if err := tx.db.freelist.write(p); err != nil {
tx.rollback()
return err
}
tx.meta.freelist = p.id
// If the high water mark has moved up then attempt to grow the database.
if tx.meta.pgid > opgid {
if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
tx.rollback()
return err
}
}
// Write dirty pages to disk.
startTime = time.Now()
if err := tx.write(); err != nil {
tx.rollback()
return err
}
// If strict mode is enabled then perform a consistency check.
// Only the first consistency error is reported in the panic.
if tx.db.StrictMode {
ch := tx.Check()
var errs []string
for {
err, ok := <-ch
if !ok {
break
}
errs = append(errs, err.Error())
}
if len(errs) > 0 {
panic("check fail: " + strings.Join(errs, "\n"))
}
}
// Write meta to disk.
if err := tx.writeMeta(); err != nil {
tx.rollback()
return err
}
tx.stats.WriteTime += time.Since(startTime)
// Finalize the transaction.
tx.close()
// Execute commit handlers now that the locks have been removed.
for _, fn := range tx.commitHandlers {
fn()
}
return nil
}
// Rollback closes the transaction and ignores all previous updates. Read-only
// transactions must be rolled back and not committed.
func (tx *Tx) Rollback() error {
_assert(!tx.managed, "managed tx rollback not allowed")
if tx.db == nil {
return ErrTxClosed
}
tx.rollback()
return nil
}
func (tx *Tx) rollback() {
if tx.db == nil {
return
}
if tx.writable {
tx.db.freelist.rollback(tx.meta.txid)
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
}
tx.close()
}
func (tx *Tx) close() {
if tx.db == nil {
return
}
if tx.writable {
// Grab freelist stats.
var freelistFreeN = tx.db.freelist.free_count()
var freelistPendingN = tx.db.freelist.pending_count()
var freelistAlloc = tx.db.freelist.size()
// Remove transaction ref & writer lock.
tx.db.rwtx = nil
tx.db.rwlock.Unlock()
// Merge statistics.
tx.db.statlock.Lock()
tx.db.stats.FreePageN = freelistFreeN
tx.db.stats.PendingPageN = freelistPendingN
tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize
tx.db.stats.FreelistInuse = freelistAlloc
tx.db.stats.TxStats.add(&tx.stats)
tx.db.statlock.Unlock()
} else {
tx.db.removeTx(tx)
}
// Clear all references.
tx.db = nil
tx.meta = nil
tx.root = Bucket{tx: tx}
tx.pages = nil
}
// Copy writes the entire database to a writer.
// This function exists for backwards compatibility. Use WriteTo() instead.
func (tx *Tx) Copy(w io.Writer) error {
_, err := tx.WriteTo(w)
return err
}
// WriteTo writes the entire database to a writer.
// If err == nil then exactly tx.Size() bytes will be written into the writer.
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
// Attempt to open reader with WriteFlag
f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
if err != nil {
return 0, err
}
defer func() { _ = f.Close() }()
// Generate a meta page. We use the same page data for both meta pages.
buf := make([]byte, tx.db.pageSize)
page := (*page)(unsafe.Pointer(&buf[0]))
page.flags = metaPageFlag
*page.meta() = *tx.meta
// Write meta 0.
page.id = 0
page.meta().checksum = page.meta().sum64()
nn, err := w.Write(buf)
n += int64(nn)
if err != nil {
return n, fmt.Errorf("meta 0 copy: %s", err)
}
// Write meta 1 with a lower transaction id.
page.id = 1
page.meta().txid -= 1
page.meta().checksum = page.meta().sum64()
nn, err = w.Write(buf)
n += int64(nn)
if err != nil {
return n, fmt.Errorf("meta 1 copy: %s", err)
}
// Move past the meta pages in the file.
if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil {
return n, fmt.Errorf("seek: %s", err)
}
// Copy data pages.
wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
n += wn
if err != nil {
return n, err
}
return n, f.Close()
}
// CopyFile copies the entire database to file at the given path.
// A reader transaction is maintained during the copy so it is safe to continue
// using the database while a copy is in progress.
func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
if err != nil {
return err
}
err = tx.Copy(f)
if err != nil {
_ = f.Close()
return err
}
return f.Close()
}
// Check performs several consistency checks on the database for this transaction.
// An error is returned if any inconsistency is found.
//
// It can be safely run concurrently on a writable transaction. However, this
// incurs a high cost for large databases and databases with a lot of subbuckets
// because of caching. This overhead can be removed if running on a read-only
// transaction, however, it is not safe to execute other writer transactions at
// the same time.
func (tx *Tx) Check() <-chan error {
ch := make(chan error)
go tx.check(ch)
return ch
}
func (tx *Tx) check(ch chan error) {
// Check if any pages are double freed.
freed := make(map[pgid]bool)
all := make([]pgid, tx.db.freelist.count())
tx.db.freelist.copyall(all)
for _, id := range all {
if freed[id] {
ch <- fmt.Errorf("page %d: already freed", id)
}
freed[id] = true
}
// Track every reachable page.
reachable := make(map[pgid]*page)
reachable[0] = tx.page(0) // meta0
reachable[1] = tx.page(1) // meta1
for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
}
// Recursively check buckets.
tx.checkBucket(&tx.root, reachable, freed, ch)
// Ensure all pages below high water mark are either reachable or freed.
for i := pgid(0); i < tx.meta.pgid; i++ {
_, isReachable := reachable[i]
if !isReachable && !freed[i] {
ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
}
}
// Close the channel to signal completion.
close(ch)
}
func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) {
// Ignore inline buckets.
if b.root == 0 {
return
}
// Check every page used by this bucket.
b.tx.forEachPage(b.root, 0, func(p *page, _ int) {
if p.id > tx.meta.pgid {
ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid))
}
// Ensure each page is only referenced once.
for i := pgid(0); i <= pgid(p.overflow); i++ {
var id = p.id + i
if _, ok := reachable[id]; ok {
ch <- fmt.Errorf("page %d: multiple references", int(id))
}
reachable[id] = p
}
// We should only encounter un-freed leaf and branch pages.
if freed[p.id] {
ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ())
}
})
// Check each bucket within this bucket.
_ = b.ForEach(func(k, v []byte) error {
if child := b.Bucket(k); child != nil {
tx.checkBucket(child, reachable, freed, ch)
}
return nil
})
}
// allocate returns a contiguous block of memory starting at a given page.
func (tx *Tx) allocate(count int) (*page, error) {
p, err := tx.db.allocate(count)
if err != nil {
return nil, err
}
// Save to our page cache.
tx.pages[p.id] = p
// Update statistics.
tx.stats.PageCount++
tx.stats.PageAlloc += count * tx.db.pageSize
return p, nil
}
// write writes any dirty pages to disk.
func (tx *Tx) write() error {
// Sort pages by id.
pages := make(pages, 0, len(tx.pages))
for _, p := range tx.pages {
pages = append(pages, p)
}
// Clear out page cache early.
tx.pages = make(map[pgid]*page)
sort.Sort(pages)
// Write pages to disk in order.
for _, p := range pages {
size := (int(p.overflow) + 1) * tx.db.pageSize
offset := int64(p.id) * int64(tx.db.pageSize)
// Write out page in "max allocation" sized chunks.
ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
for {
// Limit our write to our max allocation size.
sz := size
if sz > maxAllocSize-1 {
sz = maxAllocSize - 1
}
// Write chunk to disk.
buf := ptr[:sz]
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
}
// Update statistics.
tx.stats.Write++
// Exit inner for loop if we've written all the chunks.
size -= sz
if size == 0 {
break
}
// Otherwise move offset forward and move pointer to next chunk.
offset += int64(sz)
ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
}
}
// Ignore file sync if flag is set on DB.
if !tx.db.NoSync || IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err
}
}
// Put small pages back to page pool.
for _, p := range pages {
// Ignore page sizes over 1 page.
// These are allocated using make() instead of the page pool.
if int(p.overflow) != 0 {
continue
}
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize]
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
for i := range buf {
buf[i] = 0
}
tx.db.pagePool.Put(buf)
}
return nil
}
// writeMeta writes the meta to the disk.
func (tx *Tx) writeMeta() error {
// Create a temporary buffer for the meta page.
buf := make([]byte, tx.db.pageSize)
p := tx.db.pageInBuffer(buf, 0)
tx.meta.write(p)
// Write the meta page to file.
if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
return err
}
if !tx.db.NoSync || IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err
}
}
// Update statistics.
tx.stats.Write++
return nil
}
// page returns a reference to the page with a given id.
// If page has been written to then a temporary buffered page is returned.
func (tx *Tx) page(id pgid) *page {
// Check the dirty pages first.
if tx.pages != nil {
if p, ok := tx.pages[id]; ok {
return p
}
}
// Otherwise return directly from the mmap.
return tx.db.page(id)
}
// forEachPage iterates over every page within a given page and executes a function.
func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
p := tx.page(pgid)
// Execute function.
fn(p, depth)
// Recursively loop over children.
if (p.flags & branchPageFlag) != 0 {
for i := 0; i < int(p.count); i++ {
elem := p.branchPageElement(uint16(i))
tx.forEachPage(elem.pgid, depth+1, fn)
}
}
}
// Page returns page information for a given page number.
// This is only safe for concurrent use when used by a writable transaction.
func (tx *Tx) Page(id int) (*PageInfo, error) {
if tx.db == nil {
return nil, ErrTxClosed
} else if pgid(id) >= tx.meta.pgid {
return nil, nil
}
// Build the page info.
p := tx.db.page(pgid(id))
info := &PageInfo{
ID: id,
Count: int(p.count),
OverflowCount: int(p.overflow),
}
// Determine the type (or if it's free).
if tx.db.freelist.freed(pgid(id)) {
info.Type = "free"
} else {
info.Type = p.typ()
}
return info, nil
}
// TxStats represents statistics about the actions performed by the transaction.
type TxStats struct {
// Page statistics.
PageCount int // number of page allocations
PageAlloc int // total bytes allocated
// Cursor statistics.
CursorCount int // number of cursors created
// Node statistics
NodeCount int // number of node allocations
NodeDeref int // number of node dereferences
// Rebalance statistics.
Rebalance int // number of node rebalances
RebalanceTime time.Duration // total time spent rebalancing
// Split/Spill statistics.
Split int // number of nodes split
Spill int // number of nodes spilled
SpillTime time.Duration // total time spent spilling
// Write statistics.
Write int // number of writes performed
WriteTime time.Duration // total time spent writing to disk
}
func (s *TxStats) add(other *TxStats) {
s.PageCount += other.PageCount
s.PageAlloc += other.PageAlloc
s.CursorCount += other.CursorCount
s.NodeCount += other.NodeCount
s.NodeDeref += other.NodeDeref
s.Rebalance += other.Rebalance
s.RebalanceTime += other.RebalanceTime
s.Split += other.Split
s.Spill += other.Spill
s.SpillTime += other.SpillTime
s.Write += other.Write
s.WriteTime += other.WriteTime
}
// Sub calculates and returns the difference between two sets of transaction stats.
// This is useful when obtaining stats at two different points and time and
// you need the performance counters that occurred within that time span.
func (s *TxStats) Sub(other *TxStats) TxStats {
var diff TxStats
diff.PageCount = s.PageCount - other.PageCount
diff.PageAlloc = s.PageAlloc - other.PageAlloc
diff.CursorCount = s.CursorCount - other.CursorCount
diff.NodeCount = s.NodeCount - other.NodeCount
diff.NodeDeref = s.NodeDeref - other.NodeDeref
diff.Rebalance = s.Rebalance - other.Rebalance
diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime
diff.Split = s.Split - other.Split
diff.Spill = s.Spill - other.Spill
diff.SpillTime = s.SpillTime - other.SpillTime
diff.Write = s.Write - other.Write
diff.WriteTime = s.WriteTime - other.WriteTime
return diff
}

View file

@ -15,7 +15,7 @@ import "time"
// BackOff is a backoff policy for retrying an operation.
type BackOff interface {
// NextBackOff returns the duration to wait before retrying the operation,
// or backoff.Stop to indicate that no more retries should be made.
// or backoff. Stop to indicate that no more retries should be made.
//
// Example usage:
//

View file

@ -63,6 +63,7 @@ type ExponentialBackOff struct {
currentInterval time.Duration
startTime time.Time
random *rand.Rand
}
// Clock is an interface that returns current time for BackOff.
@ -88,6 +89,7 @@ func NewExponentialBackOff() *ExponentialBackOff {
MaxInterval: DefaultMaxInterval,
MaxElapsedTime: DefaultMaxElapsedTime,
Clock: SystemClock,
random: rand.New(rand.NewSource(time.Now().UnixNano())),
}
b.Reset()
return b
@ -116,13 +118,18 @@ func (b *ExponentialBackOff) NextBackOff() time.Duration {
return Stop
}
defer b.incrementCurrentInterval()
return getRandomValueFromInterval(b.RandomizationFactor, rand.Float64(), b.currentInterval)
if b.random == nil {
b.random = rand.New(rand.NewSource(time.Now().UnixNano()))
}
return getRandomValueFromInterval(b.RandomizationFactor, b.random.Float64(), b.currentInterval)
}
// GetElapsedTime returns the elapsed time since an ExponentialBackOff instance
// is created and is reset when Reset() is called.
//
// The elapsed time is computed using time.Now().UnixNano().
// The elapsed time is computed using time.Now().UnixNano(). It is
// safe to call even while the backoff policy is used by a running
// ticker.
func (b *ExponentialBackOff) GetElapsedTime() time.Duration {
return b.Clock.Now().Sub(b.startTime)
}

View file

@ -18,9 +18,12 @@ type Ticker struct {
stopOnce sync.Once
}
// NewTicker returns a new Ticker containing a channel that will send the time at times
// specified by the BackOff argument. Ticker is guaranteed to tick at least once.
// The channel is closed when Stop method is called or BackOff stops.
// NewTicker returns a new Ticker containing a channel that will send
// the time at times specified by the BackOff argument. Ticker is
// guaranteed to tick at least once. The channel is closed when Stop
// method is called or BackOff stops. It is not safe to manipulate the
// provided backoff policy (notably calling NextBackOff or Reset)
// while the ticker is running.
func NewTicker(b BackOff) *Ticker {
c := make(chan time.Time)
t := &Ticker{
@ -29,6 +32,7 @@ func NewTicker(b BackOff) *Ticker {
b: ensureContext(b),
stop: make(chan struct{}),
}
t.b.Reset()
go t.run()
runtime.SetFinalizer(t, (*Ticker).Stop)
return t
@ -42,7 +46,6 @@ func (t *Ticker) Stop() {
func (t *Ticker) run() {
c := t.c
defer close(c)
t.b.Reset()
// Ticker is guaranteed to tick at least once.
afterC := t.send(time.Now())

35
vendor/github.com/cenk/backoff/tries.go generated vendored Normal file
View file

@ -0,0 +1,35 @@
package backoff
import "time"
/*
WithMaxRetries creates a wrapper around another BackOff, which will
return Stop if NextBackOff() has been called too many times since
the last time Reset() was called
Note: Implementation is not thread-safe.
*/
func WithMaxRetries(b BackOff, max uint64) BackOff {
return &backOffTries{delegate: b, maxTries: max}
}
type backOffTries struct {
delegate BackOff
maxTries uint64
numTries uint64
}
func (b *backOffTries) NextBackOff() time.Duration {
if b.maxTries > 0 {
if b.maxTries <= b.numTries {
return Stop
}
b.numTries++
}
return b.delegate.NextBackOff()
}
func (b *backOffTries) Reset() {
b.numTries = 0
b.delegate.Reset()
}

View file

@ -48,12 +48,8 @@ func New(minValue, maxValue int64, sigfigs int) *Histogram {
panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs))
}
largestValueWithSingleUnitResolution := 2 * power(10, int64(sigfigs))
// we need to shove these down to float32 or the math is wrong
a := float32(math.Log(float64(largestValueWithSingleUnitResolution)))
b := float32(math.Log(2))
subBucketCountMagnitude := int32(math.Ceil(float64(a / b)))
largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs)
subBucketCountMagnitude := int32(math.Ceil(math.Log2(float64(largestValueWithSingleUnitResolution))))
subBucketHalfCountMagnitude := subBucketCountMagnitude
if subBucketHalfCountMagnitude < 1 {
@ -61,7 +57,7 @@ func New(minValue, maxValue int64, sigfigs int) *Histogram {
}
subBucketHalfCountMagnitude--
unitMagnitude := int32(math.Floor(math.Log(float64(minValue)) / math.Log(2)))
unitMagnitude := int32(math.Floor(math.Log2(float64(minValue))))
if unitMagnitude < 0 {
unitMagnitude = 0
}
@ -124,6 +120,11 @@ func (h *Histogram) Merge(from *Histogram) (dropped int64) {
return
}
// TotalCount returns total number of values recorded.
func (h *Histogram) TotalCount() int64 {
return h.totalCount
}
// Max returns the approximate maximum recorded value.
func (h *Histogram) Max() int64 {
var max int64
@ -133,7 +134,7 @@ func (h *Histogram) Max() int64 {
max = i.highestEquivalentValue
}
}
return h.lowestEquivalentValue(max)
return h.highestEquivalentValue(max)
}
// Min returns the approximate minimum recorded value.
@ -151,6 +152,9 @@ func (h *Histogram) Min() int64 {
// Mean returns the approximate arithmetic mean of the recorded values.
func (h *Histogram) Mean() float64 {
if h.totalCount == 0 {
return 0
}
var total int64
i := h.iterator()
for i.next() {
@ -163,6 +167,10 @@ func (h *Histogram) Mean() float64 {
// StdDev returns the approximate standard deviation of the recorded values.
func (h *Histogram) StdDev() float64 {
if h.totalCount == 0 {
return 0
}
mean := h.Mean()
geometricDevTotal := 0.0
@ -267,6 +275,49 @@ func (h *Histogram) CumulativeDistribution() []Bracket {
return result
}
// SignificantFigures returns the significant figures used to create the
// histogram
func (h *Histogram) SignificantFigures() int64 {
return h.significantFigures
}
// LowestTrackableValue returns the lower bound on values that will be added
// to the histogram
func (h *Histogram) LowestTrackableValue() int64 {
return h.lowestTrackableValue
}
// HighestTrackableValue returns the upper bound on values that will be added
// to the histogram
func (h *Histogram) HighestTrackableValue() int64 {
return h.highestTrackableValue
}
// Histogram bar for plotting
type Bar struct {
From, To, Count int64
}
// Pretty print as csv for easy plotting
func (b Bar) String() string {
return fmt.Sprintf("%v, %v, %v\n", b.From, b.To, b.Count)
}
// Distribution returns an ordered list of bars of the
// distribution of recorded values, counts can be normalized to a probability
func (h *Histogram) Distribution() (result []Bar) {
i := h.iterator()
for i.next() {
result = append(result, Bar{
Count: i.countAtIdx,
From: h.lowestEquivalentValue(i.valueFromIdx),
To: i.highestEquivalentValue,
})
}
return result
}
// Equals returns true if the two Histograms are equivalent, false if not.
func (h *Histogram) Equals(other *Histogram) bool {
switch {
@ -300,11 +351,12 @@ func (h *Histogram) Export() *Snapshot {
LowestTrackableValue: h.lowestTrackableValue,
HighestTrackableValue: h.highestTrackableValue,
SignificantFigures: h.significantFigures,
Counts: h.counts,
Counts: append([]int64(nil), h.counts...), // copy
}
}
// Import returns a new Histogram populated from the Snapshot data.
// Import returns a new Histogram populated from the Snapshot data (which the
// caller must stop accessing).
func Import(s *Snapshot) *Histogram {
h := New(s.LowestTrackableValue, s.HighestTrackableValue, int(s.SignificantFigures))
h.counts = s.Counts
@ -478,7 +530,7 @@ func (p *pIterator) next() bool {
currentPercentile := (100.0 * float64(p.countToIdx)) / float64(p.h.totalCount)
if p.countAtIdx != 0 && p.percentileToIteratorTo <= currentPercentile {
p.percentile = p.percentileToIteratorTo
halfDistance := math.Pow(2, (math.Log(100.0/(100.0-(p.percentileToIteratorTo)))/math.Log(2))+1)
halfDistance := math.Trunc(math.Pow(2, math.Trunc(math.Log2(100.0/(100.0-p.percentileToIteratorTo)))+1))
percentileReportingTicks := float64(p.ticksPerHalfDistance) * halfDistance
p.percentileToIteratorTo += 100.0 / percentileReportingTicks
return true
@ -510,12 +562,3 @@ func bitLen(x int64) (n int64) {
}
return
}
func power(base, exp int64) (n int64) {
n = 1
for exp > 0 {
n *= base
exp--
}
return
}

View file

@ -132,9 +132,9 @@ func (f *freelist) free(txid txid, p *page) {
allocTxid, ok := f.allocs[p.id]
if ok {
delete(f.allocs, p.id)
} else if (p.flags & (freelistPageFlag | metaPageFlag)) != 0 {
// Safe to claim txid as allocating since these types are private to txid.
allocTxid = txid
} else if (p.flags & freelistPageFlag) != 0 {
// Freelist is always allocated by prior tx.
allocTxid = txid - 1
}
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
@ -233,6 +233,9 @@ func (f *freelist) freed(pgid pgid) bool {
// read initializes the freelist from a freelist page.
func (f *freelist) read(p *page) {
if (p.flags & freelistPageFlag) == 0 {
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
}
// If the page.count is at the max uint16 value (64k) then it's considered
// an overflow and the size of the freelist is stored as the first element.
idx, count := 0, int(p.count)

16
vendor/github.com/coreos/etcd/auth/doc.go generated vendored Normal file
View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package auth provides client role authentication for accessing keys in etcd.
package auth

137
vendor/github.com/coreos/etcd/auth/jwt.go generated vendored Normal file
View file

@ -0,0 +1,137 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package auth
import (
"crypto/rsa"
"io/ioutil"
jwt "github.com/dgrijalva/jwt-go"
"golang.org/x/net/context"
)
type tokenJWT struct {
signMethod string
signKey *rsa.PrivateKey
verifyKey *rsa.PublicKey
}
func (t *tokenJWT) enable() {}
func (t *tokenJWT) disable() {}
func (t *tokenJWT) invalidateUser(string) {}
func (t *tokenJWT) genTokenPrefix() (string, error) { return "", nil }
func (t *tokenJWT) info(ctx context.Context, token string, rev uint64) (*AuthInfo, bool) {
// rev isn't used in JWT, it is only used in simple token
var (
username string
revision uint64
)
parsed, err := jwt.Parse(token, func(token *jwt.Token) (interface{}, error) {
return t.verifyKey, nil
})
switch err.(type) {
case nil:
if !parsed.Valid {
plog.Warningf("invalid jwt token: %s", token)
return nil, false
}
claims := parsed.Claims.(jwt.MapClaims)
username = claims["username"].(string)
revision = uint64(claims["revision"].(float64))
default:
plog.Warningf("failed to parse jwt token: %s", err)
return nil, false
}
return &AuthInfo{Username: username, Revision: revision}, true
}
func (t *tokenJWT) assign(ctx context.Context, username string, revision uint64) (string, error) {
// Future work: let a jwt token include permission information would be useful for
// permission checking in proxy side.
tk := jwt.NewWithClaims(jwt.GetSigningMethod(t.signMethod),
jwt.MapClaims{
"username": username,
"revision": revision,
})
token, err := tk.SignedString(t.signKey)
if err != nil {
plog.Debugf("failed to sign jwt token: %s", err)
return "", err
}
plog.Debugf("jwt token: %s", token)
return token, err
}
func prepareOpts(opts map[string]string) (jwtSignMethod, jwtPubKeyPath, jwtPrivKeyPath string, err error) {
for k, v := range opts {
switch k {
case "sign-method":
jwtSignMethod = v
case "pub-key":
jwtPubKeyPath = v
case "priv-key":
jwtPrivKeyPath = v
default:
plog.Errorf("unknown token specific option: %s", k)
return "", "", "", ErrInvalidAuthOpts
}
}
return jwtSignMethod, jwtPubKeyPath, jwtPrivKeyPath, nil
}
func newTokenProviderJWT(opts map[string]string) (*tokenJWT, error) {
jwtSignMethod, jwtPubKeyPath, jwtPrivKeyPath, err := prepareOpts(opts)
if err != nil {
return nil, ErrInvalidAuthOpts
}
t := &tokenJWT{}
t.signMethod = jwtSignMethod
verifyBytes, err := ioutil.ReadFile(jwtPubKeyPath)
if err != nil {
plog.Errorf("failed to read public key (%s) for jwt: %s", jwtPubKeyPath, err)
return nil, err
}
t.verifyKey, err = jwt.ParseRSAPublicKeyFromPEM(verifyBytes)
if err != nil {
plog.Errorf("failed to parse public key (%s): %s", jwtPubKeyPath, err)
return nil, err
}
signBytes, err := ioutil.ReadFile(jwtPrivKeyPath)
if err != nil {
plog.Errorf("failed to read private key (%s) for jwt: %s", jwtPrivKeyPath, err)
return nil, err
}
t.signKey, err = jwt.ParseRSAPrivateKeyFromPEM(signBytes)
if err != nil {
plog.Errorf("failed to parse private key (%s): %s", jwtPrivKeyPath, err)
return nil, err
}
return t, nil
}

133
vendor/github.com/coreos/etcd/auth/range_perm_cache.go generated vendored Normal file
View file

@ -0,0 +1,133 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package auth
import (
"github.com/coreos/etcd/auth/authpb"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/adt"
)
func getMergedPerms(tx backend.BatchTx, userName string) *unifiedRangePermissions {
user := getUser(tx, userName)
if user == nil {
plog.Errorf("invalid user name %s", userName)
return nil
}
readPerms := &adt.IntervalTree{}
writePerms := &adt.IntervalTree{}
for _, roleName := range user.Roles {
role := getRole(tx, roleName)
if role == nil {
continue
}
for _, perm := range role.KeyPermission {
var ivl adt.Interval
var rangeEnd []byte
if len(perm.RangeEnd) != 1 || perm.RangeEnd[0] != 0 {
rangeEnd = perm.RangeEnd
}
if len(perm.RangeEnd) != 0 {
ivl = adt.NewBytesAffineInterval(perm.Key, rangeEnd)
} else {
ivl = adt.NewBytesAffinePoint(perm.Key)
}
switch perm.PermType {
case authpb.READWRITE:
readPerms.Insert(ivl, struct{}{})
writePerms.Insert(ivl, struct{}{})
case authpb.READ:
readPerms.Insert(ivl, struct{}{})
case authpb.WRITE:
writePerms.Insert(ivl, struct{}{})
}
}
}
return &unifiedRangePermissions{
readPerms: readPerms,
writePerms: writePerms,
}
}
func checkKeyInterval(cachedPerms *unifiedRangePermissions, key, rangeEnd []byte, permtyp authpb.Permission_Type) bool {
if len(rangeEnd) == 1 && rangeEnd[0] == 0 {
rangeEnd = nil
}
ivl := adt.NewBytesAffineInterval(key, rangeEnd)
switch permtyp {
case authpb.READ:
return cachedPerms.readPerms.Contains(ivl)
case authpb.WRITE:
return cachedPerms.writePerms.Contains(ivl)
default:
plog.Panicf("unknown auth type: %v", permtyp)
}
return false
}
func checkKeyPoint(cachedPerms *unifiedRangePermissions, key []byte, permtyp authpb.Permission_Type) bool {
pt := adt.NewBytesAffinePoint(key)
switch permtyp {
case authpb.READ:
return cachedPerms.readPerms.Intersects(pt)
case authpb.WRITE:
return cachedPerms.writePerms.Intersects(pt)
default:
plog.Panicf("unknown auth type: %v", permtyp)
}
return false
}
func (as *authStore) isRangeOpPermitted(tx backend.BatchTx, userName string, key, rangeEnd []byte, permtyp authpb.Permission_Type) bool {
// assumption: tx is Lock()ed
_, ok := as.rangePermCache[userName]
if !ok {
perms := getMergedPerms(tx, userName)
if perms == nil {
plog.Errorf("failed to create a unified permission of user %s", userName)
return false
}
as.rangePermCache[userName] = perms
}
if len(rangeEnd) == 0 {
return checkKeyPoint(as.rangePermCache[userName], key, permtyp)
}
return checkKeyInterval(as.rangePermCache[userName], key, rangeEnd, permtyp)
}
func (as *authStore) clearCachedPerm() {
as.rangePermCache = make(map[string]*unifiedRangePermissions)
}
func (as *authStore) invalidateCachedPerm(userName string) {
delete(as.rangePermCache, userName)
}
type unifiedRangePermissions struct {
readPerms *adt.IntervalTree
writePerms *adt.IntervalTree
}

220
vendor/github.com/coreos/etcd/auth/simple_token.go generated vendored Normal file
View file

@ -0,0 +1,220 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package auth
// CAUTION: This randum number based token mechanism is only for testing purpose.
// JWT based mechanism will be added in the near future.
import (
"crypto/rand"
"fmt"
"math/big"
"strconv"
"strings"
"sync"
"time"
"golang.org/x/net/context"
)
const (
letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
defaultSimpleTokenLength = 16
)
// var for testing purposes
var (
simpleTokenTTL = 5 * time.Minute
simpleTokenTTLResolution = 1 * time.Second
)
type simpleTokenTTLKeeper struct {
tokens map[string]time.Time
donec chan struct{}
stopc chan struct{}
deleteTokenFunc func(string)
mu *sync.Mutex
}
func (tm *simpleTokenTTLKeeper) stop() {
select {
case tm.stopc <- struct{}{}:
case <-tm.donec:
}
<-tm.donec
}
func (tm *simpleTokenTTLKeeper) addSimpleToken(token string) {
tm.tokens[token] = time.Now().Add(simpleTokenTTL)
}
func (tm *simpleTokenTTLKeeper) resetSimpleToken(token string) {
if _, ok := tm.tokens[token]; ok {
tm.tokens[token] = time.Now().Add(simpleTokenTTL)
}
}
func (tm *simpleTokenTTLKeeper) deleteSimpleToken(token string) {
delete(tm.tokens, token)
}
func (tm *simpleTokenTTLKeeper) run() {
tokenTicker := time.NewTicker(simpleTokenTTLResolution)
defer func() {
tokenTicker.Stop()
close(tm.donec)
}()
for {
select {
case <-tokenTicker.C:
nowtime := time.Now()
tm.mu.Lock()
for t, tokenendtime := range tm.tokens {
if nowtime.After(tokenendtime) {
tm.deleteTokenFunc(t)
delete(tm.tokens, t)
}
}
tm.mu.Unlock()
case <-tm.stopc:
return
}
}
}
type tokenSimple struct {
indexWaiter func(uint64) <-chan struct{}
simpleTokenKeeper *simpleTokenTTLKeeper
simpleTokensMu sync.Mutex
simpleTokens map[string]string // token -> username
}
func (t *tokenSimple) genTokenPrefix() (string, error) {
ret := make([]byte, defaultSimpleTokenLength)
for i := 0; i < defaultSimpleTokenLength; i++ {
bInt, err := rand.Int(rand.Reader, big.NewInt(int64(len(letters))))
if err != nil {
return "", err
}
ret[i] = letters[bInt.Int64()]
}
return string(ret), nil
}
func (t *tokenSimple) assignSimpleTokenToUser(username, token string) {
t.simpleTokensMu.Lock()
_, ok := t.simpleTokens[token]
if ok {
plog.Panicf("token %s is alredy used", token)
}
t.simpleTokens[token] = username
t.simpleTokenKeeper.addSimpleToken(token)
t.simpleTokensMu.Unlock()
}
func (t *tokenSimple) invalidateUser(username string) {
if t.simpleTokenKeeper == nil {
return
}
t.simpleTokensMu.Lock()
for token, name := range t.simpleTokens {
if strings.Compare(name, username) == 0 {
delete(t.simpleTokens, token)
t.simpleTokenKeeper.deleteSimpleToken(token)
}
}
t.simpleTokensMu.Unlock()
}
func (t *tokenSimple) enable() {
delf := func(tk string) {
if username, ok := t.simpleTokens[tk]; ok {
plog.Infof("deleting token %s for user %s", tk, username)
delete(t.simpleTokens, tk)
}
}
t.simpleTokenKeeper = &simpleTokenTTLKeeper{
tokens: make(map[string]time.Time),
donec: make(chan struct{}),
stopc: make(chan struct{}),
deleteTokenFunc: delf,
mu: &t.simpleTokensMu,
}
go t.simpleTokenKeeper.run()
}
func (t *tokenSimple) disable() {
t.simpleTokensMu.Lock()
tk := t.simpleTokenKeeper
t.simpleTokenKeeper = nil
t.simpleTokens = make(map[string]string) // invalidate all tokens
t.simpleTokensMu.Unlock()
if tk != nil {
tk.stop()
}
}
func (t *tokenSimple) info(ctx context.Context, token string, revision uint64) (*AuthInfo, bool) {
if !t.isValidSimpleToken(ctx, token) {
return nil, false
}
t.simpleTokensMu.Lock()
username, ok := t.simpleTokens[token]
if ok && t.simpleTokenKeeper != nil {
t.simpleTokenKeeper.resetSimpleToken(token)
}
t.simpleTokensMu.Unlock()
return &AuthInfo{Username: username, Revision: revision}, ok
}
func (t *tokenSimple) assign(ctx context.Context, username string, rev uint64) (string, error) {
// rev isn't used in simple token, it is only used in JWT
index := ctx.Value("index").(uint64)
simpleToken := ctx.Value("simpleToken").(string)
token := fmt.Sprintf("%s.%d", simpleToken, index)
t.assignSimpleTokenToUser(username, token)
return token, nil
}
func (t *tokenSimple) isValidSimpleToken(ctx context.Context, token string) bool {
splitted := strings.Split(token, ".")
if len(splitted) != 2 {
return false
}
index, err := strconv.Atoi(splitted[1])
if err != nil {
return false
}
select {
case <-t.indexWaiter(uint64(index)):
return true
case <-ctx.Done():
}
return false
}
func newTokenProviderSimple(indexWaiter func(uint64) <-chan struct{}) *tokenSimple {
return &tokenSimple{
simpleTokens: make(map[string]string),
indexWaiter: indexWaiter,
}
}

1059
vendor/github.com/coreos/etcd/auth/store.go generated vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,86 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package api
import (
"sync"
"github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver"
"github.com/coreos/pkg/capnslog"
)
type Capability string
const (
AuthCapability Capability = "auth"
V3rpcCapability Capability = "v3rpc"
)
var (
plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcdserver/api")
// capabilityMaps is a static map of version to capability map.
capabilityMaps = map[string]map[Capability]bool{
"3.0.0": {AuthCapability: true, V3rpcCapability: true},
"3.1.0": {AuthCapability: true, V3rpcCapability: true},
"3.2.0": {AuthCapability: true, V3rpcCapability: true},
}
enableMapMu sync.RWMutex
// enabledMap points to a map in capabilityMaps
enabledMap map[Capability]bool
curVersion *semver.Version
)
func init() {
enabledMap = map[Capability]bool{
AuthCapability: true,
V3rpcCapability: true,
}
}
// UpdateCapability updates the enabledMap when the cluster version increases.
func UpdateCapability(v *semver.Version) {
if v == nil {
// if recovered but version was never set by cluster
return
}
enableMapMu.Lock()
if curVersion != nil && !curVersion.LessThan(*v) {
enableMapMu.Unlock()
return
}
curVersion = v
enabledMap = capabilityMaps[curVersion.String()]
enableMapMu.Unlock()
plog.Infof("enabled capabilities for version %s", version.Cluster(v.String()))
}
func IsCapabilityEnabled(c Capability) bool {
enableMapMu.RLock()
defer enableMapMu.RUnlock()
if enabledMap == nil {
return false
}
return enabledMap[c]
}
func EnableCapability(c Capability) {
enableMapMu.Lock()
defer enableMapMu.Unlock()
enabledMap[c] = true
}

View file

@ -0,0 +1,41 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package api
import (
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/go-semver/semver"
)
// Cluster is an interface representing a collection of members in one etcd cluster.
type Cluster interface {
// ID returns the cluster ID
ID() types.ID
// ClientURLs returns an aggregate set of all URLs on which this
// cluster is listening for client requests
ClientURLs() []string
// Members returns a slice of members sorted by their ID
Members() []*membership.Member
// Member retrieves a particular member based on ID, or nil if the
// member does not exist in the cluster
Member(id types.ID) *membership.Member
// IsIDRemoved checks whether the given ID has been removed from this
// cluster at some point in the past
IsIDRemoved(id types.ID) bool
// Version is the cluster-wide minimum major.minor version.
Version() *semver.Version
}

16
vendor/github.com/coreos/etcd/etcdserver/api/doc.go generated vendored Normal file
View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package api manages the capabilities and features that are exposed to clients by the etcd cluster.
package api

View file

@ -0,0 +1,157 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
type AuthServer struct {
authenticator etcdserver.Authenticator
}
func NewAuthServer(s *etcdserver.EtcdServer) *AuthServer {
return &AuthServer{authenticator: s}
}
func (as *AuthServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) {
resp, err := as.authenticator.AuthEnable(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) {
resp, err := as.authenticator.AuthDisable(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) {
resp, err := as.authenticator.Authenticate(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
resp, err := as.authenticator.RoleAdd(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
resp, err := as.authenticator.RoleDelete(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
resp, err := as.authenticator.RoleGet(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
resp, err := as.authenticator.RoleList(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
resp, err := as.authenticator.RoleRevokePermission(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
resp, err := as.authenticator.RoleGrantPermission(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
resp, err := as.authenticator.UserAdd(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
resp, err := as.authenticator.UserDelete(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
resp, err := as.authenticator.UserGet(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
resp, err := as.authenticator.UserList(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
resp, err := as.authenticator.UserGrantRole(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
resp, err := as.authenticator.UserRevokeRole(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
resp, err := as.authenticator.UserChangePassword(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}

View file

@ -0,0 +1,34 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import "github.com/gogo/protobuf/proto"
type codec struct{}
func (c *codec) Marshal(v interface{}) ([]byte, error) {
b, err := proto.Marshal(v.(proto.Message))
sentBytes.Add(float64(len(b)))
return b, err
}
func (c *codec) Unmarshal(data []byte, v interface{}) error {
receivedBytes.Add(float64(len(data)))
return proto.Unmarshal(data, v.(proto.Message))
}
func (c *codec) String() string {
return "proto"
}

View file

@ -0,0 +1,53 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"crypto/tls"
"math"
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/grpclog"
)
const maxStreams = math.MaxUint32
func init() {
grpclog.SetLogger(plog)
}
func Server(s *etcdserver.EtcdServer, tls *tls.Config) *grpc.Server {
var opts []grpc.ServerOption
opts = append(opts, grpc.CustomCodec(&codec{}))
if tls != nil {
opts = append(opts, grpc.Creds(credentials.NewTLS(tls)))
}
opts = append(opts, grpc.UnaryInterceptor(newUnaryInterceptor(s)))
opts = append(opts, grpc.StreamInterceptor(newStreamInterceptor(s)))
opts = append(opts, grpc.MaxConcurrentStreams(maxStreams))
grpcServer := grpc.NewServer(opts...)
pb.RegisterKVServer(grpcServer, NewQuotaKVServer(s))
pb.RegisterWatchServer(grpcServer, NewWatchServer(s))
pb.RegisterLeaseServer(grpcServer, NewQuotaLeaseServer(s))
pb.RegisterClusterServer(grpcServer, NewClusterServer(s))
pb.RegisterAuthServer(grpcServer, NewAuthServer(s))
pb.RegisterMaintenanceServer(grpcServer, NewMaintenanceServer(s))
return grpcServer
}

View file

@ -0,0 +1,46 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
type header struct {
clusterID int64
memberID int64
raftTimer etcdserver.RaftTimer
rev func() int64
}
func newHeader(s *etcdserver.EtcdServer) header {
return header{
clusterID: int64(s.Cluster().ID()),
memberID: int64(s.ID()),
raftTimer: s,
rev: func() int64 { return s.KV().Rev() },
}
}
// fill populates pb.ResponseHeader using etcdserver information
func (h *header) fill(rh *pb.ResponseHeader) {
rh.ClusterId = uint64(h.clusterID)
rh.MemberId = uint64(h.memberID)
rh.RaftTerm = h.raftTimer.Term()
if rh.Revision == 0 {
rh.Revision = h.rev()
}
}

View file

@ -0,0 +1,144 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"sync"
"time"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
"golang.org/x/net/context"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
)
const (
maxNoLeaderCnt = 3
)
type streamsMap struct {
mu sync.Mutex
streams map[grpc.ServerStream]struct{}
}
func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
if !api.IsCapabilityEnabled(api.V3rpcCapability) {
return nil, rpctypes.ErrGRPCNotCapable
}
md, ok := metadata.FromContext(ctx)
if ok {
if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
if s.Leader() == types.ID(raft.None) {
return nil, rpctypes.ErrGRPCNoLeader
}
}
}
return prometheus.UnaryServerInterceptor(ctx, req, info, handler)
}
}
func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor {
smap := monitorLeader(s)
return func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
if !api.IsCapabilityEnabled(api.V3rpcCapability) {
return rpctypes.ErrGRPCNotCapable
}
md, ok := metadata.FromContext(ss.Context())
if ok {
if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
if s.Leader() == types.ID(raft.None) {
return rpctypes.ErrGRPCNoLeader
}
cctx, cancel := context.WithCancel(ss.Context())
ss = serverStreamWithCtx{ctx: cctx, cancel: &cancel, ServerStream: ss}
smap.mu.Lock()
smap.streams[ss] = struct{}{}
smap.mu.Unlock()
defer func() {
smap.mu.Lock()
delete(smap.streams, ss)
smap.mu.Unlock()
cancel()
}()
}
}
return prometheus.StreamServerInterceptor(srv, ss, info, handler)
}
}
type serverStreamWithCtx struct {
grpc.ServerStream
ctx context.Context
cancel *context.CancelFunc
}
func (ssc serverStreamWithCtx) Context() context.Context { return ssc.ctx }
func monitorLeader(s *etcdserver.EtcdServer) *streamsMap {
smap := &streamsMap{
streams: make(map[grpc.ServerStream]struct{}),
}
go func() {
election := time.Duration(s.Cfg.TickMs) * time.Duration(s.Cfg.ElectionTicks) * time.Millisecond
noLeaderCnt := 0
for {
select {
case <-s.StopNotify():
return
case <-time.After(election):
if s.Leader() == types.ID(raft.None) {
noLeaderCnt++
} else {
noLeaderCnt = 0
}
// We are more conservative on canceling existing streams. Reconnecting streams
// cost much more than just rejecting new requests. So we wait until the member
// cannot find a leader for maxNoLeaderCnt election timeouts to cancel existing streams.
if noLeaderCnt >= maxNoLeaderCnt {
smap.mu.Lock()
for ss := range smap.streams {
if ssWithCtx, ok := ss.(serverStreamWithCtx); ok {
(*ssWithCtx.cancel)()
<-ss.Context().Done()
}
}
smap.streams = make(map[grpc.ServerStream]struct{})
smap.mu.Unlock()
}
}
}
}()
return smap
}

View file

@ -0,0 +1,259 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package v3rpc implements etcd v3 RPC system based on gRPC.
package v3rpc
import (
"sort"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/pkg/capnslog"
"golang.org/x/net/context"
)
var (
plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcdserver/api/v3rpc")
// Max operations per txn list. For example, Txn.Success can have at most 128 operations,
// and Txn.Failure can have at most 128 operations.
MaxOpsPerTxn = 128
)
type kvServer struct {
hdr header
kv etcdserver.RaftKV
}
func NewKVServer(s *etcdserver.EtcdServer) pb.KVServer {
return &kvServer{hdr: newHeader(s), kv: s}
}
func (s *kvServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
if err := checkRangeRequest(r); err != nil {
return nil, err
}
resp, err := s.kv.Range(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func (s *kvServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
if err := checkPutRequest(r); err != nil {
return nil, err
}
resp, err := s.kv.Put(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func (s *kvServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
if err := checkDeleteRequest(r); err != nil {
return nil, err
}
resp, err := s.kv.DeleteRange(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func (s *kvServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
if err := checkTxnRequest(r); err != nil {
return nil, err
}
resp, err := s.kv.Txn(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func (s *kvServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
resp, err := s.kv.Compact(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func checkRangeRequest(r *pb.RangeRequest) error {
if len(r.Key) == 0 {
return rpctypes.ErrGRPCEmptyKey
}
return nil
}
func checkPutRequest(r *pb.PutRequest) error {
if len(r.Key) == 0 {
return rpctypes.ErrGRPCEmptyKey
}
if r.IgnoreValue && len(r.Value) != 0 {
return rpctypes.ErrGRPCValueProvided
}
if r.IgnoreLease && r.Lease != 0 {
return rpctypes.ErrGRPCLeaseProvided
}
return nil
}
func checkDeleteRequest(r *pb.DeleteRangeRequest) error {
if len(r.Key) == 0 {
return rpctypes.ErrGRPCEmptyKey
}
return nil
}
func checkTxnRequest(r *pb.TxnRequest) error {
if len(r.Compare) > MaxOpsPerTxn || len(r.Success) > MaxOpsPerTxn || len(r.Failure) > MaxOpsPerTxn {
return rpctypes.ErrGRPCTooManyOps
}
for _, c := range r.Compare {
if len(c.Key) == 0 {
return rpctypes.ErrGRPCEmptyKey
}
}
for _, u := range r.Success {
if err := checkRequestOp(u); err != nil {
return err
}
}
if err := checkRequestDupKeys(r.Success); err != nil {
return err
}
for _, u := range r.Failure {
if err := checkRequestOp(u); err != nil {
return err
}
}
return checkRequestDupKeys(r.Failure)
}
// checkRequestDupKeys gives rpctypes.ErrGRPCDuplicateKey if the same key is modified twice
func checkRequestDupKeys(reqs []*pb.RequestOp) error {
// check put overlap
keys := make(map[string]struct{})
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestOp_RequestPut)
if !ok {
continue
}
preq := tv.RequestPut
if preq == nil {
continue
}
if _, ok := keys[string(preq.Key)]; ok {
return rpctypes.ErrGRPCDuplicateKey
}
keys[string(preq.Key)] = struct{}{}
}
// no need to check deletes if no puts; delete overlaps are permitted
if len(keys) == 0 {
return nil
}
// sort keys for range checking
sortedKeys := []string{}
for k := range keys {
sortedKeys = append(sortedKeys, k)
}
sort.Strings(sortedKeys)
// check put overlap with deletes
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestOp_RequestDeleteRange)
if !ok {
continue
}
dreq := tv.RequestDeleteRange
if dreq == nil {
continue
}
if dreq.RangeEnd == nil {
if _, found := keys[string(dreq.Key)]; found {
return rpctypes.ErrGRPCDuplicateKey
}
} else {
lo := sort.SearchStrings(sortedKeys, string(dreq.Key))
hi := sort.SearchStrings(sortedKeys, string(dreq.RangeEnd))
if lo != hi {
// element between lo and hi => overlap
return rpctypes.ErrGRPCDuplicateKey
}
}
}
return nil
}
func checkRequestOp(u *pb.RequestOp) error {
// TODO: ensure only one of the field is set.
switch uv := u.Request.(type) {
case *pb.RequestOp_RequestRange:
if uv.RequestRange != nil {
return checkRangeRequest(uv.RequestRange)
}
case *pb.RequestOp_RequestPut:
if uv.RequestPut != nil {
return checkPutRequest(uv.RequestPut)
}
case *pb.RequestOp_RequestDeleteRange:
if uv.RequestDeleteRange != nil {
return checkDeleteRequest(uv.RequestDeleteRange)
}
default:
// empty op / nil entry
return rpctypes.ErrGRPCKeyNotFound
}
return nil
}

View file

@ -0,0 +1,123 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"io"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/lease"
"golang.org/x/net/context"
)
type LeaseServer struct {
hdr header
le etcdserver.Lessor
}
func NewLeaseServer(s *etcdserver.EtcdServer) pb.LeaseServer {
return &LeaseServer{le: s, hdr: newHeader(s)}
}
func (ls *LeaseServer) LeaseGrant(ctx context.Context, cr *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
resp, err := ls.le.LeaseGrant(ctx, cr)
if err != nil {
return nil, togRPCError(err)
}
ls.hdr.fill(resp.Header)
return resp, nil
}
func (ls *LeaseServer) LeaseRevoke(ctx context.Context, rr *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
resp, err := ls.le.LeaseRevoke(ctx, rr)
if err != nil {
return nil, togRPCError(err)
}
ls.hdr.fill(resp.Header)
return resp, nil
}
func (ls *LeaseServer) LeaseTimeToLive(ctx context.Context, rr *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
resp, err := ls.le.LeaseTimeToLive(ctx, rr)
if err != nil && err != lease.ErrLeaseNotFound {
return nil, togRPCError(err)
}
if err == lease.ErrLeaseNotFound {
resp = &pb.LeaseTimeToLiveResponse{
Header: &pb.ResponseHeader{},
ID: rr.ID,
TTL: -1,
}
}
ls.hdr.fill(resp.Header)
return resp, nil
}
func (ls *LeaseServer) LeaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) (err error) {
errc := make(chan error, 1)
go func() {
errc <- ls.leaseKeepAlive(stream)
}()
select {
case err = <-errc:
case <-stream.Context().Done():
// the only server-side cancellation is noleader for now.
err = stream.Context().Err()
if err == context.Canceled {
err = rpctypes.ErrGRPCNoLeader
}
}
return err
}
func (ls *LeaseServer) leaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) error {
for {
req, err := stream.Recv()
if err == io.EOF {
return nil
}
if err != nil {
return err
}
// Create header before we sent out the renew request.
// This can make sure that the revision is strictly smaller or equal to
// when the keepalive happened at the local server (when the local server is the leader)
// or remote leader.
// Without this, a lease might be revoked at rev 3 but client can see the keepalive succeeded
// at rev 4.
resp := &pb.LeaseKeepAliveResponse{ID: req.ID, Header: &pb.ResponseHeader{}}
ls.hdr.fill(resp.Header)
ttl, err := ls.le.LeaseRenew(stream.Context(), lease.LeaseID(req.ID))
if err == lease.ErrLeaseNotFound {
err = nil
ttl = 0
}
if err != nil {
return togRPCError(err)
}
resp.TTL = ttl
err = stream.Send(resp)
if err != nil {
return err
}
}
}

View file

@ -0,0 +1,190 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"crypto/sha256"
"io"
"github.com/coreos/etcd/auth"
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/version"
"golang.org/x/net/context"
)
type KVGetter interface {
KV() mvcc.ConsistentWatchableKV
}
type BackendGetter interface {
Backend() backend.Backend
}
type Alarmer interface {
Alarm(ctx context.Context, ar *pb.AlarmRequest) (*pb.AlarmResponse, error)
}
type RaftStatusGetter interface {
Index() uint64
Term() uint64
Leader() types.ID
}
type AuthGetter interface {
AuthInfoFromCtx(ctx context.Context) (*auth.AuthInfo, error)
AuthStore() auth.AuthStore
}
type maintenanceServer struct {
rg RaftStatusGetter
kg KVGetter
bg BackendGetter
a Alarmer
hdr header
}
func NewMaintenanceServer(s *etcdserver.EtcdServer) pb.MaintenanceServer {
srv := &maintenanceServer{rg: s, kg: s, bg: s, a: s, hdr: newHeader(s)}
return &authMaintenanceServer{srv, s}
}
func (ms *maintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRequest) (*pb.DefragmentResponse, error) {
plog.Noticef("starting to defragment the storage backend...")
err := ms.bg.Backend().Defrag()
if err != nil {
plog.Errorf("failed to defragment the storage backend (%v)", err)
return nil, err
}
plog.Noticef("finished defragmenting the storage backend")
return &pb.DefragmentResponse{}, nil
}
func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance_SnapshotServer) error {
snap := ms.bg.Backend().Snapshot()
pr, pw := io.Pipe()
defer pr.Close()
go func() {
snap.WriteTo(pw)
if err := snap.Close(); err != nil {
plog.Errorf("error closing snapshot (%v)", err)
}
pw.Close()
}()
// send file data
h := sha256.New()
br := int64(0)
buf := make([]byte, 32*1024)
sz := snap.Size()
for br < sz {
n, err := io.ReadFull(pr, buf)
if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
return togRPCError(err)
}
br += int64(n)
resp := &pb.SnapshotResponse{
RemainingBytes: uint64(sz - br),
Blob: buf[:n],
}
if err = srv.Send(resp); err != nil {
return togRPCError(err)
}
h.Write(buf[:n])
}
// send sha
sha := h.Sum(nil)
hresp := &pb.SnapshotResponse{RemainingBytes: 0, Blob: sha}
if err := srv.Send(hresp); err != nil {
return togRPCError(err)
}
return nil
}
func (ms *maintenanceServer) Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error) {
h, rev, err := ms.kg.KV().Hash()
if err != nil {
return nil, togRPCError(err)
}
resp := &pb.HashResponse{Header: &pb.ResponseHeader{Revision: rev}, Hash: h}
ms.hdr.fill(resp.Header)
return resp, nil
}
func (ms *maintenanceServer) Alarm(ctx context.Context, ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
return ms.a.Alarm(ctx, ar)
}
func (ms *maintenanceServer) Status(ctx context.Context, ar *pb.StatusRequest) (*pb.StatusResponse, error) {
resp := &pb.StatusResponse{
Header: &pb.ResponseHeader{Revision: ms.hdr.rev()},
Version: version.Version,
DbSize: ms.bg.Backend().Size(),
Leader: uint64(ms.rg.Leader()),
RaftIndex: ms.rg.Index(),
RaftTerm: ms.rg.Term(),
}
ms.hdr.fill(resp.Header)
return resp, nil
}
type authMaintenanceServer struct {
*maintenanceServer
ag AuthGetter
}
func (ams *authMaintenanceServer) isAuthenticated(ctx context.Context) error {
authInfo, err := ams.ag.AuthInfoFromCtx(ctx)
if err != nil {
return err
}
return ams.ag.AuthStore().IsAdminPermitted(authInfo)
}
func (ams *authMaintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRequest) (*pb.DefragmentResponse, error) {
if err := ams.isAuthenticated(ctx); err != nil {
return nil, err
}
return ams.maintenanceServer.Defragment(ctx, sr)
}
func (ams *authMaintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance_SnapshotServer) error {
if err := ams.isAuthenticated(srv.Context()); err != nil {
return err
}
return ams.maintenanceServer.Snapshot(sr, srv)
}
func (ams *authMaintenanceServer) Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error) {
if err := ams.isAuthenticated(ctx); err != nil {
return nil, err
}
return ams.maintenanceServer.Hash(ctx, r)
}
func (ams *authMaintenanceServer) Status(ctx context.Context, ar *pb.StatusRequest) (*pb.StatusResponse, error) {
return ams.maintenanceServer.Status(ctx, ar)
}

View file

@ -0,0 +1,103 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"time"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/types"
"golang.org/x/net/context"
)
type ClusterServer struct {
cluster api.Cluster
server etcdserver.Server
raftTimer etcdserver.RaftTimer
}
func NewClusterServer(s *etcdserver.EtcdServer) *ClusterServer {
return &ClusterServer{
cluster: s.Cluster(),
server: s,
raftTimer: s,
}
}
func (cs *ClusterServer) MemberAdd(ctx context.Context, r *pb.MemberAddRequest) (*pb.MemberAddResponse, error) {
urls, err := types.NewURLs(r.PeerURLs)
if err != nil {
return nil, rpctypes.ErrGRPCMemberBadURLs
}
now := time.Now()
m := membership.NewMember("", urls, "", &now)
membs, merr := cs.server.AddMember(ctx, *m)
if merr != nil {
return nil, togRPCError(merr)
}
return &pb.MemberAddResponse{
Header: cs.header(),
Member: &pb.Member{ID: uint64(m.ID), PeerURLs: m.PeerURLs},
Members: membersToProtoMembers(membs),
}, nil
}
func (cs *ClusterServer) MemberRemove(ctx context.Context, r *pb.MemberRemoveRequest) (*pb.MemberRemoveResponse, error) {
membs, err := cs.server.RemoveMember(ctx, r.ID)
if err != nil {
return nil, togRPCError(err)
}
return &pb.MemberRemoveResponse{Header: cs.header(), Members: membersToProtoMembers(membs)}, nil
}
func (cs *ClusterServer) MemberUpdate(ctx context.Context, r *pb.MemberUpdateRequest) (*pb.MemberUpdateResponse, error) {
m := membership.Member{
ID: types.ID(r.ID),
RaftAttributes: membership.RaftAttributes{PeerURLs: r.PeerURLs},
}
membs, err := cs.server.UpdateMember(ctx, m)
if err != nil {
return nil, togRPCError(err)
}
return &pb.MemberUpdateResponse{Header: cs.header(), Members: membersToProtoMembers(membs)}, nil
}
func (cs *ClusterServer) MemberList(ctx context.Context, r *pb.MemberListRequest) (*pb.MemberListResponse, error) {
membs := membersToProtoMembers(cs.cluster.Members())
return &pb.MemberListResponse{Header: cs.header(), Members: membs}, nil
}
func (cs *ClusterServer) header() *pb.ResponseHeader {
return &pb.ResponseHeader{ClusterId: uint64(cs.cluster.ID()), MemberId: uint64(cs.server.ID()), RaftTerm: cs.raftTimer.Term()}
}
func membersToProtoMembers(membs []*membership.Member) []*pb.Member {
protoMembs := make([]*pb.Member, len(membs))
for i := range membs {
protoMembs[i] = &pb.Member{
Name: membs[i].Name,
ID: uint64(membs[i].ID),
PeerURLs: membs[i].PeerURLs,
ClientURLs: membs[i].ClientURLs,
}
}
return protoMembs
}

View file

@ -0,0 +1,38 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import "github.com/prometheus/client_golang/prometheus"
var (
sentBytes = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "client_grpc_sent_bytes_total",
Help: "The total number of bytes sent to grpc clients.",
})
receivedBytes = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "client_grpc_received_bytes_total",
Help: "The total number of bytes received from grpc clients.",
})
)
func init() {
prometheus.MustRegister(sentBytes)
prometheus.MustRegister(receivedBytes)
}

View file

@ -0,0 +1,89 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/types"
"golang.org/x/net/context"
)
type quotaKVServer struct {
pb.KVServer
qa quotaAlarmer
}
type quotaAlarmer struct {
q etcdserver.Quota
a Alarmer
id types.ID
}
// check whether request satisfies the quota. If there is not enough space,
// ignore request and raise the free space alarm.
func (qa *quotaAlarmer) check(ctx context.Context, r interface{}) error {
if qa.q.Available(r) {
return nil
}
req := &pb.AlarmRequest{
MemberID: uint64(qa.id),
Action: pb.AlarmRequest_ACTIVATE,
Alarm: pb.AlarmType_NOSPACE,
}
qa.a.Alarm(ctx, req)
return rpctypes.ErrGRPCNoSpace
}
func NewQuotaKVServer(s *etcdserver.EtcdServer) pb.KVServer {
return &quotaKVServer{
NewKVServer(s),
quotaAlarmer{etcdserver.NewBackendQuota(s), s, s.ID()},
}
}
func (s *quotaKVServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
if err := s.qa.check(ctx, r); err != nil {
return nil, err
}
return s.KVServer.Put(ctx, r)
}
func (s *quotaKVServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
if err := s.qa.check(ctx, r); err != nil {
return nil, err
}
return s.KVServer.Txn(ctx, r)
}
type quotaLeaseServer struct {
pb.LeaseServer
qa quotaAlarmer
}
func (s *quotaLeaseServer) LeaseGrant(ctx context.Context, cr *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
if err := s.qa.check(ctx, cr); err != nil {
return nil, err
}
return s.LeaseServer.LeaseGrant(ctx, cr)
}
func NewQuotaLeaseServer(s *etcdserver.EtcdServer) pb.LeaseServer {
return &quotaLeaseServer{
NewLeaseServer(s),
quotaAlarmer{etcdserver.NewBackendQuota(s), s, s.ID()},
}
}

View file

@ -0,0 +1,103 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"github.com/coreos/etcd/auth"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
)
func togRPCError(err error) error {
switch err {
case membership.ErrIDRemoved:
return rpctypes.ErrGRPCMemberNotFound
case membership.ErrIDNotFound:
return rpctypes.ErrGRPCMemberNotFound
case membership.ErrIDExists:
return rpctypes.ErrGRPCMemberExist
case membership.ErrPeerURLexists:
return rpctypes.ErrGRPCPeerURLExist
case etcdserver.ErrNotEnoughStartedMembers:
return rpctypes.ErrMemberNotEnoughStarted
case mvcc.ErrCompacted:
return rpctypes.ErrGRPCCompacted
case mvcc.ErrFutureRev:
return rpctypes.ErrGRPCFutureRev
case etcdserver.ErrRequestTooLarge:
return rpctypes.ErrGRPCRequestTooLarge
case etcdserver.ErrNoSpace:
return rpctypes.ErrGRPCNoSpace
case etcdserver.ErrTooManyRequests:
return rpctypes.ErrTooManyRequests
case etcdserver.ErrNoLeader:
return rpctypes.ErrGRPCNoLeader
case etcdserver.ErrStopped:
return rpctypes.ErrGRPCStopped
case etcdserver.ErrTimeout:
return rpctypes.ErrGRPCTimeout
case etcdserver.ErrTimeoutDueToLeaderFail:
return rpctypes.ErrGRPCTimeoutDueToLeaderFail
case etcdserver.ErrTimeoutDueToConnectionLost:
return rpctypes.ErrGRPCTimeoutDueToConnectionLost
case etcdserver.ErrUnhealthy:
return rpctypes.ErrGRPCUnhealthy
case etcdserver.ErrKeyNotFound:
return rpctypes.ErrGRPCKeyNotFound
case lease.ErrLeaseNotFound:
return rpctypes.ErrGRPCLeaseNotFound
case lease.ErrLeaseExists:
return rpctypes.ErrGRPCLeaseExist
case auth.ErrRootUserNotExist:
return rpctypes.ErrGRPCRootUserNotExist
case auth.ErrRootRoleNotExist:
return rpctypes.ErrGRPCRootRoleNotExist
case auth.ErrUserAlreadyExist:
return rpctypes.ErrGRPCUserAlreadyExist
case auth.ErrUserEmpty:
return rpctypes.ErrGRPCUserEmpty
case auth.ErrUserNotFound:
return rpctypes.ErrGRPCUserNotFound
case auth.ErrRoleAlreadyExist:
return rpctypes.ErrGRPCRoleAlreadyExist
case auth.ErrRoleNotFound:
return rpctypes.ErrGRPCRoleNotFound
case auth.ErrAuthFailed:
return rpctypes.ErrGRPCAuthFailed
case auth.ErrPermissionDenied:
return rpctypes.ErrGRPCPermissionDenied
case auth.ErrRoleNotGranted:
return rpctypes.ErrGRPCRoleNotGranted
case auth.ErrPermissionNotGranted:
return rpctypes.ErrGRPCPermissionNotGranted
case auth.ErrAuthNotEnabled:
return rpctypes.ErrGRPCAuthNotEnabled
case auth.ErrInvalidAuthToken:
return rpctypes.ErrGRPCInvalidAuthToken
case auth.ErrInvalidAuthMgmt:
return rpctypes.ErrGRPCInvalidAuthMgmt
default:
return grpc.Errorf(codes.Unknown, err.Error())
}
}

View file

@ -0,0 +1,426 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"io"
"sync"
"time"
"golang.org/x/net/context"
"github.com/coreos/etcd/auth"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/mvccpb"
)
type watchServer struct {
clusterID int64
memberID int64
raftTimer etcdserver.RaftTimer
watchable mvcc.WatchableKV
ag AuthGetter
}
func NewWatchServer(s *etcdserver.EtcdServer) pb.WatchServer {
return &watchServer{
clusterID: int64(s.Cluster().ID()),
memberID: int64(s.ID()),
raftTimer: s,
watchable: s.Watchable(),
ag: s,
}
}
var (
// External test can read this with GetProgressReportInterval()
// and change this to a small value to finish fast with
// SetProgressReportInterval().
progressReportInterval = 10 * time.Minute
progressReportIntervalMu sync.RWMutex
)
func GetProgressReportInterval() time.Duration {
progressReportIntervalMu.RLock()
defer progressReportIntervalMu.RUnlock()
return progressReportInterval
}
func SetProgressReportInterval(newTimeout time.Duration) {
progressReportIntervalMu.Lock()
defer progressReportIntervalMu.Unlock()
progressReportInterval = newTimeout
}
const (
// We send ctrl response inside the read loop. We do not want
// send to block read, but we still want ctrl response we sent to
// be serialized. Thus we use a buffered chan to solve the problem.
// A small buffer should be OK for most cases, since we expect the
// ctrl requests are infrequent.
ctrlStreamBufLen = 16
)
// serverWatchStream is an etcd server side stream. It receives requests
// from client side gRPC stream. It receives watch events from mvcc.WatchStream,
// and creates responses that forwarded to gRPC stream.
// It also forwards control message like watch created and canceled.
type serverWatchStream struct {
clusterID int64
memberID int64
raftTimer etcdserver.RaftTimer
watchable mvcc.WatchableKV
gRPCStream pb.Watch_WatchServer
watchStream mvcc.WatchStream
ctrlStream chan *pb.WatchResponse
// mu protects progress, prevKV
mu sync.Mutex
// progress tracks the watchID that stream might need to send
// progress to.
// TODO: combine progress and prevKV into a single struct?
progress map[mvcc.WatchID]bool
prevKV map[mvcc.WatchID]bool
// closec indicates the stream is closed.
closec chan struct{}
// wg waits for the send loop to complete
wg sync.WaitGroup
ag AuthGetter
}
func (ws *watchServer) Watch(stream pb.Watch_WatchServer) (err error) {
sws := serverWatchStream{
clusterID: ws.clusterID,
memberID: ws.memberID,
raftTimer: ws.raftTimer,
watchable: ws.watchable,
gRPCStream: stream,
watchStream: ws.watchable.NewWatchStream(),
// chan for sending control response like watcher created and canceled.
ctrlStream: make(chan *pb.WatchResponse, ctrlStreamBufLen),
progress: make(map[mvcc.WatchID]bool),
prevKV: make(map[mvcc.WatchID]bool),
closec: make(chan struct{}),
ag: ws.ag,
}
sws.wg.Add(1)
go func() {
sws.sendLoop()
sws.wg.Done()
}()
errc := make(chan error, 1)
// Ideally recvLoop would also use sws.wg to signal its completion
// but when stream.Context().Done() is closed, the stream's recv
// may continue to block since it uses a different context, leading to
// deadlock when calling sws.close().
go func() {
if rerr := sws.recvLoop(); rerr != nil {
errc <- rerr
}
}()
select {
case err = <-errc:
close(sws.ctrlStream)
case <-stream.Context().Done():
err = stream.Context().Err()
// the only server-side cancellation is noleader for now.
if err == context.Canceled {
err = rpctypes.ErrGRPCNoLeader
}
}
sws.close()
return err
}
func (sws *serverWatchStream) isWatchPermitted(wcr *pb.WatchCreateRequest) bool {
authInfo, err := sws.ag.AuthInfoFromCtx(sws.gRPCStream.Context())
if err != nil {
return false
}
if authInfo == nil {
// if auth is enabled, IsRangePermitted() can cause an error
authInfo = &auth.AuthInfo{}
}
return sws.ag.AuthStore().IsRangePermitted(authInfo, wcr.Key, wcr.RangeEnd) == nil
}
func (sws *serverWatchStream) recvLoop() error {
for {
req, err := sws.gRPCStream.Recv()
if err == io.EOF {
return nil
}
if err != nil {
return err
}
switch uv := req.RequestUnion.(type) {
case *pb.WatchRequest_CreateRequest:
if uv.CreateRequest == nil {
break
}
creq := uv.CreateRequest
if len(creq.Key) == 0 {
// \x00 is the smallest key
creq.Key = []byte{0}
}
if len(creq.RangeEnd) == 0 {
// force nil since watchstream.Watch distinguishes
// between nil and []byte{} for single key / >=
creq.RangeEnd = nil
}
if len(creq.RangeEnd) == 1 && creq.RangeEnd[0] == 0 {
// support >= key queries
creq.RangeEnd = []byte{}
}
if !sws.isWatchPermitted(creq) {
wr := &pb.WatchResponse{
Header: sws.newResponseHeader(sws.watchStream.Rev()),
WatchId: -1,
Canceled: true,
Created: true,
CancelReason: rpctypes.ErrGRPCPermissionDenied.Error(),
}
select {
case sws.ctrlStream <- wr:
case <-sws.closec:
}
return nil
}
filters := FiltersFromRequest(creq)
wsrev := sws.watchStream.Rev()
rev := creq.StartRevision
if rev == 0 {
rev = wsrev + 1
}
id := sws.watchStream.Watch(creq.Key, creq.RangeEnd, rev, filters...)
if id != -1 {
sws.mu.Lock()
if creq.ProgressNotify {
sws.progress[id] = true
}
if creq.PrevKv {
sws.prevKV[id] = true
}
sws.mu.Unlock()
}
wr := &pb.WatchResponse{
Header: sws.newResponseHeader(wsrev),
WatchId: int64(id),
Created: true,
Canceled: id == -1,
}
select {
case sws.ctrlStream <- wr:
case <-sws.closec:
return nil
}
case *pb.WatchRequest_CancelRequest:
if uv.CancelRequest != nil {
id := uv.CancelRequest.WatchId
err := sws.watchStream.Cancel(mvcc.WatchID(id))
if err == nil {
sws.ctrlStream <- &pb.WatchResponse{
Header: sws.newResponseHeader(sws.watchStream.Rev()),
WatchId: id,
Canceled: true,
}
sws.mu.Lock()
delete(sws.progress, mvcc.WatchID(id))
delete(sws.prevKV, mvcc.WatchID(id))
sws.mu.Unlock()
}
}
default:
// we probably should not shutdown the entire stream when
// receive an valid command.
// so just do nothing instead.
continue
}
}
}
func (sws *serverWatchStream) sendLoop() {
// watch ids that are currently active
ids := make(map[mvcc.WatchID]struct{})
// watch responses pending on a watch id creation message
pending := make(map[mvcc.WatchID][]*pb.WatchResponse)
interval := GetProgressReportInterval()
progressTicker := time.NewTicker(interval)
defer func() {
progressTicker.Stop()
// drain the chan to clean up pending events
for ws := range sws.watchStream.Chan() {
mvcc.ReportEventReceived(len(ws.Events))
}
for _, wrs := range pending {
for _, ws := range wrs {
mvcc.ReportEventReceived(len(ws.Events))
}
}
}()
for {
select {
case wresp, ok := <-sws.watchStream.Chan():
if !ok {
return
}
// TODO: evs is []mvccpb.Event type
// either return []*mvccpb.Event from the mvcc package
// or define protocol buffer with []mvccpb.Event.
evs := wresp.Events
events := make([]*mvccpb.Event, len(evs))
sws.mu.Lock()
needPrevKV := sws.prevKV[wresp.WatchID]
sws.mu.Unlock()
for i := range evs {
events[i] = &evs[i]
if needPrevKV {
opt := mvcc.RangeOptions{Rev: evs[i].Kv.ModRevision - 1}
r, err := sws.watchable.Range(evs[i].Kv.Key, nil, opt)
if err == nil && len(r.KVs) != 0 {
events[i].PrevKv = &(r.KVs[0])
}
}
}
wr := &pb.WatchResponse{
Header: sws.newResponseHeader(wresp.Revision),
WatchId: int64(wresp.WatchID),
Events: events,
CompactRevision: wresp.CompactRevision,
}
if _, hasId := ids[wresp.WatchID]; !hasId {
// buffer if id not yet announced
wrs := append(pending[wresp.WatchID], wr)
pending[wresp.WatchID] = wrs
continue
}
mvcc.ReportEventReceived(len(evs))
if err := sws.gRPCStream.Send(wr); err != nil {
return
}
sws.mu.Lock()
if len(evs) > 0 && sws.progress[wresp.WatchID] {
// elide next progress update if sent a key update
sws.progress[wresp.WatchID] = false
}
sws.mu.Unlock()
case c, ok := <-sws.ctrlStream:
if !ok {
return
}
if err := sws.gRPCStream.Send(c); err != nil {
return
}
// track id creation
wid := mvcc.WatchID(c.WatchId)
if c.Canceled {
delete(ids, wid)
continue
}
if c.Created {
// flush buffered events
ids[wid] = struct{}{}
for _, v := range pending[wid] {
mvcc.ReportEventReceived(len(v.Events))
if err := sws.gRPCStream.Send(v); err != nil {
return
}
}
delete(pending, wid)
}
case <-progressTicker.C:
sws.mu.Lock()
for id, ok := range sws.progress {
if ok {
sws.watchStream.RequestProgress(id)
}
sws.progress[id] = true
}
sws.mu.Unlock()
case <-sws.closec:
return
}
}
}
func (sws *serverWatchStream) close() {
sws.watchStream.Close()
close(sws.closec)
sws.wg.Wait()
}
func (sws *serverWatchStream) newResponseHeader(rev int64) *pb.ResponseHeader {
return &pb.ResponseHeader{
ClusterId: uint64(sws.clusterID),
MemberId: uint64(sws.memberID),
Revision: rev,
RaftTerm: sws.raftTimer.Term(),
}
}
func filterNoDelete(e mvccpb.Event) bool {
return e.Type == mvccpb.DELETE
}
func filterNoPut(e mvccpb.Event) bool {
return e.Type == mvccpb.PUT
}
func FiltersFromRequest(creq *pb.WatchCreateRequest) []mvcc.FilterFunc {
filters := make([]mvcc.FilterFunc, 0, len(creq.Filters))
for _, ft := range creq.Filters {
switch ft {
case pb.WatchCreateRequest_NOPUT:
filters = append(filters, filterNoPut)
case pb.WatchCreateRequest_NODELETE:
filters = append(filters, filterNoDelete)
default:
}
}
return filters
}

878
vendor/github.com/coreos/etcd/etcdserver/apply.go generated vendored Normal file
View file

@ -0,0 +1,878 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"bytes"
"sort"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/coreos/etcd/pkg/types"
"github.com/gogo/protobuf/proto"
"golang.org/x/net/context"
)
const (
warnApplyDuration = 100 * time.Millisecond
)
type applyResult struct {
resp proto.Message
err error
// physc signals the physical effect of the request has completed in addition
// to being logically reflected by the node. Currently only used for
// Compaction requests.
physc <-chan struct{}
}
// applierV3 is the interface for processing V3 raft messages
type applierV3 interface {
Apply(r *pb.InternalRaftRequest) *applyResult
Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, error)
Range(txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error)
DeleteRange(txn mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error)
Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, error)
LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
Alarm(*pb.AlarmRequest) (*pb.AlarmResponse, error)
Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error)
AuthEnable() (*pb.AuthEnableResponse, error)
AuthDisable() (*pb.AuthDisableResponse, error)
UserAdd(ua *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
UserDelete(ua *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
UserChangePassword(ua *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
UserGrantRole(ua *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
UserGet(ua *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
UserRevokeRole(ua *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
RoleAdd(ua *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
RoleGrantPermission(ua *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
RoleGet(ua *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
RoleRevokePermission(ua *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
RoleDelete(ua *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
UserList(ua *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
RoleList(ua *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
}
type applierV3backend struct {
s *EtcdServer
}
func (s *EtcdServer) newApplierV3() applierV3 {
return newAuthApplierV3(
s.AuthStore(),
newQuotaApplierV3(s, &applierV3backend{s}),
)
}
func (a *applierV3backend) Apply(r *pb.InternalRaftRequest) *applyResult {
ar := &applyResult{}
// call into a.s.applyV3.F instead of a.F so upper appliers can check individual calls
switch {
case r.Range != nil:
ar.resp, ar.err = a.s.applyV3.Range(nil, r.Range)
case r.Put != nil:
ar.resp, ar.err = a.s.applyV3.Put(nil, r.Put)
case r.DeleteRange != nil:
ar.resp, ar.err = a.s.applyV3.DeleteRange(nil, r.DeleteRange)
case r.Txn != nil:
ar.resp, ar.err = a.s.applyV3.Txn(r.Txn)
case r.Compaction != nil:
ar.resp, ar.physc, ar.err = a.s.applyV3.Compaction(r.Compaction)
case r.LeaseGrant != nil:
ar.resp, ar.err = a.s.applyV3.LeaseGrant(r.LeaseGrant)
case r.LeaseRevoke != nil:
ar.resp, ar.err = a.s.applyV3.LeaseRevoke(r.LeaseRevoke)
case r.Alarm != nil:
ar.resp, ar.err = a.s.applyV3.Alarm(r.Alarm)
case r.Authenticate != nil:
ar.resp, ar.err = a.s.applyV3.Authenticate(r.Authenticate)
case r.AuthEnable != nil:
ar.resp, ar.err = a.s.applyV3.AuthEnable()
case r.AuthDisable != nil:
ar.resp, ar.err = a.s.applyV3.AuthDisable()
case r.AuthUserAdd != nil:
ar.resp, ar.err = a.s.applyV3.UserAdd(r.AuthUserAdd)
case r.AuthUserDelete != nil:
ar.resp, ar.err = a.s.applyV3.UserDelete(r.AuthUserDelete)
case r.AuthUserChangePassword != nil:
ar.resp, ar.err = a.s.applyV3.UserChangePassword(r.AuthUserChangePassword)
case r.AuthUserGrantRole != nil:
ar.resp, ar.err = a.s.applyV3.UserGrantRole(r.AuthUserGrantRole)
case r.AuthUserGet != nil:
ar.resp, ar.err = a.s.applyV3.UserGet(r.AuthUserGet)
case r.AuthUserRevokeRole != nil:
ar.resp, ar.err = a.s.applyV3.UserRevokeRole(r.AuthUserRevokeRole)
case r.AuthRoleAdd != nil:
ar.resp, ar.err = a.s.applyV3.RoleAdd(r.AuthRoleAdd)
case r.AuthRoleGrantPermission != nil:
ar.resp, ar.err = a.s.applyV3.RoleGrantPermission(r.AuthRoleGrantPermission)
case r.AuthRoleGet != nil:
ar.resp, ar.err = a.s.applyV3.RoleGet(r.AuthRoleGet)
case r.AuthRoleRevokePermission != nil:
ar.resp, ar.err = a.s.applyV3.RoleRevokePermission(r.AuthRoleRevokePermission)
case r.AuthRoleDelete != nil:
ar.resp, ar.err = a.s.applyV3.RoleDelete(r.AuthRoleDelete)
case r.AuthUserList != nil:
ar.resp, ar.err = a.s.applyV3.UserList(r.AuthUserList)
case r.AuthRoleList != nil:
ar.resp, ar.err = a.s.applyV3.RoleList(r.AuthRoleList)
default:
panic("not implemented")
}
return ar
}
func (a *applierV3backend) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (resp *pb.PutResponse, err error) {
resp = &pb.PutResponse{}
resp.Header = &pb.ResponseHeader{}
val, leaseID := p.Value, lease.LeaseID(p.Lease)
if txn == nil {
if leaseID != lease.NoLease {
if l := a.s.lessor.Lookup(leaseID); l == nil {
return nil, lease.ErrLeaseNotFound
}
}
txn = a.s.KV().Write()
defer txn.End()
}
var rr *mvcc.RangeResult
if p.IgnoreValue || p.IgnoreLease || p.PrevKv {
rr, err = txn.Range(p.Key, nil, mvcc.RangeOptions{})
if err != nil {
return nil, err
}
}
if p.IgnoreValue || p.IgnoreLease {
if rr == nil || len(rr.KVs) == 0 {
// ignore_{lease,value} flag expects previous key-value pair
return nil, ErrKeyNotFound
}
}
if p.IgnoreValue {
val = rr.KVs[0].Value
}
if p.IgnoreLease {
leaseID = lease.LeaseID(rr.KVs[0].Lease)
}
if p.PrevKv {
if rr != nil && len(rr.KVs) != 0 {
resp.PrevKv = &rr.KVs[0]
}
}
resp.Header.Revision = txn.Put(p.Key, val, leaseID)
return resp, nil
}
func (a *applierV3backend) DeleteRange(txn mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
resp := &pb.DeleteRangeResponse{}
resp.Header = &pb.ResponseHeader{}
if txn == nil {
txn = a.s.kv.Write()
defer txn.End()
}
if isGteRange(dr.RangeEnd) {
dr.RangeEnd = []byte{}
}
if dr.PrevKv {
rr, err := txn.Range(dr.Key, dr.RangeEnd, mvcc.RangeOptions{})
if err != nil {
return nil, err
}
if rr != nil {
for i := range rr.KVs {
resp.PrevKvs = append(resp.PrevKvs, &rr.KVs[i])
}
}
}
resp.Deleted, resp.Header.Revision = txn.DeleteRange(dr.Key, dr.RangeEnd)
return resp, nil
}
func (a *applierV3backend) Range(txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error) {
resp := &pb.RangeResponse{}
resp.Header = &pb.ResponseHeader{}
if txn == nil {
txn = a.s.kv.Read()
defer txn.End()
}
if isGteRange(r.RangeEnd) {
r.RangeEnd = []byte{}
}
limit := r.Limit
if r.SortOrder != pb.RangeRequest_NONE ||
r.MinModRevision != 0 || r.MaxModRevision != 0 ||
r.MinCreateRevision != 0 || r.MaxCreateRevision != 0 {
// fetch everything; sort and truncate afterwards
limit = 0
}
if limit > 0 {
// fetch one extra for 'more' flag
limit = limit + 1
}
ro := mvcc.RangeOptions{
Limit: limit,
Rev: r.Revision,
Count: r.CountOnly,
}
rr, err := txn.Range(r.Key, r.RangeEnd, ro)
if err != nil {
return nil, err
}
if r.MaxModRevision != 0 {
f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision > r.MaxModRevision }
pruneKVs(rr, f)
}
if r.MinModRevision != 0 {
f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision < r.MinModRevision }
pruneKVs(rr, f)
}
if r.MaxCreateRevision != 0 {
f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision > r.MaxCreateRevision }
pruneKVs(rr, f)
}
if r.MinCreateRevision != 0 {
f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision < r.MinCreateRevision }
pruneKVs(rr, f)
}
sortOrder := r.SortOrder
if r.SortTarget != pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_NONE {
// Since current mvcc.Range implementation returns results
// sorted by keys in lexiographically ascending order,
// sort ASCEND by default only when target is not 'KEY'
sortOrder = pb.RangeRequest_ASCEND
}
if sortOrder != pb.RangeRequest_NONE {
var sorter sort.Interface
switch {
case r.SortTarget == pb.RangeRequest_KEY:
sorter = &kvSortByKey{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_VERSION:
sorter = &kvSortByVersion{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_CREATE:
sorter = &kvSortByCreate{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_MOD:
sorter = &kvSortByMod{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_VALUE:
sorter = &kvSortByValue{&kvSort{rr.KVs}}
}
switch {
case sortOrder == pb.RangeRequest_ASCEND:
sort.Sort(sorter)
case sortOrder == pb.RangeRequest_DESCEND:
sort.Sort(sort.Reverse(sorter))
}
}
if r.Limit > 0 && len(rr.KVs) > int(r.Limit) {
rr.KVs = rr.KVs[:r.Limit]
resp.More = true
}
resp.Header.Revision = rr.Rev
resp.Count = int64(rr.Count)
for i := range rr.KVs {
if r.KeysOnly {
rr.KVs[i].Value = nil
}
resp.Kvs = append(resp.Kvs, &rr.KVs[i])
}
return resp, nil
}
func (a *applierV3backend) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
isWrite := !isTxnReadonly(rt)
txn := mvcc.NewReadOnlyTxnWrite(a.s.KV().Read())
reqs, ok := a.compareToOps(txn, rt)
if isWrite {
if err := a.checkRequestPut(txn, reqs); err != nil {
txn.End()
return nil, err
}
}
if err := checkRequestRange(txn, reqs); err != nil {
txn.End()
return nil, err
}
resps := make([]*pb.ResponseOp, len(reqs))
txnResp := &pb.TxnResponse{
Responses: resps,
Succeeded: ok,
Header: &pb.ResponseHeader{},
}
// When executing mutable txn ops, etcd must hold the txn lock so
// readers do not see any intermediate results. Since writes are
// serialized on the raft loop, the revision in the read view will
// be the revision of the write txn.
if isWrite {
txn.End()
txn = a.s.KV().Write()
}
for i := range reqs {
resps[i] = a.applyUnion(txn, reqs[i])
}
rev := txn.Rev()
if len(txn.Changes()) != 0 {
rev++
}
txn.End()
txnResp.Header.Revision = rev
return txnResp, nil
}
func (a *applierV3backend) compareToOps(rv mvcc.ReadView, rt *pb.TxnRequest) ([]*pb.RequestOp, bool) {
for _, c := range rt.Compare {
if !applyCompare(rv, c) {
return rt.Failure, false
}
}
return rt.Success, true
}
// applyCompare applies the compare request.
// If the comparison succeeds, it returns true. Otherwise, returns false.
func applyCompare(rv mvcc.ReadView, c *pb.Compare) bool {
rr, err := rv.Range(c.Key, nil, mvcc.RangeOptions{})
if err != nil {
return false
}
var ckv mvccpb.KeyValue
if len(rr.KVs) != 0 {
ckv = rr.KVs[0]
} else {
// Use the zero value of ckv normally. However...
if c.Target == pb.Compare_VALUE {
// Always fail if we're comparing a value on a key that doesn't exist.
// We can treat non-existence as the empty set explicitly, such that
// even a key with a value of length 0 bytes is still a real key
// that was written that way
return false
}
}
// -1 is less, 0 is equal, 1 is greater
var result int
switch c.Target {
case pb.Compare_VALUE:
tv, _ := c.TargetUnion.(*pb.Compare_Value)
if tv != nil {
result = bytes.Compare(ckv.Value, tv.Value)
}
case pb.Compare_CREATE:
tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision)
if tv != nil {
result = compareInt64(ckv.CreateRevision, tv.CreateRevision)
}
case pb.Compare_MOD:
tv, _ := c.TargetUnion.(*pb.Compare_ModRevision)
if tv != nil {
result = compareInt64(ckv.ModRevision, tv.ModRevision)
}
case pb.Compare_VERSION:
tv, _ := c.TargetUnion.(*pb.Compare_Version)
if tv != nil {
result = compareInt64(ckv.Version, tv.Version)
}
}
switch c.Result {
case pb.Compare_EQUAL:
return result == 0
case pb.Compare_NOT_EQUAL:
return result != 0
case pb.Compare_GREATER:
return result > 0
case pb.Compare_LESS:
return result < 0
}
return true
}
func (a *applierV3backend) applyUnion(txn mvcc.TxnWrite, union *pb.RequestOp) *pb.ResponseOp {
switch tv := union.Request.(type) {
case *pb.RequestOp_RequestRange:
if tv.RequestRange != nil {
resp, err := a.Range(txn, tv.RequestRange)
if err != nil {
plog.Panicf("unexpected error during txn: %v", err)
}
return &pb.ResponseOp{Response: &pb.ResponseOp_ResponseRange{ResponseRange: resp}}
}
case *pb.RequestOp_RequestPut:
if tv.RequestPut != nil {
resp, err := a.Put(txn, tv.RequestPut)
if err != nil {
plog.Panicf("unexpected error during txn: %v", err)
}
return &pb.ResponseOp{Response: &pb.ResponseOp_ResponsePut{ResponsePut: resp}}
}
case *pb.RequestOp_RequestDeleteRange:
if tv.RequestDeleteRange != nil {
resp, err := a.DeleteRange(txn, tv.RequestDeleteRange)
if err != nil {
plog.Panicf("unexpected error during txn: %v", err)
}
return &pb.ResponseOp{Response: &pb.ResponseOp_ResponseDeleteRange{ResponseDeleteRange: resp}}
}
default:
// empty union
return nil
}
return nil
}
func (a *applierV3backend) Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, error) {
resp := &pb.CompactionResponse{}
resp.Header = &pb.ResponseHeader{}
ch, err := a.s.KV().Compact(compaction.Revision)
if err != nil {
return nil, ch, err
}
// get the current revision. which key to get is not important.
rr, _ := a.s.KV().Range([]byte("compaction"), nil, mvcc.RangeOptions{})
resp.Header.Revision = rr.Rev
return resp, ch, err
}
func (a *applierV3backend) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
l, err := a.s.lessor.Grant(lease.LeaseID(lc.ID), lc.TTL)
resp := &pb.LeaseGrantResponse{}
if err == nil {
resp.ID = int64(l.ID)
resp.TTL = l.TTL()
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
err := a.s.lessor.Revoke(lease.LeaseID(lc.ID))
return &pb.LeaseRevokeResponse{Header: newHeader(a.s)}, err
}
func (a *applierV3backend) Alarm(ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
resp := &pb.AlarmResponse{}
oldCount := len(a.s.alarmStore.Get(ar.Alarm))
switch ar.Action {
case pb.AlarmRequest_GET:
resp.Alarms = a.s.alarmStore.Get(ar.Alarm)
case pb.AlarmRequest_ACTIVATE:
m := a.s.alarmStore.Activate(types.ID(ar.MemberID), ar.Alarm)
if m == nil {
break
}
resp.Alarms = append(resp.Alarms, m)
activated := oldCount == 0 && len(a.s.alarmStore.Get(m.Alarm)) == 1
if !activated {
break
}
switch m.Alarm {
case pb.AlarmType_NOSPACE:
plog.Warningf("alarm raised %+v", m)
a.s.applyV3 = newApplierV3Capped(a)
default:
plog.Errorf("unimplemented alarm activation (%+v)", m)
}
case pb.AlarmRequest_DEACTIVATE:
m := a.s.alarmStore.Deactivate(types.ID(ar.MemberID), ar.Alarm)
if m == nil {
break
}
resp.Alarms = append(resp.Alarms, m)
deactivated := oldCount > 0 && len(a.s.alarmStore.Get(ar.Alarm)) == 0
if !deactivated {
break
}
switch m.Alarm {
case pb.AlarmType_NOSPACE:
plog.Infof("alarm disarmed %+v", ar)
a.s.applyV3 = a.s.newApplierV3()
default:
plog.Errorf("unimplemented alarm deactivation (%+v)", m)
}
default:
return nil, nil
}
return resp, nil
}
type applierV3Capped struct {
applierV3
q backendQuota
}
// newApplierV3Capped creates an applyV3 that will reject Puts and transactions
// with Puts so that the number of keys in the store is capped.
func newApplierV3Capped(base applierV3) applierV3 { return &applierV3Capped{applierV3: base} }
func (a *applierV3Capped) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, error) {
return nil, ErrNoSpace
}
func (a *applierV3Capped) Txn(r *pb.TxnRequest) (*pb.TxnResponse, error) {
if a.q.Cost(r) > 0 {
return nil, ErrNoSpace
}
return a.applierV3.Txn(r)
}
func (a *applierV3Capped) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
return nil, ErrNoSpace
}
func (a *applierV3backend) AuthEnable() (*pb.AuthEnableResponse, error) {
err := a.s.AuthStore().AuthEnable()
if err != nil {
return nil, err
}
return &pb.AuthEnableResponse{Header: newHeader(a.s)}, nil
}
func (a *applierV3backend) AuthDisable() (*pb.AuthDisableResponse, error) {
a.s.AuthStore().AuthDisable()
return &pb.AuthDisableResponse{Header: newHeader(a.s)}, nil
}
func (a *applierV3backend) Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error) {
ctx := context.WithValue(context.WithValue(a.s.ctx, "index", a.s.consistIndex.ConsistentIndex()), "simpleToken", r.SimpleToken)
resp, err := a.s.AuthStore().Authenticate(ctx, r.Name, r.Password)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
resp, err := a.s.AuthStore().UserAdd(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserDelete(r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
resp, err := a.s.AuthStore().UserDelete(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserChangePassword(r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
resp, err := a.s.AuthStore().UserChangePassword(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
resp, err := a.s.AuthStore().UserGrantRole(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
resp, err := a.s.AuthStore().UserGet(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
resp, err := a.s.AuthStore().UserRevokeRole(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
resp, err := a.s.AuthStore().RoleAdd(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
resp, err := a.s.AuthStore().RoleGrantPermission(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
resp, err := a.s.AuthStore().RoleGet(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleRevokePermission(r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
resp, err := a.s.AuthStore().RoleRevokePermission(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleDelete(r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
resp, err := a.s.AuthStore().RoleDelete(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserList(r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
resp, err := a.s.AuthStore().UserList(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleList(r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
resp, err := a.s.AuthStore().RoleList(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
type quotaApplierV3 struct {
applierV3
q Quota
}
func newQuotaApplierV3(s *EtcdServer, app applierV3) applierV3 {
return &quotaApplierV3{app, NewBackendQuota(s)}
}
func (a *quotaApplierV3) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, error) {
ok := a.q.Available(p)
resp, err := a.applierV3.Put(txn, p)
if err == nil && !ok {
err = ErrNoSpace
}
return resp, err
}
func (a *quotaApplierV3) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
ok := a.q.Available(rt)
resp, err := a.applierV3.Txn(rt)
if err == nil && !ok {
err = ErrNoSpace
}
return resp, err
}
func (a *quotaApplierV3) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
ok := a.q.Available(lc)
resp, err := a.applierV3.LeaseGrant(lc)
if err == nil && !ok {
err = ErrNoSpace
}
return resp, err
}
type kvSort struct{ kvs []mvccpb.KeyValue }
func (s *kvSort) Swap(i, j int) {
t := s.kvs[i]
s.kvs[i] = s.kvs[j]
s.kvs[j] = t
}
func (s *kvSort) Len() int { return len(s.kvs) }
type kvSortByKey struct{ *kvSort }
func (s *kvSortByKey) Less(i, j int) bool {
return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
}
type kvSortByVersion struct{ *kvSort }
func (s *kvSortByVersion) Less(i, j int) bool {
return (s.kvs[i].Version - s.kvs[j].Version) < 0
}
type kvSortByCreate struct{ *kvSort }
func (s *kvSortByCreate) Less(i, j int) bool {
return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
}
type kvSortByMod struct{ *kvSort }
func (s *kvSortByMod) Less(i, j int) bool {
return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
}
type kvSortByValue struct{ *kvSort }
func (s *kvSortByValue) Less(i, j int) bool {
return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
}
func (a *applierV3backend) checkRequestPut(rv mvcc.ReadView, reqs []*pb.RequestOp) error {
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestOp_RequestPut)
if !ok {
continue
}
preq := tv.RequestPut
if preq == nil {
continue
}
if preq.IgnoreValue || preq.IgnoreLease {
// expects previous key-value, error if not exist
rr, err := rv.Range(preq.Key, nil, mvcc.RangeOptions{})
if err != nil {
return err
}
if rr == nil || len(rr.KVs) == 0 {
return ErrKeyNotFound
}
}
if lease.LeaseID(preq.Lease) == lease.NoLease {
continue
}
if l := a.s.lessor.Lookup(lease.LeaseID(preq.Lease)); l == nil {
return lease.ErrLeaseNotFound
}
}
return nil
}
func checkRequestRange(rv mvcc.ReadView, reqs []*pb.RequestOp) error {
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestOp_RequestRange)
if !ok {
continue
}
greq := tv.RequestRange
if greq == nil || greq.Revision == 0 {
continue
}
if greq.Revision > rv.Rev() {
return mvcc.ErrFutureRev
}
if greq.Revision < rv.FirstRev() {
return mvcc.ErrCompacted
}
}
return nil
}
func compareInt64(a, b int64) int {
switch {
case a < b:
return -1
case a > b:
return 1
default:
return 0
}
}
// isGteRange determines if the range end is a >= range. This works around grpc
// sending empty byte strings as nil; >= is encoded in the range end as '\0'.
func isGteRange(rangeEnd []byte) bool {
return len(rangeEnd) == 1 && rangeEnd[0] == 0
}
func noSideEffect(r *pb.InternalRaftRequest) bool {
return r.Range != nil || r.AuthUserGet != nil || r.AuthRoleGet != nil
}
func removeNeedlessRangeReqs(txn *pb.TxnRequest) {
f := func(ops []*pb.RequestOp) []*pb.RequestOp {
j := 0
for i := 0; i < len(ops); i++ {
if _, ok := ops[i].Request.(*pb.RequestOp_RequestRange); ok {
continue
}
ops[j] = ops[i]
j++
}
return ops[:j]
}
txn.Success = f(txn.Success)
txn.Failure = f(txn.Failure)
}
func pruneKVs(rr *mvcc.RangeResult, isPrunable func(*mvccpb.KeyValue) bool) {
j := 0
for i := range rr.KVs {
rr.KVs[j] = rr.KVs[i]
if !isPrunable(&rr.KVs[i]) {
j++
}
}
rr.KVs = rr.KVs[:j]
}
func newHeader(s *EtcdServer) *pb.ResponseHeader {
return &pb.ResponseHeader{
ClusterId: uint64(s.Cluster().ID()),
MemberId: uint64(s.ID()),
Revision: s.KV().Rev(),
RaftTerm: s.Term(),
}
}

196
vendor/github.com/coreos/etcd/etcdserver/apply_auth.go generated vendored Normal file
View file

@ -0,0 +1,196 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"sync"
"github.com/coreos/etcd/auth"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc"
)
type authApplierV3 struct {
applierV3
as auth.AuthStore
// mu serializes Apply so that user isn't corrupted and so that
// serialized requests don't leak data from TOCTOU errors
mu sync.Mutex
authInfo auth.AuthInfo
}
func newAuthApplierV3(as auth.AuthStore, base applierV3) *authApplierV3 {
return &authApplierV3{applierV3: base, as: as}
}
func (aa *authApplierV3) Apply(r *pb.InternalRaftRequest) *applyResult {
aa.mu.Lock()
defer aa.mu.Unlock()
if r.Header != nil {
// backward-compatible with pre-3.0 releases when internalRaftRequest
// does not have header field
aa.authInfo.Username = r.Header.Username
aa.authInfo.Revision = r.Header.AuthRevision
}
if needAdminPermission(r) {
if err := aa.as.IsAdminPermitted(&aa.authInfo); err != nil {
aa.authInfo.Username = ""
aa.authInfo.Revision = 0
return &applyResult{err: err}
}
}
ret := aa.applierV3.Apply(r)
aa.authInfo.Username = ""
aa.authInfo.Revision = 0
return ret
}
func (aa *authApplierV3) Put(txn mvcc.TxnWrite, r *pb.PutRequest) (*pb.PutResponse, error) {
if err := aa.as.IsPutPermitted(&aa.authInfo, r.Key); err != nil {
return nil, err
}
if r.PrevKv {
err := aa.as.IsRangePermitted(&aa.authInfo, r.Key, nil)
if err != nil {
return nil, err
}
}
return aa.applierV3.Put(txn, r)
}
func (aa *authApplierV3) Range(txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error) {
if err := aa.as.IsRangePermitted(&aa.authInfo, r.Key, r.RangeEnd); err != nil {
return nil, err
}
return aa.applierV3.Range(txn, r)
}
func (aa *authApplierV3) DeleteRange(txn mvcc.TxnWrite, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
if err := aa.as.IsDeleteRangePermitted(&aa.authInfo, r.Key, r.RangeEnd); err != nil {
return nil, err
}
if r.PrevKv {
err := aa.as.IsRangePermitted(&aa.authInfo, r.Key, r.RangeEnd)
if err != nil {
return nil, err
}
}
return aa.applierV3.DeleteRange(txn, r)
}
func checkTxnReqsPermission(as auth.AuthStore, ai *auth.AuthInfo, reqs []*pb.RequestOp) error {
for _, requ := range reqs {
switch tv := requ.Request.(type) {
case *pb.RequestOp_RequestRange:
if tv.RequestRange == nil {
continue
}
if err := as.IsRangePermitted(ai, tv.RequestRange.Key, tv.RequestRange.RangeEnd); err != nil {
return err
}
case *pb.RequestOp_RequestPut:
if tv.RequestPut == nil {
continue
}
if err := as.IsPutPermitted(ai, tv.RequestPut.Key); err != nil {
return err
}
case *pb.RequestOp_RequestDeleteRange:
if tv.RequestDeleteRange == nil {
continue
}
if tv.RequestDeleteRange.PrevKv {
err := as.IsRangePermitted(ai, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd)
if err != nil {
return err
}
}
err := as.IsDeleteRangePermitted(ai, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd)
if err != nil {
return err
}
}
}
return nil
}
func checkTxnAuth(as auth.AuthStore, ai *auth.AuthInfo, rt *pb.TxnRequest) error {
for _, c := range rt.Compare {
if err := as.IsRangePermitted(ai, c.Key, nil); err != nil {
return err
}
}
if err := checkTxnReqsPermission(as, ai, rt.Success); err != nil {
return err
}
if err := checkTxnReqsPermission(as, ai, rt.Failure); err != nil {
return err
}
return nil
}
func (aa *authApplierV3) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
if err := checkTxnAuth(aa.as, &aa.authInfo, rt); err != nil {
return nil, err
}
return aa.applierV3.Txn(rt)
}
func needAdminPermission(r *pb.InternalRaftRequest) bool {
switch {
case r.AuthEnable != nil:
return true
case r.AuthDisable != nil:
return true
case r.AuthUserAdd != nil:
return true
case r.AuthUserDelete != nil:
return true
case r.AuthUserChangePassword != nil:
return true
case r.AuthUserGrantRole != nil:
return true
case r.AuthUserGet != nil:
return true
case r.AuthUserRevokeRole != nil:
return true
case r.AuthRoleAdd != nil:
return true
case r.AuthRoleGrantPermission != nil:
return true
case r.AuthRoleGet != nil:
return true
case r.AuthRoleRevokePermission != nil:
return true
case r.AuthRoleDelete != nil:
return true
case r.AuthUserList != nil:
return true
case r.AuthRoleList != nil:
return true
default:
return false
}
}

140
vendor/github.com/coreos/etcd/etcdserver/apply_v2.go generated vendored Normal file
View file

@ -0,0 +1,140 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"encoding/json"
"path"
"time"
"github.com/coreos/etcd/etcdserver/api"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/store"
"github.com/coreos/go-semver/semver"
)
// ApplierV2 is the interface for processing V2 raft messages
type ApplierV2 interface {
Delete(r *pb.Request) Response
Post(r *pb.Request) Response
Put(r *pb.Request) Response
QGet(r *pb.Request) Response
Sync(r *pb.Request) Response
}
func NewApplierV2(s store.Store, c *membership.RaftCluster) ApplierV2 {
return &applierV2store{store: s, cluster: c}
}
type applierV2store struct {
store store.Store
cluster *membership.RaftCluster
}
func (a *applierV2store) Delete(r *pb.Request) Response {
switch {
case r.PrevIndex > 0 || r.PrevValue != "":
return toResponse(a.store.CompareAndDelete(r.Path, r.PrevValue, r.PrevIndex))
default:
return toResponse(a.store.Delete(r.Path, r.Dir, r.Recursive))
}
}
func (a *applierV2store) Post(r *pb.Request) Response {
return toResponse(a.store.Create(r.Path, r.Dir, r.Val, true, toTTLOptions(r)))
}
func (a *applierV2store) Put(r *pb.Request) Response {
ttlOptions := toTTLOptions(r)
exists, existsSet := pbutil.GetBool(r.PrevExist)
switch {
case existsSet:
if exists {
if r.PrevIndex == 0 && r.PrevValue == "" {
return toResponse(a.store.Update(r.Path, r.Val, ttlOptions))
}
return toResponse(a.store.CompareAndSwap(r.Path, r.PrevValue, r.PrevIndex, r.Val, ttlOptions))
}
return toResponse(a.store.Create(r.Path, r.Dir, r.Val, false, ttlOptions))
case r.PrevIndex > 0 || r.PrevValue != "":
return toResponse(a.store.CompareAndSwap(r.Path, r.PrevValue, r.PrevIndex, r.Val, ttlOptions))
default:
if storeMemberAttributeRegexp.MatchString(r.Path) {
id := membership.MustParseMemberIDFromKey(path.Dir(r.Path))
var attr membership.Attributes
if err := json.Unmarshal([]byte(r.Val), &attr); err != nil {
plog.Panicf("unmarshal %s should never fail: %v", r.Val, err)
}
if a.cluster != nil {
a.cluster.UpdateAttributes(id, attr)
}
// return an empty response since there is no consumer.
return Response{}
}
if r.Path == membership.StoreClusterVersionKey() {
if a.cluster != nil {
a.cluster.SetVersion(semver.Must(semver.NewVersion(r.Val)), api.UpdateCapability)
}
// return an empty response since there is no consumer.
return Response{}
}
return toResponse(a.store.Set(r.Path, r.Dir, r.Val, ttlOptions))
}
}
func (a *applierV2store) QGet(r *pb.Request) Response {
return toResponse(a.store.Get(r.Path, r.Recursive, r.Sorted))
}
func (a *applierV2store) Sync(r *pb.Request) Response {
a.store.DeleteExpiredKeys(time.Unix(0, r.Time))
return Response{}
}
// applyV2Request interprets r as a call to store.X and returns a Response interpreted
// from store.Event
func (s *EtcdServer) applyV2Request(r *pb.Request) Response {
toTTLOptions(r)
switch r.Method {
case "POST":
return s.applyV2.Post(r)
case "PUT":
return s.applyV2.Put(r)
case "DELETE":
return s.applyV2.Delete(r)
case "QGET":
return s.applyV2.QGet(r)
case "SYNC":
return s.applyV2.Sync(r)
default:
// This should never be reached, but just in case:
return Response{err: ErrUnknownMethod}
}
}
func toTTLOptions(r *pb.Request) store.TTLOptionSet {
refresh, _ := pbutil.GetBool(r.Refresh)
ttlOptions := store.TTLOptionSet{Refresh: refresh}
if r.Expiration != 0 {
ttlOptions.ExpireTime = time.Unix(0, r.Expiration)
}
return ttlOptions
}
func toResponse(ev *store.Event, err error) Response {
return Response{Event: ev, err: err}
}

81
vendor/github.com/coreos/etcd/etcdserver/backend.go generated vendored Normal file
View file

@ -0,0 +1,81 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"fmt"
"os"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
)
func newBackend(cfg *ServerConfig) backend.Backend {
bcfg := backend.DefaultBackendConfig()
bcfg.Path = cfg.backendPath()
if cfg.QuotaBackendBytes > 0 && cfg.QuotaBackendBytes != DefaultQuotaBytes {
// permit 10% excess over quota for disarm
bcfg.MmapSize = uint64(cfg.QuotaBackendBytes + cfg.QuotaBackendBytes/10)
}
return backend.New(bcfg)
}
// openSnapshotBackend renames a snapshot db to the current etcd db and opens it.
func openSnapshotBackend(cfg *ServerConfig, ss *snap.Snapshotter, snapshot raftpb.Snapshot) (backend.Backend, error) {
snapPath, err := ss.DBFilePath(snapshot.Metadata.Index)
if err != nil {
return nil, fmt.Errorf("database snapshot file path error: %v", err)
}
if err := os.Rename(snapPath, cfg.backendPath()); err != nil {
return nil, fmt.Errorf("rename snapshot file error: %v", err)
}
return openBackend(cfg), nil
}
// openBackend returns a backend using the current etcd db.
func openBackend(cfg *ServerConfig) backend.Backend {
fn := cfg.backendPath()
beOpened := make(chan backend.Backend)
go func() {
beOpened <- newBackend(cfg)
}()
select {
case be := <-beOpened:
return be
case <-time.After(time.Second):
plog.Warningf("another etcd process is using %q and holds the file lock.", fn)
plog.Warningf("waiting for it to exit before starting...")
}
return <-beOpened
}
// recoverBackendSnapshot recovers the DB from a snapshot in case etcd crashes
// before updating the backend db after persisting raft snapshot to disk,
// violating the invariant snapshot.Metadata.Index < db.consistentIndex. In this
// case, replace the db with the snapshot db sent by the leader.
func recoverSnapshotBackend(cfg *ServerConfig, oldbe backend.Backend, snapshot raftpb.Snapshot) (backend.Backend, error) {
var cIndex consistentIndex
kv := mvcc.New(oldbe, &lease.FakeLessor{}, &cIndex)
defer kv.Close()
if snapshot.Metadata.Index <= kv.ConsistentIndex() {
return oldbe, nil
}
oldbe.Close()
return openSnapshotBackend(cfg, snap.New(cfg.SnapDir()), snapshot)
}

View file

@ -0,0 +1,258 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"sort"
"time"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver"
)
// isMemberBootstrapped tries to check if the given member has been bootstrapped
// in the given cluster.
func isMemberBootstrapped(cl *membership.RaftCluster, member string, rt http.RoundTripper, timeout time.Duration) bool {
rcl, err := getClusterFromRemotePeers(getRemotePeerURLs(cl, member), timeout, false, rt)
if err != nil {
return false
}
id := cl.MemberByName(member).ID
m := rcl.Member(id)
if m == nil {
return false
}
if len(m.ClientURLs) > 0 {
return true
}
return false
}
// GetClusterFromRemotePeers takes a set of URLs representing etcd peers, and
// attempts to construct a Cluster by accessing the members endpoint on one of
// these URLs. The first URL to provide a response is used. If no URLs provide
// a response, or a Cluster cannot be successfully created from a received
// response, an error is returned.
// Each request has a 10-second timeout. Because the upper limit of TTL is 5s,
// 10 second is enough for building connection and finishing request.
func GetClusterFromRemotePeers(urls []string, rt http.RoundTripper) (*membership.RaftCluster, error) {
return getClusterFromRemotePeers(urls, 10*time.Second, true, rt)
}
// If logerr is true, it prints out more error messages.
func getClusterFromRemotePeers(urls []string, timeout time.Duration, logerr bool, rt http.RoundTripper) (*membership.RaftCluster, error) {
cc := &http.Client{
Transport: rt,
Timeout: timeout,
}
for _, u := range urls {
resp, err := cc.Get(u + "/members")
if err != nil {
if logerr {
plog.Warningf("could not get cluster response from %s: %v", u, err)
}
continue
}
b, err := ioutil.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
if logerr {
plog.Warningf("could not read the body of cluster response: %v", err)
}
continue
}
var membs []*membership.Member
if err = json.Unmarshal(b, &membs); err != nil {
if logerr {
plog.Warningf("could not unmarshal cluster response: %v", err)
}
continue
}
id, err := types.IDFromString(resp.Header.Get("X-Etcd-Cluster-ID"))
if err != nil {
if logerr {
plog.Warningf("could not parse the cluster ID from cluster res: %v", err)
}
continue
}
// check the length of membership members
// if the membership members are present then prepare and return raft cluster
// if membership members are not present then the raft cluster formed will be
// an invalid empty cluster hence return failed to get raft cluster member(s) from the given urls error
if len(membs) > 0 {
return membership.NewClusterFromMembers("", id, membs), nil
}
return nil, fmt.Errorf("failed to get raft cluster member(s) from the given urls.")
}
return nil, fmt.Errorf("could not retrieve cluster information from the given urls")
}
// getRemotePeerURLs returns peer urls of remote members in the cluster. The
// returned list is sorted in ascending lexicographical order.
func getRemotePeerURLs(cl *membership.RaftCluster, local string) []string {
us := make([]string, 0)
for _, m := range cl.Members() {
if m.Name == local {
continue
}
us = append(us, m.PeerURLs...)
}
sort.Strings(us)
return us
}
// getVersions returns the versions of the members in the given cluster.
// The key of the returned map is the member's ID. The value of the returned map
// is the semver versions string, including server and cluster.
// If it fails to get the version of a member, the key will be nil.
func getVersions(cl *membership.RaftCluster, local types.ID, rt http.RoundTripper) map[string]*version.Versions {
members := cl.Members()
vers := make(map[string]*version.Versions)
for _, m := range members {
if m.ID == local {
cv := "not_decided"
if cl.Version() != nil {
cv = cl.Version().String()
}
vers[m.ID.String()] = &version.Versions{Server: version.Version, Cluster: cv}
continue
}
ver, err := getVersion(m, rt)
if err != nil {
plog.Warningf("cannot get the version of member %s (%v)", m.ID, err)
vers[m.ID.String()] = nil
} else {
vers[m.ID.String()] = ver
}
}
return vers
}
// decideClusterVersion decides the cluster version based on the versions map.
// The returned version is the min server version in the map, or nil if the min
// version in unknown.
func decideClusterVersion(vers map[string]*version.Versions) *semver.Version {
var cv *semver.Version
lv := semver.Must(semver.NewVersion(version.Version))
for mid, ver := range vers {
if ver == nil {
return nil
}
v, err := semver.NewVersion(ver.Server)
if err != nil {
plog.Errorf("cannot understand the version of member %s (%v)", mid, err)
return nil
}
if lv.LessThan(*v) {
plog.Warningf("the local etcd version %s is not up-to-date", lv.String())
plog.Warningf("member %s has a higher version %s", mid, ver.Server)
}
if cv == nil {
cv = v
} else if v.LessThan(*cv) {
cv = v
}
}
return cv
}
// isCompatibleWithCluster return true if the local member has a compatible version with
// the current running cluster.
// The version is considered as compatible when at least one of the other members in the cluster has a
// cluster version in the range of [MinClusterVersion, Version] and no known members has a cluster version
// out of the range.
// We set this rule since when the local member joins, another member might be offline.
func isCompatibleWithCluster(cl *membership.RaftCluster, local types.ID, rt http.RoundTripper) bool {
vers := getVersions(cl, local, rt)
minV := semver.Must(semver.NewVersion(version.MinClusterVersion))
maxV := semver.Must(semver.NewVersion(version.Version))
maxV = &semver.Version{
Major: maxV.Major,
Minor: maxV.Minor,
}
return isCompatibleWithVers(vers, local, minV, maxV)
}
func isCompatibleWithVers(vers map[string]*version.Versions, local types.ID, minV, maxV *semver.Version) bool {
var ok bool
for id, v := range vers {
// ignore comparison with local version
if id == local.String() {
continue
}
if v == nil {
continue
}
clusterv, err := semver.NewVersion(v.Cluster)
if err != nil {
plog.Errorf("cannot understand the cluster version of member %s (%v)", id, err)
continue
}
if clusterv.LessThan(*minV) {
plog.Warningf("the running cluster version(%v) is lower than the minimal cluster version(%v) supported", clusterv.String(), minV.String())
return false
}
if maxV.LessThan(*clusterv) {
plog.Warningf("the running cluster version(%v) is higher than the maximum cluster version(%v) supported", clusterv.String(), maxV.String())
return false
}
ok = true
}
return ok
}
// getVersion returns the Versions of the given member via its
// peerURLs. Returns the last error if it fails to get the version.
func getVersion(m *membership.Member, rt http.RoundTripper) (*version.Versions, error) {
cc := &http.Client{
Transport: rt,
}
var (
err error
resp *http.Response
)
for _, u := range m.PeerURLs {
resp, err = cc.Get(u + "/version")
if err != nil {
plog.Warningf("failed to reach the peerURL(%s) of member %s (%v)", u, m.ID, err)
continue
}
var b []byte
b, err = ioutil.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
plog.Warningf("failed to read out the response body from the peerURL(%s) of member %s (%v)", u, m.ID, err)
continue
}
var vers version.Versions
if err = json.Unmarshal(b, &vers); err != nil {
plog.Warningf("failed to unmarshal the response body got from the peerURL(%s) of member %s (%v)", u, m.ID, err)
continue
}
return &vers, nil
}
return nil, err
}

204
vendor/github.com/coreos/etcd/etcdserver/config.go generated vendored Normal file
View file

@ -0,0 +1,204 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"fmt"
"path/filepath"
"sort"
"strings"
"time"
"golang.org/x/net/context"
"github.com/coreos/etcd/pkg/netutil"
"github.com/coreos/etcd/pkg/transport"
"github.com/coreos/etcd/pkg/types"
)
// ServerConfig holds the configuration of etcd as taken from the command line or discovery.
type ServerConfig struct {
Name string
DiscoveryURL string
DiscoveryProxy string
ClientURLs types.URLs
PeerURLs types.URLs
DataDir string
// DedicatedWALDir config will make the etcd to write the WAL to the WALDir
// rather than the dataDir/member/wal.
DedicatedWALDir string
SnapCount uint64
MaxSnapFiles uint
MaxWALFiles uint
InitialPeerURLsMap types.URLsMap
InitialClusterToken string
NewCluster bool
ForceNewCluster bool
PeerTLSInfo transport.TLSInfo
TickMs uint
ElectionTicks int
BootstrapTimeout time.Duration
AutoCompactionRetention int
QuotaBackendBytes int64
StrictReconfigCheck bool
// ClientCertAuthEnabled is true when cert has been signed by the client CA.
ClientCertAuthEnabled bool
AuthToken string
}
// VerifyBootstrap sanity-checks the initial config for bootstrap case
// and returns an error for things that should never happen.
func (c *ServerConfig) VerifyBootstrap() error {
if err := c.hasLocalMember(); err != nil {
return err
}
if err := c.advertiseMatchesCluster(); err != nil {
return err
}
if checkDuplicateURL(c.InitialPeerURLsMap) {
return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
}
if c.InitialPeerURLsMap.String() == "" && c.DiscoveryURL == "" {
return fmt.Errorf("initial cluster unset and no discovery URL found")
}
return nil
}
// VerifyJoinExisting sanity-checks the initial config for join existing cluster
// case and returns an error for things that should never happen.
func (c *ServerConfig) VerifyJoinExisting() error {
// The member has announced its peer urls to the cluster before starting; no need to
// set the configuration again.
if err := c.hasLocalMember(); err != nil {
return err
}
if checkDuplicateURL(c.InitialPeerURLsMap) {
return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
}
if c.DiscoveryURL != "" {
return fmt.Errorf("discovery URL should not be set when joining existing initial cluster")
}
return nil
}
// hasLocalMember checks that the cluster at least contains the local server.
func (c *ServerConfig) hasLocalMember() error {
if urls := c.InitialPeerURLsMap[c.Name]; urls == nil {
return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name)
}
return nil
}
// advertiseMatchesCluster confirms peer URLs match those in the cluster peer list.
func (c *ServerConfig) advertiseMatchesCluster() error {
urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice()
urls.Sort()
sort.Strings(apurls)
ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
defer cancel()
if !netutil.URLStringsEqual(ctx, apurls, urls.StringSlice()) {
umap := map[string]types.URLs{c.Name: c.PeerURLs}
return fmt.Errorf("--initial-cluster must include %s given --initial-advertise-peer-urls=%s", types.URLsMap(umap).String(), strings.Join(apurls, ","))
}
return nil
}
func (c *ServerConfig) MemberDir() string { return filepath.Join(c.DataDir, "member") }
func (c *ServerConfig) WALDir() string {
if c.DedicatedWALDir != "" {
return c.DedicatedWALDir
}
return filepath.Join(c.MemberDir(), "wal")
}
func (c *ServerConfig) SnapDir() string { return filepath.Join(c.MemberDir(), "snap") }
func (c *ServerConfig) ShouldDiscover() bool { return c.DiscoveryURL != "" }
// ReqTimeout returns timeout for request to finish.
func (c *ServerConfig) ReqTimeout() time.Duration {
// 5s for queue waiting, computation and disk IO delay
// + 2 * election timeout for possible leader election
return 5*time.Second + 2*time.Duration(c.ElectionTicks)*time.Duration(c.TickMs)*time.Millisecond
}
func (c *ServerConfig) electionTimeout() time.Duration {
return time.Duration(c.ElectionTicks) * time.Duration(c.TickMs) * time.Millisecond
}
func (c *ServerConfig) peerDialTimeout() time.Duration {
// 1s for queue wait and system delay
// + one RTT, which is smaller than 1/5 election timeout
return time.Second + time.Duration(c.ElectionTicks)*time.Duration(c.TickMs)*time.Millisecond/5
}
func (c *ServerConfig) PrintWithInitial() { c.print(true) }
func (c *ServerConfig) Print() { c.print(false) }
func (c *ServerConfig) print(initial bool) {
plog.Infof("name = %s", c.Name)
if c.ForceNewCluster {
plog.Infof("force new cluster")
}
plog.Infof("data dir = %s", c.DataDir)
plog.Infof("member dir = %s", c.MemberDir())
if c.DedicatedWALDir != "" {
plog.Infof("dedicated WAL dir = %s", c.DedicatedWALDir)
}
plog.Infof("heartbeat = %dms", c.TickMs)
plog.Infof("election = %dms", c.ElectionTicks*int(c.TickMs))
plog.Infof("snapshot count = %d", c.SnapCount)
if len(c.DiscoveryURL) != 0 {
plog.Infof("discovery URL= %s", c.DiscoveryURL)
if len(c.DiscoveryProxy) != 0 {
plog.Infof("discovery proxy = %s", c.DiscoveryProxy)
}
}
plog.Infof("advertise client URLs = %s", c.ClientURLs)
if initial {
plog.Infof("initial advertise peer URLs = %s", c.PeerURLs)
plog.Infof("initial cluster = %s", c.InitialPeerURLsMap)
}
}
func checkDuplicateURL(urlsmap types.URLsMap) bool {
um := make(map[string]bool)
for _, urls := range urlsmap {
for _, url := range urls {
u := url.String()
if um[u] {
return true
}
um[u] = true
}
}
return false
}
func (c *ServerConfig) bootstrapTimeout() time.Duration {
if c.BootstrapTimeout != 0 {
return c.BootstrapTimeout
}
return time.Second
}
func (c *ServerConfig) backendPath() string { return filepath.Join(c.SnapDir(), "db") }

View file

@ -0,0 +1,33 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"sync/atomic"
)
// consistentIndex represents the offset of an entry in a consistent replica log.
// It implements the mvcc.ConsistentIndexGetter interface.
// It is always set to the offset of current entry before executing the entry,
// so ConsistentWatchableKV could get the consistent index from it.
type consistentIndex uint64
func (i *consistentIndex) setConsistentIndex(v uint64) {
atomic.StoreUint64((*uint64)(i), v)
}
func (i *consistentIndex) ConsistentIndex() uint64 {
return atomic.LoadUint64((*uint64)(i))
}

16
vendor/github.com/coreos/etcd/etcdserver/doc.go generated vendored Normal file
View file

@ -0,0 +1,16 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package etcdserver defines how etcd servers interact and store their states.
package etcdserver

46
vendor/github.com/coreos/etcd/etcdserver/errors.go generated vendored Normal file
View file

@ -0,0 +1,46 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"errors"
"fmt"
)
var (
ErrUnknownMethod = errors.New("etcdserver: unknown method")
ErrStopped = errors.New("etcdserver: server stopped")
ErrCanceled = errors.New("etcdserver: request cancelled")
ErrTimeout = errors.New("etcdserver: request timed out")
ErrTimeoutDueToLeaderFail = errors.New("etcdserver: request timed out, possibly due to previous leader failure")
ErrTimeoutDueToConnectionLost = errors.New("etcdserver: request timed out, possibly due to connection lost")
ErrTimeoutLeaderTransfer = errors.New("etcdserver: request timed out, leader transfer took too long")
ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
ErrNoLeader = errors.New("etcdserver: no leader")
ErrRequestTooLarge = errors.New("etcdserver: request is too large")
ErrNoSpace = errors.New("etcdserver: no space")
ErrTooManyRequests = errors.New("etcdserver: too many requests")
ErrUnhealthy = errors.New("etcdserver: unhealthy cluster")
ErrKeyNotFound = errors.New("etcdserver: key not found")
)
type DiscoveryError struct {
Op string
Err error
}
func (e DiscoveryError) Error() string {
return fmt.Sprintf("failed to %s discovery cluster (%v)", e.Op, e.Err)
}

102
vendor/github.com/coreos/etcd/etcdserver/metrics.go generated vendored Normal file
View file

@ -0,0 +1,102 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"time"
"github.com/coreos/etcd/pkg/runtime"
"github.com/prometheus/client_golang/prometheus"
)
var (
hasLeader = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "has_leader",
Help: "Whether or not a leader exists. 1 is existence, 0 is not.",
})
leaderChanges = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "leader_changes_seen_total",
Help: "The number of leader changes seen.",
})
proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_committed_total",
Help: "The total number of consensus proposals committed.",
})
proposalsApplied = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_applied_total",
Help: "The total number of consensus proposals applied.",
})
proposalsPending = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_pending",
Help: "The current number of pending proposals to commit.",
})
proposalsFailed = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_failed_total",
Help: "The total number of failed proposals seen.",
})
leaseExpired = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "server",
Name: "lease_expired_total",
Help: "The total number of expired leases.",
})
)
func init() {
prometheus.MustRegister(hasLeader)
prometheus.MustRegister(leaderChanges)
prometheus.MustRegister(proposalsCommitted)
prometheus.MustRegister(proposalsApplied)
prometheus.MustRegister(proposalsPending)
prometheus.MustRegister(proposalsFailed)
prometheus.MustRegister(leaseExpired)
}
func monitorFileDescriptor(done <-chan struct{}) {
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
for {
used, err := runtime.FDUsage()
if err != nil {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
return
}
limit, err := runtime.FDLimit()
if err != nil {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
return
}
if used >= limit/5*4 {
plog.Warningf("80%% of the file descriptor limit is used [used = %d, limit = %d]", used, limit)
}
select {
case <-ticker.C:
case <-done:
return
}
}
}

121
vendor/github.com/coreos/etcd/etcdserver/quota.go generated vendored Normal file
View file

@ -0,0 +1,121 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
const (
// DefaultQuotaBytes is the number of bytes the backend Size may
// consume before exceeding the space quota.
DefaultQuotaBytes = int64(2 * 1024 * 1024 * 1024) // 2GB
// MaxQuotaBytes is the maximum number of bytes suggested for a backend
// quota. A larger quota may lead to degraded performance.
MaxQuotaBytes = int64(8 * 1024 * 1024 * 1024) // 8GB
)
// Quota represents an arbitrary quota against arbitrary requests. Each request
// costs some charge; if there is not enough remaining charge, then there are
// too few resources available within the quota to apply the request.
type Quota interface {
// Available judges whether the given request fits within the quota.
Available(req interface{}) bool
// Cost computes the charge against the quota for a given request.
Cost(req interface{}) int
// Remaining is the amount of charge left for the quota.
Remaining() int64
}
type passthroughQuota struct{}
func (*passthroughQuota) Available(interface{}) bool { return true }
func (*passthroughQuota) Cost(interface{}) int { return 0 }
func (*passthroughQuota) Remaining() int64 { return 1 }
type backendQuota struct {
s *EtcdServer
maxBackendBytes int64
}
const (
// leaseOverhead is an estimate for the cost of storing a lease
leaseOverhead = 64
// kvOverhead is an estimate for the cost of storing a key's metadata
kvOverhead = 256
)
func NewBackendQuota(s *EtcdServer) Quota {
if s.Cfg.QuotaBackendBytes < 0 {
// disable quotas if negative
plog.Warningf("disabling backend quota")
return &passthroughQuota{}
}
if s.Cfg.QuotaBackendBytes == 0 {
// use default size if no quota size given
return &backendQuota{s, DefaultQuotaBytes}
}
if s.Cfg.QuotaBackendBytes > MaxQuotaBytes {
plog.Warningf("backend quota %v exceeds maximum recommended quota %v", s.Cfg.QuotaBackendBytes, MaxQuotaBytes)
}
return &backendQuota{s, s.Cfg.QuotaBackendBytes}
}
func (b *backendQuota) Available(v interface{}) bool {
// TODO: maybe optimize backend.Size()
return b.s.Backend().Size()+int64(b.Cost(v)) < b.maxBackendBytes
}
func (b *backendQuota) Cost(v interface{}) int {
switch r := v.(type) {
case *pb.PutRequest:
return costPut(r)
case *pb.TxnRequest:
return costTxn(r)
case *pb.LeaseGrantRequest:
return leaseOverhead
default:
panic("unexpected cost")
}
}
func costPut(r *pb.PutRequest) int { return kvOverhead + len(r.Key) + len(r.Value) }
func costTxnReq(u *pb.RequestOp) int {
r := u.GetRequestPut()
if r == nil {
return 0
}
return costPut(r)
}
func costTxn(r *pb.TxnRequest) int {
sizeSuccess := 0
for _, u := range r.Success {
sizeSuccess += costTxnReq(u)
}
sizeFailure := 0
for _, u := range r.Failure {
sizeFailure += costTxnReq(u)
}
if sizeFailure > sizeSuccess {
return sizeFailure
}
return sizeSuccess
}
func (b *backendQuota) Remaining() int64 {
return b.maxBackendBytes - b.s.Backend().Size()
}

594
vendor/github.com/coreos/etcd/etcdserver/raft.go generated vendored Normal file
View file

@ -0,0 +1,594 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"encoding/json"
"expvar"
"sort"
"sync"
"sync/atomic"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/contention"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/rafthttp"
"github.com/coreos/etcd/wal"
"github.com/coreos/etcd/wal/walpb"
"github.com/coreos/pkg/capnslog"
)
const (
// Number of entries for slow follower to catch-up after compacting
// the raft storage entries.
// We expect the follower has a millisecond level latency with the leader.
// The max throughput is around 10K. Keep a 5K entries is enough for helping
// follower to catch up.
numberOfCatchUpEntries = 5000
// The max throughput of etcd will not exceed 100MB/s (100K * 1KB value).
// Assuming the RTT is around 10ms, 1MB max size is large enough.
maxSizePerMsg = 1 * 1024 * 1024
// Never overflow the rafthttp buffer, which is 4096.
// TODO: a better const?
maxInflightMsgs = 4096 / 8
)
var (
// protects raftStatus
raftStatusMu sync.Mutex
// indirection for expvar func interface
// expvar panics when publishing duplicate name
// expvar does not support remove a registered name
// so only register a func that calls raftStatus
// and change raftStatus as we need.
raftStatus func() raft.Status
)
func init() {
raft.SetLogger(capnslog.NewPackageLogger("github.com/coreos/etcd", "raft"))
expvar.Publish("raft.status", expvar.Func(func() interface{} {
raftStatusMu.Lock()
defer raftStatusMu.Unlock()
return raftStatus()
}))
}
type RaftTimer interface {
Index() uint64
Term() uint64
}
// apply contains entries, snapshot to be applied. Once
// an apply is consumed, the entries will be persisted to
// to raft storage concurrently; the application must read
// raftDone before assuming the raft messages are stable.
type apply struct {
entries []raftpb.Entry
snapshot raftpb.Snapshot
// notifyc synchronizes etcd server applies with the raft node
notifyc chan struct{}
}
type raftNode struct {
// Cache of the latest raft index and raft term the server has seen.
// These three unit64 fields must be the first elements to keep 64-bit
// alignment for atomic access to the fields.
index uint64
term uint64
lead uint64
raftNodeConfig
// a chan to send/receive snapshot
msgSnapC chan raftpb.Message
// a chan to send out apply
applyc chan apply
// a chan to send out readState
readStateC chan raft.ReadState
// utility
ticker *time.Ticker
// contention detectors for raft heartbeat message
td *contention.TimeoutDetector
stopped chan struct{}
done chan struct{}
}
type raftNodeConfig struct {
// to check if msg receiver is removed from cluster
isIDRemoved func(id uint64) bool
raft.Node
raftStorage *raft.MemoryStorage
storage Storage
heartbeat time.Duration // for logging
// transport specifies the transport to send and receive msgs to members.
// Sending messages MUST NOT block. It is okay to drop messages, since
// clients should timeout and reissue their messages.
// If transport is nil, server will panic.
transport rafthttp.Transporter
}
func newRaftNode(cfg raftNodeConfig) *raftNode {
r := &raftNode{
raftNodeConfig: cfg,
// set up contention detectors for raft heartbeat message.
// expect to send a heartbeat within 2 heartbeat intervals.
td: contention.NewTimeoutDetector(2 * cfg.heartbeat),
readStateC: make(chan raft.ReadState, 1),
msgSnapC: make(chan raftpb.Message, maxInFlightMsgSnap),
applyc: make(chan apply),
stopped: make(chan struct{}),
done: make(chan struct{}),
}
if r.heartbeat == 0 {
r.ticker = &time.Ticker{}
} else {
r.ticker = time.NewTicker(r.heartbeat)
}
return r
}
// start prepares and starts raftNode in a new goroutine. It is no longer safe
// to modify the fields after it has been started.
func (r *raftNode) start(rh *raftReadyHandler) {
internalTimeout := time.Second
go func() {
defer r.onStop()
islead := false
for {
select {
case <-r.ticker.C:
r.Tick()
case rd := <-r.Ready():
if rd.SoftState != nil {
newLeader := rd.SoftState.Lead != raft.None && atomic.LoadUint64(&r.lead) != rd.SoftState.Lead
if newLeader {
leaderChanges.Inc()
}
if rd.SoftState.Lead == raft.None {
hasLeader.Set(0)
} else {
hasLeader.Set(1)
}
atomic.StoreUint64(&r.lead, rd.SoftState.Lead)
islead = rd.RaftState == raft.StateLeader
rh.updateLeadership(newLeader)
r.td.Reset()
}
if len(rd.ReadStates) != 0 {
select {
case r.readStateC <- rd.ReadStates[len(rd.ReadStates)-1]:
case <-time.After(internalTimeout):
plog.Warningf("timed out sending read state")
case <-r.stopped:
return
}
}
notifyc := make(chan struct{}, 1)
ap := apply{
entries: rd.CommittedEntries,
snapshot: rd.Snapshot,
notifyc: notifyc,
}
updateCommittedIndex(&ap, rh)
select {
case r.applyc <- ap:
case <-r.stopped:
return
}
// the leader can write to its disk in parallel with replicating to the followers and them
// writing to their disks.
// For more details, check raft thesis 10.2.1
if islead {
// gofail: var raftBeforeLeaderSend struct{}
r.transport.Send(r.processMessages(rd.Messages))
}
// gofail: var raftBeforeSave struct{}
if err := r.storage.Save(rd.HardState, rd.Entries); err != nil {
plog.Fatalf("raft save state and entries error: %v", err)
}
if !raft.IsEmptyHardState(rd.HardState) {
proposalsCommitted.Set(float64(rd.HardState.Commit))
}
// gofail: var raftAfterSave struct{}
if !raft.IsEmptySnap(rd.Snapshot) {
// gofail: var raftBeforeSaveSnap struct{}
if err := r.storage.SaveSnap(rd.Snapshot); err != nil {
plog.Fatalf("raft save snapshot error: %v", err)
}
// etcdserver now claim the snapshot has been persisted onto the disk
notifyc <- struct{}{}
// gofail: var raftAfterSaveSnap struct{}
r.raftStorage.ApplySnapshot(rd.Snapshot)
plog.Infof("raft applied incoming snapshot at index %d", rd.Snapshot.Metadata.Index)
// gofail: var raftAfterApplySnap struct{}
}
r.raftStorage.Append(rd.Entries)
if !islead {
// finish processing incoming messages before we signal raftdone chan
msgs := r.processMessages(rd.Messages)
// now unblocks 'applyAll' that waits on Raft log disk writes before triggering snapshots
notifyc <- struct{}{}
// Candidate or follower needs to wait for all pending configuration
// changes to be applied before sending messages.
// Otherwise we might incorrectly count votes (e.g. votes from removed members).
// Also slow machine's follower raft-layer could proceed to become the leader
// on its own single-node cluster, before apply-layer applies the config change.
// We simply wait for ALL pending entries to be applied for now.
// We might improve this later on if it causes unnecessary long blocking issues.
waitApply := false
for _, ent := range rd.CommittedEntries {
if ent.Type == raftpb.EntryConfChange {
waitApply = true
break
}
}
if waitApply {
// blocks until 'applyAll' calls 'applyWait.Trigger'
// to be in sync with scheduled config-change job
// (assume notifyc has cap of 1)
select {
case notifyc <- struct{}{}:
case <-r.stopped:
return
}
}
// gofail: var raftBeforeFollowerSend struct{}
r.transport.Send(msgs)
} else {
// leader already processed 'MsgSnap' and signaled
notifyc <- struct{}{}
}
r.Advance()
case <-r.stopped:
return
}
}
}()
}
func updateCommittedIndex(ap *apply, rh *raftReadyHandler) {
var ci uint64
if len(ap.entries) != 0 {
ci = ap.entries[len(ap.entries)-1].Index
}
if ap.snapshot.Metadata.Index > ci {
ci = ap.snapshot.Metadata.Index
}
if ci != 0 {
rh.updateCommittedIndex(ci)
}
}
func (r *raftNode) processMessages(ms []raftpb.Message) []raftpb.Message {
sentAppResp := false
for i := len(ms) - 1; i >= 0; i-- {
if r.isIDRemoved(ms[i].To) {
ms[i].To = 0
}
if ms[i].Type == raftpb.MsgAppResp {
if sentAppResp {
ms[i].To = 0
} else {
sentAppResp = true
}
}
if ms[i].Type == raftpb.MsgSnap {
// There are two separate data store: the store for v2, and the KV for v3.
// The msgSnap only contains the most recent snapshot of store without KV.
// So we need to redirect the msgSnap to etcd server main loop for merging in the
// current store snapshot and KV snapshot.
select {
case r.msgSnapC <- ms[i]:
default:
// drop msgSnap if the inflight chan if full.
}
ms[i].To = 0
}
if ms[i].Type == raftpb.MsgHeartbeat {
ok, exceed := r.td.Observe(ms[i].To)
if !ok {
// TODO: limit request rate.
plog.Warningf("failed to send out heartbeat on time (exceeded the %v timeout for %v)", r.heartbeat, exceed)
plog.Warningf("server is likely overloaded")
}
}
}
return ms
}
func (r *raftNode) apply() chan apply {
return r.applyc
}
func (r *raftNode) stop() {
r.stopped <- struct{}{}
<-r.done
}
func (r *raftNode) onStop() {
r.Stop()
r.ticker.Stop()
r.transport.Stop()
if err := r.storage.Close(); err != nil {
plog.Panicf("raft close storage error: %v", err)
}
close(r.done)
}
// for testing
func (r *raftNode) pauseSending() {
p := r.transport.(rafthttp.Pausable)
p.Pause()
}
func (r *raftNode) resumeSending() {
p := r.transport.(rafthttp.Pausable)
p.Resume()
}
// advanceTicksForElection advances ticks to the node for fast election.
// This reduces the time to wait for first leader election if bootstrapping the whole
// cluster, while leaving at least 1 heartbeat for possible existing leader
// to contact it.
func advanceTicksForElection(n raft.Node, electionTicks int) {
for i := 0; i < electionTicks-1; i++ {
n.Tick()
}
}
func startNode(cfg *ServerConfig, cl *membership.RaftCluster, ids []types.ID) (id types.ID, n raft.Node, s *raft.MemoryStorage, w *wal.WAL) {
var err error
member := cl.MemberByName(cfg.Name)
metadata := pbutil.MustMarshal(
&pb.Metadata{
NodeID: uint64(member.ID),
ClusterID: uint64(cl.ID()),
},
)
if w, err = wal.Create(cfg.WALDir(), metadata); err != nil {
plog.Fatalf("create wal error: %v", err)
}
peers := make([]raft.Peer, len(ids))
for i, id := range ids {
ctx, err := json.Marshal((*cl).Member(id))
if err != nil {
plog.Panicf("marshal member should never fail: %v", err)
}
peers[i] = raft.Peer{ID: uint64(id), Context: ctx}
}
id = member.ID
plog.Infof("starting member %s in cluster %s", id, cl.ID())
s = raft.NewMemoryStorage()
c := &raft.Config{
ID: uint64(id),
ElectionTick: cfg.ElectionTicks,
HeartbeatTick: 1,
Storage: s,
MaxSizePerMsg: maxSizePerMsg,
MaxInflightMsgs: maxInflightMsgs,
CheckQuorum: true,
}
n = raft.StartNode(c, peers)
raftStatusMu.Lock()
raftStatus = n.Status
raftStatusMu.Unlock()
advanceTicksForElection(n, c.ElectionTick)
return
}
func restartNode(cfg *ServerConfig, snapshot *raftpb.Snapshot) (types.ID, *membership.RaftCluster, raft.Node, *raft.MemoryStorage, *wal.WAL) {
var walsnap walpb.Snapshot
if snapshot != nil {
walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
}
w, id, cid, st, ents := readWAL(cfg.WALDir(), walsnap)
plog.Infof("restarting member %s in cluster %s at commit index %d", id, cid, st.Commit)
cl := membership.NewCluster("")
cl.SetID(cid)
s := raft.NewMemoryStorage()
if snapshot != nil {
s.ApplySnapshot(*snapshot)
}
s.SetHardState(st)
s.Append(ents)
c := &raft.Config{
ID: uint64(id),
ElectionTick: cfg.ElectionTicks,
HeartbeatTick: 1,
Storage: s,
MaxSizePerMsg: maxSizePerMsg,
MaxInflightMsgs: maxInflightMsgs,
CheckQuorum: true,
}
n := raft.RestartNode(c)
raftStatusMu.Lock()
raftStatus = n.Status
raftStatusMu.Unlock()
advanceTicksForElection(n, c.ElectionTick)
return id, cl, n, s, w
}
func restartAsStandaloneNode(cfg *ServerConfig, snapshot *raftpb.Snapshot) (types.ID, *membership.RaftCluster, raft.Node, *raft.MemoryStorage, *wal.WAL) {
var walsnap walpb.Snapshot
if snapshot != nil {
walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
}
w, id, cid, st, ents := readWAL(cfg.WALDir(), walsnap)
// discard the previously uncommitted entries
for i, ent := range ents {
if ent.Index > st.Commit {
plog.Infof("discarding %d uncommitted WAL entries ", len(ents)-i)
ents = ents[:i]
break
}
}
// force append the configuration change entries
toAppEnts := createConfigChangeEnts(getIDs(snapshot, ents), uint64(id), st.Term, st.Commit)
ents = append(ents, toAppEnts...)
// force commit newly appended entries
err := w.Save(raftpb.HardState{}, toAppEnts)
if err != nil {
plog.Fatalf("%v", err)
}
if len(ents) != 0 {
st.Commit = ents[len(ents)-1].Index
}
plog.Printf("forcing restart of member %s in cluster %s at commit index %d", id, cid, st.Commit)
cl := membership.NewCluster("")
cl.SetID(cid)
s := raft.NewMemoryStorage()
if snapshot != nil {
s.ApplySnapshot(*snapshot)
}
s.SetHardState(st)
s.Append(ents)
c := &raft.Config{
ID: uint64(id),
ElectionTick: cfg.ElectionTicks,
HeartbeatTick: 1,
Storage: s,
MaxSizePerMsg: maxSizePerMsg,
MaxInflightMsgs: maxInflightMsgs,
}
n := raft.RestartNode(c)
raftStatus = n.Status
return id, cl, n, s, w
}
// getIDs returns an ordered set of IDs included in the given snapshot and
// the entries. The given snapshot/entries can contain two kinds of
// ID-related entry:
// - ConfChangeAddNode, in which case the contained ID will be added into the set.
// - ConfChangeRemoveNode, in which case the contained ID will be removed from the set.
func getIDs(snap *raftpb.Snapshot, ents []raftpb.Entry) []uint64 {
ids := make(map[uint64]bool)
if snap != nil {
for _, id := range snap.Metadata.ConfState.Nodes {
ids[id] = true
}
}
for _, e := range ents {
if e.Type != raftpb.EntryConfChange {
continue
}
var cc raftpb.ConfChange
pbutil.MustUnmarshal(&cc, e.Data)
switch cc.Type {
case raftpb.ConfChangeAddNode:
ids[cc.NodeID] = true
case raftpb.ConfChangeRemoveNode:
delete(ids, cc.NodeID)
case raftpb.ConfChangeUpdateNode:
// do nothing
default:
plog.Panicf("ConfChange Type should be either ConfChangeAddNode or ConfChangeRemoveNode!")
}
}
sids := make(types.Uint64Slice, 0, len(ids))
for id := range ids {
sids = append(sids, id)
}
sort.Sort(sids)
return []uint64(sids)
}
// createConfigChangeEnts creates a series of Raft entries (i.e.
// EntryConfChange) to remove the set of given IDs from the cluster. The ID
// `self` is _not_ removed, even if present in the set.
// If `self` is not inside the given ids, it creates a Raft entry to add a
// default member with the given `self`.
func createConfigChangeEnts(ids []uint64, self uint64, term, index uint64) []raftpb.Entry {
ents := make([]raftpb.Entry, 0)
next := index + 1
found := false
for _, id := range ids {
if id == self {
found = true
continue
}
cc := &raftpb.ConfChange{
Type: raftpb.ConfChangeRemoveNode,
NodeID: id,
}
e := raftpb.Entry{
Type: raftpb.EntryConfChange,
Data: pbutil.MustMarshal(cc),
Term: term,
Index: next,
}
ents = append(ents, e)
next++
}
if !found {
m := membership.Member{
ID: types.ID(self),
RaftAttributes: membership.RaftAttributes{PeerURLs: []string{"http://localhost:2380"}},
}
ctx, err := json.Marshal(m)
if err != nil {
plog.Panicf("marshal member should never fail: %v", err)
}
cc := &raftpb.ConfChange{
Type: raftpb.ConfChangeAddNode,
NodeID: self,
Context: ctx,
}
e := raftpb.Entry{
Type: raftpb.EntryConfChange,
Data: pbutil.MustMarshal(cc),
Term: term,
Index: next,
}
ents = append(ents, e)
}
return ents
}

1659
vendor/github.com/coreos/etcd/etcdserver/server.go generated vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,73 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"io"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
)
// createMergedSnapshotMessage creates a snapshot message that contains: raft status (term, conf),
// a snapshot of v2 store inside raft.Snapshot as []byte, a snapshot of v3 KV in the top level message
// as ReadCloser.
func (s *EtcdServer) createMergedSnapshotMessage(m raftpb.Message, snapt, snapi uint64, confState raftpb.ConfState) snap.Message {
// get a snapshot of v2 store as []byte
clone := s.store.Clone()
d, err := clone.SaveNoCopy()
if err != nil {
plog.Panicf("store save should never fail: %v", err)
}
// commit kv to write metadata(for example: consistent index).
s.KV().Commit()
dbsnap := s.be.Snapshot()
// get a snapshot of v3 KV as readCloser
rc := newSnapshotReaderCloser(dbsnap)
// put the []byte snapshot of store into raft snapshot and return the merged snapshot with
// KV readCloser snapshot.
snapshot := raftpb.Snapshot{
Metadata: raftpb.SnapshotMetadata{
Index: snapi,
Term: snapt,
ConfState: confState,
},
Data: d,
}
m.Snapshot = snapshot
return *snap.NewMessage(m, rc, dbsnap.Size())
}
func newSnapshotReaderCloser(snapshot backend.Snapshot) io.ReadCloser {
pr, pw := io.Pipe()
go func() {
n, err := snapshot.WriteTo(pw)
if err == nil {
plog.Infof("wrote database snapshot out [total bytes: %d]", n)
} else {
plog.Warningf("failed to write database snapshot out [written bytes: %d]: %v", n, err)
}
pw.CloseWithError(err)
err = snapshot.Close()
if err != nil {
plog.Panicf("failed to close database snapshot: %v", err)
}
}()
return pr
}

98
vendor/github.com/coreos/etcd/etcdserver/storage.go generated vendored Normal file
View file

@ -0,0 +1,98 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"io"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
"github.com/coreos/etcd/wal"
"github.com/coreos/etcd/wal/walpb"
)
type Storage interface {
// Save function saves ents and state to the underlying stable storage.
// Save MUST block until st and ents are on stable storage.
Save(st raftpb.HardState, ents []raftpb.Entry) error
// SaveSnap function saves snapshot to the underlying stable storage.
SaveSnap(snap raftpb.Snapshot) error
// Close closes the Storage and performs finalization.
Close() error
}
type storage struct {
*wal.WAL
*snap.Snapshotter
}
func NewStorage(w *wal.WAL, s *snap.Snapshotter) Storage {
return &storage{w, s}
}
// SaveSnap saves the snapshot to disk and release the locked
// wal files since they will not be used.
func (st *storage) SaveSnap(snap raftpb.Snapshot) error {
walsnap := walpb.Snapshot{
Index: snap.Metadata.Index,
Term: snap.Metadata.Term,
}
err := st.WAL.SaveSnapshot(walsnap)
if err != nil {
return err
}
err = st.Snapshotter.SaveSnap(snap)
if err != nil {
return err
}
return st.WAL.ReleaseLockTo(snap.Metadata.Index)
}
func readWAL(waldir string, snap walpb.Snapshot) (w *wal.WAL, id, cid types.ID, st raftpb.HardState, ents []raftpb.Entry) {
var (
err error
wmetadata []byte
)
repaired := false
for {
if w, err = wal.Open(waldir, snap); err != nil {
plog.Fatalf("open wal error: %v", err)
}
if wmetadata, st, ents, err = w.ReadAll(); err != nil {
w.Close()
// we can only repair ErrUnexpectedEOF and we never repair twice.
if repaired || err != io.ErrUnexpectedEOF {
plog.Fatalf("read wal error (%v) and cannot be repaired", err)
}
if !wal.Repair(waldir) {
plog.Fatalf("WAL error (%v) cannot be repaired", err)
} else {
plog.Infof("repaired WAL error (%v)", err)
repaired = true
}
continue
}
break
}
var metadata pb.Metadata
pbutil.MustUnmarshal(&metadata, wmetadata)
id = types.ID(metadata.NodeID)
cid = types.ID(metadata.ClusterID)
return
}

97
vendor/github.com/coreos/etcd/etcdserver/util.go generated vendored Normal file
View file

@ -0,0 +1,97 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"time"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/rafthttp"
)
// isConnectedToQuorumSince checks whether the local member is connected to the
// quorum of the cluster since the given time.
func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
return numConnectedSince(transport, since, self, members) >= (len(members)/2)+1
}
// isConnectedSince checks whether the local member is connected to the
// remote member since the given time.
func isConnectedSince(transport rafthttp.Transporter, since time.Time, remote types.ID) bool {
t := transport.ActiveSince(remote)
return !t.IsZero() && t.Before(since)
}
// isConnectedFullySince checks whether the local member is connected to all
// members in the cluster since the given time.
func isConnectedFullySince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
return numConnectedSince(transport, since, self, members) == len(members)
}
// numConnectedSince counts how many members are connected to the local member
// since the given time.
func numConnectedSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) int {
connectedNum := 0
for _, m := range members {
if m.ID == self || isConnectedSince(transport, since, m.ID) {
connectedNum++
}
}
return connectedNum
}
// longestConnected chooses the member with longest active-since-time.
// It returns false, if nothing is active.
func longestConnected(tp rafthttp.Transporter, membs []types.ID) (types.ID, bool) {
var longest types.ID
var oldest time.Time
for _, id := range membs {
tm := tp.ActiveSince(id)
if tm.IsZero() { // inactive
continue
}
if oldest.IsZero() { // first longest candidate
oldest = tm
longest = id
}
if tm.Before(oldest) {
oldest = tm
longest = id
}
}
if uint64(longest) == 0 {
return longest, false
}
return longest, true
}
type notifier struct {
c chan struct{}
err error
}
func newNotifier() *notifier {
return &notifier{
c: make(chan struct{}),
}
}
func (nc *notifier) notify(err error) {
nc.err = err
close(nc.c)
}

125
vendor/github.com/coreos/etcd/etcdserver/v2_server.go generated vendored Normal file
View file

@ -0,0 +1,125 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
type v2API interface {
Post(ctx context.Context, r *pb.Request) (Response, error)
Put(ctx context.Context, r *pb.Request) (Response, error)
Delete(ctx context.Context, r *pb.Request) (Response, error)
QGet(ctx context.Context, r *pb.Request) (Response, error)
Get(ctx context.Context, r *pb.Request) (Response, error)
Head(ctx context.Context, r *pb.Request) (Response, error)
}
type v2apiStore struct{ s *EtcdServer }
func (a *v2apiStore) Post(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) Put(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) Delete(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) QGet(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) processRaftRequest(ctx context.Context, r *pb.Request) (Response, error) {
data, err := r.Marshal()
if err != nil {
return Response{}, err
}
ch := a.s.w.Register(r.ID)
start := time.Now()
a.s.r.Propose(ctx, data)
proposalsPending.Inc()
defer proposalsPending.Dec()
select {
case x := <-ch:
resp := x.(Response)
return resp, resp.err
case <-ctx.Done():
proposalsFailed.Inc()
a.s.w.Trigger(r.ID, nil) // GC wait
return Response{}, a.s.parseProposeCtxErr(ctx.Err(), start)
case <-a.s.stopping:
}
return Response{}, ErrStopped
}
func (a *v2apiStore) Get(ctx context.Context, r *pb.Request) (Response, error) {
if r.Wait {
wc, err := a.s.store.Watch(r.Path, r.Recursive, r.Stream, r.Since)
if err != nil {
return Response{}, err
}
return Response{Watcher: wc}, nil
}
ev, err := a.s.store.Get(r.Path, r.Recursive, r.Sorted)
if err != nil {
return Response{}, err
}
return Response{Event: ev}, nil
}
func (a *v2apiStore) Head(ctx context.Context, r *pb.Request) (Response, error) {
ev, err := a.s.store.Get(r.Path, r.Recursive, r.Sorted)
if err != nil {
return Response{}, err
}
return Response{Event: ev}, nil
}
// Do interprets r and performs an operation on s.store according to r.Method
// and other fields. If r.Method is "POST", "PUT", "DELETE", or a "GET" with
// Quorum == true, r will be sent through consensus before performing its
// respective operation. Do will block until an action is performed or there is
// an error.
func (s *EtcdServer) Do(ctx context.Context, r pb.Request) (Response, error) {
r.ID = s.reqIDGen.Next()
if r.Method == "GET" && r.Quorum {
r.Method = "QGET"
}
v2api := (v2API)(&v2apiStore{s})
switch r.Method {
case "POST":
return v2api.Post(ctx, &r)
case "PUT":
return v2api.Put(ctx, &r)
case "DELETE":
return v2api.Delete(ctx, &r)
case "QGET":
return v2api.QGet(ctx, &r)
case "GET":
return v2api.Get(ctx, &r)
case "HEAD":
return v2api.Head(ctx, &r)
}
return Response{}, ErrUnknownMethod
}

692
vendor/github.com/coreos/etcd/etcdserver/v3_server.go generated vendored Normal file
View file

@ -0,0 +1,692 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"bytes"
"encoding/binary"
"time"
"github.com/gogo/protobuf/proto"
"github.com/coreos/etcd/auth"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/lease/leasehttp"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/raft"
"golang.org/x/net/context"
)
const (
// the max request size that raft accepts.
// TODO: make this a flag? But we probably do not want to
// accept large request which might block raft stream. User
// specify a large value might end up with shooting in the foot.
maxRequestBytes = 1.5 * 1024 * 1024
// In the health case, there might be a small gap (10s of entries) between
// the applied index and committed index.
// However, if the committed entries are very heavy to apply, the gap might grow.
// We should stop accepting new proposals if the gap growing to a certain point.
maxGapBetweenApplyAndCommitIndex = 5000
)
type RaftKV interface {
Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error)
Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
}
type Lessor interface {
// LeaseGrant sends LeaseGrant request to raft and apply it after committed.
LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
// LeaseRevoke sends LeaseRevoke request to raft and apply it after committed.
LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
// LeaseRenew renews the lease with given ID. The renewed TTL is returned. Or an error
// is returned.
LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error)
// LeaseTimeToLive retrieves lease information.
LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error)
}
type Authenticator interface {
AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error)
AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error)
Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error)
UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
}
func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
if !r.Serializable {
err := s.linearizableReadNotify(ctx)
if err != nil {
return nil, err
}
}
var resp *pb.RangeResponse
var err error
chk := func(ai *auth.AuthInfo) error {
return s.authStore.IsRangePermitted(ai, r.Key, r.RangeEnd)
}
get := func() { resp, err = s.applyV3Base.Range(nil, r) }
if serr := s.doSerialize(ctx, chk, get); serr != nil {
return nil, serr
}
return resp, err
}
func (s *EtcdServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{Put: r})
if err != nil {
return nil, err
}
return resp.(*pb.PutResponse), nil
}
func (s *EtcdServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{DeleteRange: r})
if err != nil {
return nil, err
}
return resp.(*pb.DeleteRangeResponse), nil
}
func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
if isTxnReadonly(r) {
if !isTxnSerializable(r) {
err := s.linearizableReadNotify(ctx)
if err != nil {
return nil, err
}
}
var resp *pb.TxnResponse
var err error
chk := func(ai *auth.AuthInfo) error {
return checkTxnAuth(s.authStore, ai, r)
}
get := func() { resp, err = s.applyV3Base.Txn(r) }
if serr := s.doSerialize(ctx, chk, get); serr != nil {
return nil, serr
}
return resp, err
}
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{Txn: r})
if err != nil {
return nil, err
}
return resp.(*pb.TxnResponse), nil
}
func isTxnSerializable(r *pb.TxnRequest) bool {
for _, u := range r.Success {
if r := u.GetRequestRange(); r == nil || !r.Serializable {
return false
}
}
for _, u := range r.Failure {
if r := u.GetRequestRange(); r == nil || !r.Serializable {
return false
}
}
return true
}
func isTxnReadonly(r *pb.TxnRequest) bool {
for _, u := range r.Success {
if r := u.GetRequestRange(); r == nil {
return false
}
}
for _, u := range r.Failure {
if r := u.GetRequestRange(); r == nil {
return false
}
}
return true
}
func (s *EtcdServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
result, err := s.processInternalRaftRequestOnce(ctx, pb.InternalRaftRequest{Compaction: r})
if r.Physical && result != nil && result.physc != nil {
<-result.physc
// The compaction is done deleting keys; the hash is now settled
// but the data is not necessarily committed. If there's a crash,
// the hash may revert to a hash prior to compaction completing
// if the compaction resumes. Force the finished compaction to
// commit so it won't resume following a crash.
s.be.ForceCommit()
}
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
resp := result.resp.(*pb.CompactionResponse)
if resp == nil {
resp = &pb.CompactionResponse{}
}
if resp.Header == nil {
resp.Header = &pb.ResponseHeader{}
}
resp.Header.Revision = s.kv.Rev()
return resp, nil
}
func (s *EtcdServer) LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
// no id given? choose one
for r.ID == int64(lease.NoLease) {
// only use positive int64 id's
r.ID = int64(s.reqIDGen.Next() & ((1 << 63) - 1))
}
resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseGrant: r})
if err != nil {
return nil, err
}
return resp.(*pb.LeaseGrantResponse), nil
}
func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
if err != nil {
return nil, err
}
return resp.(*pb.LeaseRevokeResponse), nil
}
func (s *EtcdServer) LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error) {
ttl, err := s.lessor.Renew(id)
if err == nil { // already requested to primary lessor(leader)
return ttl, nil
}
if err != lease.ErrNotPrimary {
return -1, err
}
cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
defer cancel()
// renewals don't go through raft; forward to leader manually
for cctx.Err() == nil && err != nil {
leader, lerr := s.waitLeader(cctx)
if lerr != nil {
return -1, lerr
}
for _, url := range leader.PeerURLs {
lurl := url + leasehttp.LeasePrefix
ttl, err = leasehttp.RenewHTTP(cctx, id, lurl, s.peerRt)
if err == nil || err == lease.ErrLeaseNotFound {
return ttl, err
}
}
}
return -1, ErrTimeout
}
func (s *EtcdServer) LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
if s.Leader() == s.ID() {
// primary; timetolive directly from leader
le := s.lessor.Lookup(lease.LeaseID(r.ID))
if le == nil {
return nil, lease.ErrLeaseNotFound
}
// TODO: fill out ResponseHeader
resp := &pb.LeaseTimeToLiveResponse{Header: &pb.ResponseHeader{}, ID: r.ID, TTL: int64(le.Remaining().Seconds()), GrantedTTL: le.TTL()}
if r.Keys {
ks := le.Keys()
kbs := make([][]byte, len(ks))
for i := range ks {
kbs[i] = []byte(ks[i])
}
resp.Keys = kbs
}
return resp, nil
}
cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
defer cancel()
// forward to leader
for cctx.Err() == nil {
leader, err := s.waitLeader(cctx)
if err != nil {
return nil, err
}
for _, url := range leader.PeerURLs {
lurl := url + leasehttp.LeaseInternalPrefix
resp, err := leasehttp.TimeToLiveHTTP(cctx, lease.LeaseID(r.ID), r.Keys, lurl, s.peerRt)
if err == nil {
return resp.LeaseTimeToLiveResponse, nil
}
if err == lease.ErrLeaseNotFound {
return nil, err
}
}
}
return nil, ErrTimeout
}
func (s *EtcdServer) waitLeader(ctx context.Context) (*membership.Member, error) {
leader := s.cluster.Member(s.Leader())
for leader == nil {
// wait an election
dur := time.Duration(s.Cfg.ElectionTicks) * time.Duration(s.Cfg.TickMs) * time.Millisecond
select {
case <-time.After(dur):
leader = s.cluster.Member(s.Leader())
case <-s.stopping:
return nil, ErrStopped
case <-ctx.Done():
return nil, ErrNoLeader
}
}
if leader == nil || len(leader.PeerURLs) == 0 {
return nil, ErrNoLeader
}
return leader, nil
}
func (s *EtcdServer) Alarm(ctx context.Context, r *pb.AlarmRequest) (*pb.AlarmResponse, error) {
resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{Alarm: r})
if err != nil {
return nil, err
}
return resp.(*pb.AlarmResponse), nil
}
func (s *EtcdServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) {
resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{AuthEnable: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthEnableResponse), nil
}
func (s *EtcdServer) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthDisable: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthDisableResponse), nil
}
func (s *EtcdServer) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) {
if err := s.linearizableReadNotify(ctx); err != nil {
return nil, err
}
var resp proto.Message
for {
checkedRevision, err := s.AuthStore().CheckPassword(r.Name, r.Password)
if err != nil {
if err != auth.ErrAuthNotEnabled {
plog.Errorf("invalid authentication request to user %s was issued", r.Name)
}
return nil, err
}
st, err := s.AuthStore().GenTokenPrefix()
if err != nil {
return nil, err
}
internalReq := &pb.InternalAuthenticateRequest{
Name: r.Name,
Password: r.Password,
SimpleToken: st,
}
resp, err = s.raftRequestOnce(ctx, pb.InternalRaftRequest{Authenticate: internalReq})
if err != nil {
return nil, err
}
if checkedRevision == s.AuthStore().Revision() {
break
}
plog.Infof("revision when password checked is obsolete, retrying")
}
return resp.(*pb.AuthenticateResponse), nil
}
func (s *EtcdServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserAdd: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserAddResponse), nil
}
func (s *EtcdServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserDelete: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserDeleteResponse), nil
}
func (s *EtcdServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserChangePassword: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserChangePasswordResponse), nil
}
func (s *EtcdServer) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserGrantRole: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserGrantRoleResponse), nil
}
func (s *EtcdServer) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserGet: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserGetResponse), nil
}
func (s *EtcdServer) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserList: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserListResponse), nil
}
func (s *EtcdServer) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserRevokeRole: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserRevokeRoleResponse), nil
}
func (s *EtcdServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleAdd: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleAddResponse), nil
}
func (s *EtcdServer) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleGrantPermission: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleGrantPermissionResponse), nil
}
func (s *EtcdServer) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleGet: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleGetResponse), nil
}
func (s *EtcdServer) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleList: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleListResponse), nil
}
func (s *EtcdServer) RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleRevokePermission: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleRevokePermissionResponse), nil
}
func (s *EtcdServer) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleDelete: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleDeleteResponse), nil
}
func (s *EtcdServer) raftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) {
result, err := s.processInternalRaftRequestOnce(ctx, r)
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp, nil
}
func (s *EtcdServer) raftRequest(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) {
for {
resp, err := s.raftRequestOnce(ctx, r)
if err != auth.ErrAuthOldRevision {
return resp, err
}
}
}
// doSerialize handles the auth logic, with permissions checked by "chk", for a serialized request "get". Returns a non-nil error on authentication failure.
func (s *EtcdServer) doSerialize(ctx context.Context, chk func(*auth.AuthInfo) error, get func()) error {
for {
ai, err := s.AuthInfoFromCtx(ctx)
if err != nil {
return err
}
if ai == nil {
// chk expects non-nil AuthInfo; use empty credentials
ai = &auth.AuthInfo{}
}
if err = chk(ai); err != nil {
if err == auth.ErrAuthOldRevision {
continue
}
return err
}
// fetch response for serialized request
get()
// empty credentials or current auth info means no need to retry
if ai.Revision == 0 || ai.Revision == s.authStore.Revision() {
return nil
}
// avoid TOCTOU error, retry of the request is required.
}
}
func (s *EtcdServer) processInternalRaftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) {
ai := s.getAppliedIndex()
ci := s.getCommittedIndex()
if ci > ai+maxGapBetweenApplyAndCommitIndex {
return nil, ErrTooManyRequests
}
r.Header = &pb.RequestHeader{
ID: s.reqIDGen.Next(),
}
authInfo, err := s.AuthInfoFromCtx(ctx)
if err != nil {
return nil, err
}
if authInfo != nil {
r.Header.Username = authInfo.Username
r.Header.AuthRevision = authInfo.Revision
}
data, err := r.Marshal()
if err != nil {
return nil, err
}
if len(data) > maxRequestBytes {
return nil, ErrRequestTooLarge
}
id := r.ID
if id == 0 {
id = r.Header.ID
}
ch := s.w.Register(id)
cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
defer cancel()
start := time.Now()
s.r.Propose(cctx, data)
proposalsPending.Inc()
defer proposalsPending.Dec()
select {
case x := <-ch:
return x.(*applyResult), nil
case <-cctx.Done():
proposalsFailed.Inc()
s.w.Trigger(id, nil) // GC wait
return nil, s.parseProposeCtxErr(cctx.Err(), start)
case <-s.done:
return nil, ErrStopped
}
}
// Watchable returns a watchable interface attached to the etcdserver.
func (s *EtcdServer) Watchable() mvcc.WatchableKV { return s.KV() }
func (s *EtcdServer) linearizableReadLoop() {
var rs raft.ReadState
for {
ctx := make([]byte, 8)
binary.BigEndian.PutUint64(ctx, s.reqIDGen.Next())
select {
case <-s.readwaitc:
case <-s.stopping:
return
}
nextnr := newNotifier()
s.readMu.Lock()
nr := s.readNotifier
s.readNotifier = nextnr
s.readMu.Unlock()
cctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout())
if err := s.r.ReadIndex(cctx, ctx); err != nil {
cancel()
if err == raft.ErrStopped {
return
}
plog.Errorf("failed to get read index from raft: %v", err)
nr.notify(err)
continue
}
cancel()
var (
timeout bool
done bool
)
for !timeout && !done {
select {
case rs = <-s.r.readStateC:
done = bytes.Equal(rs.RequestCtx, ctx)
if !done {
// a previous request might time out. now we should ignore the response of it and
// continue waiting for the response of the current requests.
plog.Warningf("ignored out-of-date read index response (want %v, got %v)", rs.RequestCtx, ctx)
}
case <-time.After(s.Cfg.ReqTimeout()):
plog.Warningf("timed out waiting for read index response")
nr.notify(ErrTimeout)
timeout = true
case <-s.stopping:
return
}
}
if !done {
continue
}
if ai := s.getAppliedIndex(); ai < rs.Index {
select {
case <-s.applyWait.Wait(rs.Index):
case <-s.stopping:
return
}
}
// unblock all l-reads requested at indices before rs.Index
nr.notify(nil)
}
}
func (s *EtcdServer) linearizableReadNotify(ctx context.Context) error {
s.readMu.RLock()
nc := s.readNotifier
s.readMu.RUnlock()
// signal linearizable loop for current notify if it hasn't been already
select {
case s.readwaitc <- struct{}{}:
default:
}
// wait for read state notification
select {
case <-nc.c:
return nc.err
case <-ctx.Done():
return ctx.Err()
case <-s.done:
return ErrStopped
}
}
func (s *EtcdServer) AuthInfoFromCtx(ctx context.Context) (*auth.AuthInfo, error) {
if s.Cfg.ClientCertAuthEnabled {
authInfo := s.AuthStore().AuthInfoFromTLS(ctx)
if authInfo != nil {
return authInfo, nil
}
}
return s.AuthStore().AuthInfoFromCtx(ctx)
}

16
vendor/github.com/coreos/etcd/mvcc/doc.go generated vendored Normal file
View file

@ -0,0 +1,16 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package mvcc defines etcd's stable MVCC storage.
package mvcc

219
vendor/github.com/coreos/etcd/mvcc/index.go generated vendored Normal file
View file

@ -0,0 +1,219 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"sort"
"sync"
"github.com/google/btree"
)
type index interface {
Get(key []byte, atRev int64) (rev, created revision, ver int64, err error)
Range(key, end []byte, atRev int64) ([][]byte, []revision)
Put(key []byte, rev revision)
Tombstone(key []byte, rev revision) error
RangeSince(key, end []byte, rev int64) []revision
Compact(rev int64) map[revision]struct{}
Equal(b index) bool
Insert(ki *keyIndex)
KeyIndex(ki *keyIndex) *keyIndex
}
type treeIndex struct {
sync.RWMutex
tree *btree.BTree
}
func newTreeIndex() index {
return &treeIndex{
tree: btree.New(32),
}
}
func (ti *treeIndex) Put(key []byte, rev revision) {
keyi := &keyIndex{key: key}
ti.Lock()
defer ti.Unlock()
item := ti.tree.Get(keyi)
if item == nil {
keyi.put(rev.main, rev.sub)
ti.tree.ReplaceOrInsert(keyi)
return
}
okeyi := item.(*keyIndex)
okeyi.put(rev.main, rev.sub)
}
func (ti *treeIndex) Get(key []byte, atRev int64) (modified, created revision, ver int64, err error) {
keyi := &keyIndex{key: key}
ti.RLock()
defer ti.RUnlock()
if keyi = ti.keyIndex(keyi); keyi == nil {
return revision{}, revision{}, 0, ErrRevisionNotFound
}
return keyi.get(atRev)
}
func (ti *treeIndex) KeyIndex(keyi *keyIndex) *keyIndex {
ti.RLock()
defer ti.RUnlock()
return ti.keyIndex(keyi)
}
func (ti *treeIndex) keyIndex(keyi *keyIndex) *keyIndex {
if item := ti.tree.Get(keyi); item != nil {
return item.(*keyIndex)
}
return nil
}
func (ti *treeIndex) Range(key, end []byte, atRev int64) (keys [][]byte, revs []revision) {
if end == nil {
rev, _, _, err := ti.Get(key, atRev)
if err != nil {
return nil, nil
}
return [][]byte{key}, []revision{rev}
}
keyi := &keyIndex{key: key}
endi := &keyIndex{key: end}
ti.RLock()
defer ti.RUnlock()
ti.tree.AscendGreaterOrEqual(keyi, func(item btree.Item) bool {
if len(endi.key) > 0 && !item.Less(endi) {
return false
}
curKeyi := item.(*keyIndex)
rev, _, _, err := curKeyi.get(atRev)
if err != nil {
return true
}
revs = append(revs, rev)
keys = append(keys, curKeyi.key)
return true
})
return keys, revs
}
func (ti *treeIndex) Tombstone(key []byte, rev revision) error {
keyi := &keyIndex{key: key}
ti.Lock()
defer ti.Unlock()
item := ti.tree.Get(keyi)
if item == nil {
return ErrRevisionNotFound
}
ki := item.(*keyIndex)
return ki.tombstone(rev.main, rev.sub)
}
// RangeSince returns all revisions from key(including) to end(excluding)
// at or after the given rev. The returned slice is sorted in the order
// of revision.
func (ti *treeIndex) RangeSince(key, end []byte, rev int64) []revision {
ti.RLock()
defer ti.RUnlock()
keyi := &keyIndex{key: key}
if end == nil {
item := ti.tree.Get(keyi)
if item == nil {
return nil
}
keyi = item.(*keyIndex)
return keyi.since(rev)
}
endi := &keyIndex{key: end}
var revs []revision
ti.tree.AscendGreaterOrEqual(keyi, func(item btree.Item) bool {
if len(endi.key) > 0 && !item.Less(endi) {
return false
}
curKeyi := item.(*keyIndex)
revs = append(revs, curKeyi.since(rev)...)
return true
})
sort.Sort(revisions(revs))
return revs
}
func (ti *treeIndex) Compact(rev int64) map[revision]struct{} {
available := make(map[revision]struct{})
var emptyki []*keyIndex
plog.Printf("store.index: compact %d", rev)
// TODO: do not hold the lock for long time?
// This is probably OK. Compacting 10M keys takes O(10ms).
ti.Lock()
defer ti.Unlock()
ti.tree.Ascend(compactIndex(rev, available, &emptyki))
for _, ki := range emptyki {
item := ti.tree.Delete(ki)
if item == nil {
plog.Panic("store.index: unexpected delete failure during compaction")
}
}
return available
}
func compactIndex(rev int64, available map[revision]struct{}, emptyki *[]*keyIndex) func(i btree.Item) bool {
return func(i btree.Item) bool {
keyi := i.(*keyIndex)
keyi.compact(rev, available)
if keyi.isEmpty() {
*emptyki = append(*emptyki, keyi)
}
return true
}
}
func (a *treeIndex) Equal(bi index) bool {
b := bi.(*treeIndex)
if a.tree.Len() != b.tree.Len() {
return false
}
equal := true
a.tree.Ascend(func(item btree.Item) bool {
aki := item.(*keyIndex)
bki := b.tree.Get(item).(*keyIndex)
if !aki.equal(bki) {
equal = false
return false
}
return true
})
return equal
}
func (ti *treeIndex) Insert(ki *keyIndex) {
ti.Lock()
defer ti.Unlock()
ti.tree.ReplaceOrInsert(ki)
}

332
vendor/github.com/coreos/etcd/mvcc/key_index.go generated vendored Normal file
View file

@ -0,0 +1,332 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"bytes"
"errors"
"fmt"
"github.com/google/btree"
)
var (
ErrRevisionNotFound = errors.New("mvcc: revision not found")
)
// keyIndex stores the revisions of a key in the backend.
// Each keyIndex has at least one key generation.
// Each generation might have several key versions.
// Tombstone on a key appends an tombstone version at the end
// of the current generation and creates a new empty generation.
// Each version of a key has an index pointing to the backend.
//
// For example: put(1.0);put(2.0);tombstone(3.0);put(4.0);tombstone(5.0) on key "foo"
// generate a keyIndex:
// key: "foo"
// rev: 5
// generations:
// {empty}
// {4.0, 5.0(t)}
// {1.0, 2.0, 3.0(t)}
//
// Compact a keyIndex removes the versions with smaller or equal to
// rev except the largest one. If the generation becomes empty
// during compaction, it will be removed. if all the generations get
// removed, the keyIndex should be removed.
// For example:
// compact(2) on the previous example
// generations:
// {empty}
// {4.0, 5.0(t)}
// {2.0, 3.0(t)}
//
// compact(4)
// generations:
// {empty}
// {4.0, 5.0(t)}
//
// compact(5):
// generations:
// {empty} -> key SHOULD be removed.
//
// compact(6):
// generations:
// {empty} -> key SHOULD be removed.
type keyIndex struct {
key []byte
modified revision // the main rev of the last modification
generations []generation
}
// put puts a revision to the keyIndex.
func (ki *keyIndex) put(main int64, sub int64) {
rev := revision{main: main, sub: sub}
if !rev.GreaterThan(ki.modified) {
plog.Panicf("store.keyindex: put with unexpected smaller revision [%v / %v]", rev, ki.modified)
}
if len(ki.generations) == 0 {
ki.generations = append(ki.generations, generation{})
}
g := &ki.generations[len(ki.generations)-1]
if len(g.revs) == 0 { // create a new key
keysGauge.Inc()
g.created = rev
}
g.revs = append(g.revs, rev)
g.ver++
ki.modified = rev
}
func (ki *keyIndex) restore(created, modified revision, ver int64) {
if len(ki.generations) != 0 {
plog.Panicf("store.keyindex: cannot restore non-empty keyIndex")
}
ki.modified = modified
g := generation{created: created, ver: ver, revs: []revision{modified}}
ki.generations = append(ki.generations, g)
keysGauge.Inc()
}
// tombstone puts a revision, pointing to a tombstone, to the keyIndex.
// It also creates a new empty generation in the keyIndex.
// It returns ErrRevisionNotFound when tombstone on an empty generation.
func (ki *keyIndex) tombstone(main int64, sub int64) error {
if ki.isEmpty() {
plog.Panicf("store.keyindex: unexpected tombstone on empty keyIndex %s", string(ki.key))
}
if ki.generations[len(ki.generations)-1].isEmpty() {
return ErrRevisionNotFound
}
ki.put(main, sub)
ki.generations = append(ki.generations, generation{})
keysGauge.Dec()
return nil
}
// get gets the modified, created revision and version of the key that satisfies the given atRev.
// Rev must be higher than or equal to the given atRev.
func (ki *keyIndex) get(atRev int64) (modified, created revision, ver int64, err error) {
if ki.isEmpty() {
plog.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
}
g := ki.findGeneration(atRev)
if g.isEmpty() {
return revision{}, revision{}, 0, ErrRevisionNotFound
}
n := g.walk(func(rev revision) bool { return rev.main > atRev })
if n != -1 {
return g.revs[n], g.created, g.ver - int64(len(g.revs)-n-1), nil
}
return revision{}, revision{}, 0, ErrRevisionNotFound
}
// since returns revisions since the given rev. Only the revision with the
// largest sub revision will be returned if multiple revisions have the same
// main revision.
func (ki *keyIndex) since(rev int64) []revision {
if ki.isEmpty() {
plog.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
}
since := revision{rev, 0}
var gi int
// find the generations to start checking
for gi = len(ki.generations) - 1; gi > 0; gi-- {
g := ki.generations[gi]
if g.isEmpty() {
continue
}
if since.GreaterThan(g.created) {
break
}
}
var revs []revision
var last int64
for ; gi < len(ki.generations); gi++ {
for _, r := range ki.generations[gi].revs {
if since.GreaterThan(r) {
continue
}
if r.main == last {
// replace the revision with a new one that has higher sub value,
// because the original one should not be seen by external
revs[len(revs)-1] = r
continue
}
revs = append(revs, r)
last = r.main
}
}
return revs
}
// compact compacts a keyIndex by removing the versions with smaller or equal
// revision than the given atRev except the largest one (If the largest one is
// a tombstone, it will not be kept).
// If a generation becomes empty during compaction, it will be removed.
func (ki *keyIndex) compact(atRev int64, available map[revision]struct{}) {
if ki.isEmpty() {
plog.Panicf("store.keyindex: unexpected compact on empty keyIndex %s", string(ki.key))
}
// walk until reaching the first revision that has an revision smaller or equal to
// the atRev.
// add it to the available map
f := func(rev revision) bool {
if rev.main <= atRev {
available[rev] = struct{}{}
return false
}
return true
}
i, g := 0, &ki.generations[0]
// find first generation includes atRev or created after atRev
for i < len(ki.generations)-1 {
if tomb := g.revs[len(g.revs)-1].main; tomb > atRev {
break
}
i++
g = &ki.generations[i]
}
if !g.isEmpty() {
n := g.walk(f)
// remove the previous contents.
if n != -1 {
g.revs = g.revs[n:]
}
// remove any tombstone
if len(g.revs) == 1 && i != len(ki.generations)-1 {
delete(available, g.revs[0])
i++
}
}
// remove the previous generations.
ki.generations = ki.generations[i:]
}
func (ki *keyIndex) isEmpty() bool {
return len(ki.generations) == 1 && ki.generations[0].isEmpty()
}
// findGeneration finds out the generation of the keyIndex that the
// given rev belongs to. If the given rev is at the gap of two generations,
// which means that the key does not exist at the given rev, it returns nil.
func (ki *keyIndex) findGeneration(rev int64) *generation {
lastg := len(ki.generations) - 1
cg := lastg
for cg >= 0 {
if len(ki.generations[cg].revs) == 0 {
cg--
continue
}
g := ki.generations[cg]
if cg != lastg {
if tomb := g.revs[len(g.revs)-1].main; tomb <= rev {
return nil
}
}
if g.revs[0].main <= rev {
return &ki.generations[cg]
}
cg--
}
return nil
}
func (a *keyIndex) Less(b btree.Item) bool {
return bytes.Compare(a.key, b.(*keyIndex).key) == -1
}
func (a *keyIndex) equal(b *keyIndex) bool {
if !bytes.Equal(a.key, b.key) {
return false
}
if a.modified != b.modified {
return false
}
if len(a.generations) != len(b.generations) {
return false
}
for i := range a.generations {
ag, bg := a.generations[i], b.generations[i]
if !ag.equal(bg) {
return false
}
}
return true
}
func (ki *keyIndex) String() string {
var s string
for _, g := range ki.generations {
s += g.String()
}
return s
}
// generation contains multiple revisions of a key.
type generation struct {
ver int64
created revision // when the generation is created (put in first revision).
revs []revision
}
func (g *generation) isEmpty() bool { return g == nil || len(g.revs) == 0 }
// walk walks through the revisions in the generation in descending order.
// It passes the revision to the given function.
// walk returns until: 1. it finishes walking all pairs 2. the function returns false.
// walk returns the position at where it stopped. If it stopped after
// finishing walking, -1 will be returned.
func (g *generation) walk(f func(rev revision) bool) int {
l := len(g.revs)
for i := range g.revs {
ok := f(g.revs[l-i-1])
if !ok {
return l - i - 1
}
}
return -1
}
func (g *generation) String() string {
return fmt.Sprintf("g: created[%d] ver[%d], revs %#v\n", g.created, g.ver, g.revs)
}
func (a generation) equal(b generation) bool {
if a.ver != b.ver {
return false
}
if len(a.revs) != len(b.revs) {
return false
}
for i := range a.revs {
ar, br := a.revs[i], b.revs[i]
if ar != br {
return false
}
}
return true
}

147
vendor/github.com/coreos/etcd/mvcc/kv.go generated vendored Normal file
View file

@ -0,0 +1,147 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
)
type RangeOptions struct {
Limit int64
Rev int64
Count bool
}
type RangeResult struct {
KVs []mvccpb.KeyValue
Rev int64
Count int
}
type ReadView interface {
// FirstRev returns the first KV revision at the time of opening the txn.
// After a compaction, the first revision increases to the compaction
// revision.
FirstRev() int64
// Rev returns the revision of the KV at the time of opening the txn.
Rev() int64
// Range gets the keys in the range at rangeRev.
// The returned rev is the current revision of the KV when the operation is executed.
// If rangeRev <=0, range gets the keys at currentRev.
// If `end` is nil, the request returns the key.
// If `end` is not nil and not empty, it gets the keys in range [key, range_end).
// If `end` is not nil and empty, it gets the keys greater than or equal to key.
// Limit limits the number of keys returned.
// If the required rev is compacted, ErrCompacted will be returned.
Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error)
}
// TxnRead represents a read-only transaction with operations that will not
// block other read transactions.
type TxnRead interface {
ReadView
// End marks the transaction is complete and ready to commit.
End()
}
type WriteView interface {
// DeleteRange deletes the given range from the store.
// A deleteRange increases the rev of the store if any key in the range exists.
// The number of key deleted will be returned.
// The returned rev is the current revision of the KV when the operation is executed.
// It also generates one event for each key delete in the event history.
// if the `end` is nil, deleteRange deletes the key.
// if the `end` is not nil, deleteRange deletes the keys in range [key, range_end).
DeleteRange(key, end []byte) (n, rev int64)
// Put puts the given key, value into the store. Put also takes additional argument lease to
// attach a lease to a key-value pair as meta-data. KV implementation does not validate the lease
// id.
// A put also increases the rev of the store, and generates one event in the event history.
// The returned rev is the current revision of the KV when the operation is executed.
Put(key, value []byte, lease lease.LeaseID) (rev int64)
}
// TxnWrite represents a transaction that can modify the store.
type TxnWrite interface {
TxnRead
WriteView
// Changes gets the changes made since opening the write txn.
Changes() []mvccpb.KeyValue
}
// txnReadWrite coerces a read txn to a write, panicking on any write operation.
type txnReadWrite struct{ TxnRead }
func (trw *txnReadWrite) DeleteRange(key, end []byte) (n, rev int64) { panic("unexpected DeleteRange") }
func (trw *txnReadWrite) Put(key, value []byte, lease lease.LeaseID) (rev int64) {
panic("unexpected Put")
}
func (trw *txnReadWrite) Changes() []mvccpb.KeyValue { return nil }
func NewReadOnlyTxnWrite(txn TxnRead) TxnWrite { return &txnReadWrite{txn} }
type KV interface {
ReadView
WriteView
// Read creates a read transaction.
Read() TxnRead
// Write creates a write transaction.
Write() TxnWrite
// Hash retrieves the hash of KV state and revision.
// This method is designed for consistency checking purposes.
Hash() (hash uint32, revision int64, err error)
// Compact frees all superseded keys with revisions less than rev.
Compact(rev int64) (<-chan struct{}, error)
// Commit commits outstanding txns into the underlying backend.
Commit()
// Restore restores the KV store from a backend.
Restore(b backend.Backend) error
Close() error
}
// WatchableKV is a KV that can be watched.
type WatchableKV interface {
KV
Watchable
}
// Watchable is the interface that wraps the NewWatchStream function.
type Watchable interface {
// NewWatchStream returns a WatchStream that can be used to
// watch events happened or happening on the KV.
NewWatchStream() WatchStream
}
// ConsistentWatchableKV is a WatchableKV that understands the consistency
// algorithm and consistent index.
// If the consistent index of executing entry is not larger than the
// consistent index of ConsistentWatchableKV, all operations in
// this entry are skipped and return empty response.
type ConsistentWatchableKV interface {
WatchableKV
// ConsistentIndex returns the current consistent index of the KV.
ConsistentIndex() uint64
}

53
vendor/github.com/coreos/etcd/mvcc/kv_view.go generated vendored Normal file
View file

@ -0,0 +1,53 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"github.com/coreos/etcd/lease"
)
type readView struct{ kv KV }
func (rv *readView) FirstRev() int64 {
tr := rv.kv.Read()
defer tr.End()
return tr.FirstRev()
}
func (rv *readView) Rev() int64 {
tr := rv.kv.Read()
defer tr.End()
return tr.Rev()
}
func (rv *readView) Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error) {
tr := rv.kv.Read()
defer tr.End()
return tr.Range(key, end, ro)
}
type writeView struct{ kv KV }
func (wv *writeView) DeleteRange(key, end []byte) (n, rev int64) {
tw := wv.kv.Write()
defer tw.End()
return tw.DeleteRange(key, end)
}
func (wv *writeView) Put(key, value []byte, lease lease.LeaseID) (rev int64) {
tw := wv.kv.Write()
defer tw.End()
return tw.Put(key, value, lease)
}

459
vendor/github.com/coreos/etcd/mvcc/kvstore.go generated vendored Normal file
View file

@ -0,0 +1,459 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"encoding/binary"
"errors"
"math"
"sync"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/coreos/etcd/pkg/schedule"
"github.com/coreos/pkg/capnslog"
"golang.org/x/net/context"
)
var (
keyBucketName = []byte("key")
metaBucketName = []byte("meta")
consistentIndexKeyName = []byte("consistent_index")
scheduledCompactKeyName = []byte("scheduledCompactRev")
finishedCompactKeyName = []byte("finishedCompactRev")
ErrCompacted = errors.New("mvcc: required revision has been compacted")
ErrFutureRev = errors.New("mvcc: required revision is a future revision")
ErrCanceled = errors.New("mvcc: watcher is canceled")
ErrClosed = errors.New("mvcc: closed")
plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "mvcc")
)
const (
// markedRevBytesLen is the byte length of marked revision.
// The first `revBytesLen` bytes represents a normal revision. The last
// one byte is the mark.
markedRevBytesLen = revBytesLen + 1
markBytePosition = markedRevBytesLen - 1
markTombstone byte = 't'
)
var restoreChunkKeys = 10000 // non-const for testing
// ConsistentIndexGetter is an interface that wraps the Get method.
// Consistent index is the offset of an entry in a consistent replicated log.
type ConsistentIndexGetter interface {
// ConsistentIndex returns the consistent index of current executing entry.
ConsistentIndex() uint64
}
type store struct {
ReadView
WriteView
// mu read locks for txns and write locks for non-txn store changes.
mu sync.RWMutex
ig ConsistentIndexGetter
b backend.Backend
kvindex index
le lease.Lessor
// revMuLock protects currentRev and compactMainRev.
// Locked at end of write txn and released after write txn unlock lock.
// Locked before locking read txn and released after locking.
revMu sync.RWMutex
// currentRev is the revision of the last completed transaction.
currentRev int64
// compactMainRev is the main revision of the last compaction.
compactMainRev int64
// bytesBuf8 is a byte slice of length 8
// to avoid a repetitive allocation in saveIndex.
bytesBuf8 []byte
fifoSched schedule.Scheduler
stopc chan struct{}
}
// NewStore returns a new store. It is useful to create a store inside
// mvcc pkg. It should only be used for testing externally.
func NewStore(b backend.Backend, le lease.Lessor, ig ConsistentIndexGetter) *store {
s := &store{
b: b,
ig: ig,
kvindex: newTreeIndex(),
le: le,
currentRev: 1,
compactMainRev: -1,
bytesBuf8: make([]byte, 8),
fifoSched: schedule.NewFIFOScheduler(),
stopc: make(chan struct{}),
}
s.ReadView = &readView{s}
s.WriteView = &writeView{s}
if s.le != nil {
s.le.SetRangeDeleter(func() lease.TxnDelete { return s.Write() })
}
tx := s.b.BatchTx()
tx.Lock()
tx.UnsafeCreateBucket(keyBucketName)
tx.UnsafeCreateBucket(metaBucketName)
tx.Unlock()
s.b.ForceCommit()
if err := s.restore(); err != nil {
// TODO: return the error instead of panic here?
panic("failed to recover store from backend")
}
return s
}
func (s *store) compactBarrier(ctx context.Context, ch chan struct{}) {
if ctx == nil || ctx.Err() != nil {
s.mu.Lock()
select {
case <-s.stopc:
default:
f := func(ctx context.Context) { s.compactBarrier(ctx, ch) }
s.fifoSched.Schedule(f)
}
s.mu.Unlock()
return
}
close(ch)
}
func (s *store) Hash() (hash uint32, revision int64, err error) {
s.b.ForceCommit()
h, err := s.b.Hash(DefaultIgnores)
return h, s.currentRev, err
}
func (s *store) Compact(rev int64) (<-chan struct{}, error) {
s.mu.Lock()
defer s.mu.Unlock()
s.revMu.Lock()
defer s.revMu.Unlock()
if rev <= s.compactMainRev {
ch := make(chan struct{})
f := func(ctx context.Context) { s.compactBarrier(ctx, ch) }
s.fifoSched.Schedule(f)
return ch, ErrCompacted
}
if rev > s.currentRev {
return nil, ErrFutureRev
}
start := time.Now()
s.compactMainRev = rev
rbytes := newRevBytes()
revToBytes(revision{main: rev}, rbytes)
tx := s.b.BatchTx()
tx.Lock()
tx.UnsafePut(metaBucketName, scheduledCompactKeyName, rbytes)
tx.Unlock()
// ensure that desired compaction is persisted
s.b.ForceCommit()
keep := s.kvindex.Compact(rev)
ch := make(chan struct{})
var j = func(ctx context.Context) {
if ctx.Err() != nil {
s.compactBarrier(ctx, ch)
return
}
if !s.scheduleCompaction(rev, keep) {
s.compactBarrier(nil, ch)
return
}
close(ch)
}
s.fifoSched.Schedule(j)
indexCompactionPauseDurations.Observe(float64(time.Since(start) / time.Millisecond))
return ch, nil
}
// DefaultIgnores is a map of keys to ignore in hash checking.
var DefaultIgnores map[backend.IgnoreKey]struct{}
func init() {
DefaultIgnores = map[backend.IgnoreKey]struct{}{
// consistent index might be changed due to v2 internal sync, which
// is not controllable by the user.
{Bucket: string(metaBucketName), Key: string(consistentIndexKeyName)}: {},
}
}
func (s *store) Commit() {
s.mu.Lock()
defer s.mu.Unlock()
tx := s.b.BatchTx()
tx.Lock()
s.saveIndex(tx)
tx.Unlock()
s.b.ForceCommit()
}
func (s *store) Restore(b backend.Backend) error {
s.mu.Lock()
defer s.mu.Unlock()
close(s.stopc)
s.fifoSched.Stop()
s.b = b
s.kvindex = newTreeIndex()
s.currentRev = 1
s.compactMainRev = -1
s.fifoSched = schedule.NewFIFOScheduler()
s.stopc = make(chan struct{})
return s.restore()
}
func (s *store) restore() error {
reportDbTotalSizeInBytesMu.Lock()
b := s.b
reportDbTotalSizeInBytes = func() float64 { return float64(b.Size()) }
reportDbTotalSizeInBytesMu.Unlock()
min, max := newRevBytes(), newRevBytes()
revToBytes(revision{main: 1}, min)
revToBytes(revision{main: math.MaxInt64, sub: math.MaxInt64}, max)
keyToLease := make(map[string]lease.LeaseID)
// restore index
tx := s.b.BatchTx()
tx.Lock()
_, finishedCompactBytes := tx.UnsafeRange(metaBucketName, finishedCompactKeyName, nil, 0)
if len(finishedCompactBytes) != 0 {
s.compactMainRev = bytesToRev(finishedCompactBytes[0]).main
plog.Printf("restore compact to %d", s.compactMainRev)
}
_, scheduledCompactBytes := tx.UnsafeRange(metaBucketName, scheduledCompactKeyName, nil, 0)
scheduledCompact := int64(0)
if len(scheduledCompactBytes) != 0 {
scheduledCompact = bytesToRev(scheduledCompactBytes[0]).main
}
// index keys concurrently as they're loaded in from tx
keysGauge.Set(0)
rkvc, revc := restoreIntoIndex(s.kvindex)
for {
keys, vals := tx.UnsafeRange(keyBucketName, min, max, int64(restoreChunkKeys))
if len(keys) == 0 {
break
}
// rkvc blocks if the total pending keys exceeds the restore
// chunk size to keep keys from consuming too much memory.
restoreChunk(rkvc, keys, vals, keyToLease)
if len(keys) < restoreChunkKeys {
// partial set implies final set
break
}
// next set begins after where this one ended
newMin := bytesToRev(keys[len(keys)-1][:revBytesLen])
newMin.sub++
revToBytes(newMin, min)
}
close(rkvc)
s.currentRev = <-revc
// keys in the range [compacted revision -N, compaction] might all be deleted due to compaction.
// the correct revision should be set to compaction revision in the case, not the largest revision
// we have seen.
if s.currentRev < s.compactMainRev {
s.currentRev = s.compactMainRev
}
if scheduledCompact <= s.compactMainRev {
scheduledCompact = 0
}
for key, lid := range keyToLease {
if s.le == nil {
panic("no lessor to attach lease")
}
err := s.le.Attach(lid, []lease.LeaseItem{{Key: key}})
if err != nil {
plog.Errorf("unexpected Attach error: %v", err)
}
}
tx.Unlock()
if scheduledCompact != 0 {
s.Compact(scheduledCompact)
plog.Printf("resume scheduled compaction at %d", scheduledCompact)
}
return nil
}
type revKeyValue struct {
key []byte
kv mvccpb.KeyValue
kstr string
}
func restoreIntoIndex(idx index) (chan<- revKeyValue, <-chan int64) {
rkvc, revc := make(chan revKeyValue, restoreChunkKeys), make(chan int64, 1)
go func() {
currentRev := int64(1)
defer func() { revc <- currentRev }()
// restore the tree index from streaming the unordered index.
kiCache := make(map[string]*keyIndex, restoreChunkKeys)
for rkv := range rkvc {
ki, ok := kiCache[rkv.kstr]
// purge kiCache if many keys but still missing in the cache
if !ok && len(kiCache) >= restoreChunkKeys {
i := 10
for k := range kiCache {
delete(kiCache, k)
if i--; i == 0 {
break
}
}
}
// cache miss, fetch from tree index if there
if !ok {
ki = &keyIndex{key: rkv.kv.Key}
if idxKey := idx.KeyIndex(ki); idxKey != nil {
kiCache[rkv.kstr], ki = idxKey, idxKey
ok = true
}
}
rev := bytesToRev(rkv.key)
currentRev = rev.main
if ok {
if isTombstone(rkv.key) {
ki.tombstone(rev.main, rev.sub)
continue
}
ki.put(rev.main, rev.sub)
} else if !isTombstone(rkv.key) {
ki.restore(revision{rkv.kv.CreateRevision, 0}, rev, rkv.kv.Version)
idx.Insert(ki)
kiCache[rkv.kstr] = ki
}
}
}()
return rkvc, revc
}
func restoreChunk(kvc chan<- revKeyValue, keys, vals [][]byte, keyToLease map[string]lease.LeaseID) {
for i, key := range keys {
rkv := revKeyValue{key: key}
if err := rkv.kv.Unmarshal(vals[i]); err != nil {
plog.Fatalf("cannot unmarshal event: %v", err)
}
rkv.kstr = string(rkv.kv.Key)
if isTombstone(key) {
delete(keyToLease, rkv.kstr)
} else if lid := lease.LeaseID(rkv.kv.Lease); lid != lease.NoLease {
keyToLease[rkv.kstr] = lid
} else {
delete(keyToLease, rkv.kstr)
}
kvc <- rkv
}
}
func (s *store) Close() error {
close(s.stopc)
s.fifoSched.Stop()
return nil
}
func (a *store) Equal(b *store) bool {
if a.currentRev != b.currentRev {
return false
}
if a.compactMainRev != b.compactMainRev {
return false
}
return a.kvindex.Equal(b.kvindex)
}
func (s *store) saveIndex(tx backend.BatchTx) {
if s.ig == nil {
return
}
bs := s.bytesBuf8
binary.BigEndian.PutUint64(bs, s.ig.ConsistentIndex())
// put the index into the underlying backend
// tx has been locked in TxnBegin, so there is no need to lock it again
tx.UnsafePut(metaBucketName, consistentIndexKeyName, bs)
}
func (s *store) ConsistentIndex() uint64 {
// TODO: cache index in a uint64 field?
tx := s.b.BatchTx()
tx.Lock()
defer tx.Unlock()
_, vs := tx.UnsafeRange(metaBucketName, consistentIndexKeyName, nil, 0)
if len(vs) == 0 {
return 0
}
return binary.BigEndian.Uint64(vs[0])
}
// appendMarkTombstone appends tombstone mark to normal revision bytes.
func appendMarkTombstone(b []byte) []byte {
if len(b) != revBytesLen {
plog.Panicf("cannot append mark to non normal revision bytes")
}
return append(b, markTombstone)
}
// isTombstone checks whether the revision bytes is a tombstone.
func isTombstone(b []byte) bool {
return len(b) == markedRevBytesLen && b[markBytePosition] == markTombstone
}
// revBytesRange returns the range of revision bytes at
// the given revision.
func revBytesRange(rev revision) (start, end []byte) {
start = newRevBytes()
revToBytes(rev, start)
end = newRevBytes()
endRev := revision{main: rev.main, sub: rev.sub + 1}
revToBytes(endRev, end)
return start, end
}

View file

@ -0,0 +1,66 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"encoding/binary"
"time"
)
func (s *store) scheduleCompaction(compactMainRev int64, keep map[revision]struct{}) bool {
totalStart := time.Now()
defer dbCompactionTotalDurations.Observe(float64(time.Since(totalStart) / time.Millisecond))
end := make([]byte, 8)
binary.BigEndian.PutUint64(end, uint64(compactMainRev+1))
batchsize := int64(10000)
last := make([]byte, 8+1+8)
for {
var rev revision
start := time.Now()
tx := s.b.BatchTx()
tx.Lock()
keys, _ := tx.UnsafeRange(keyBucketName, last, end, batchsize)
for _, key := range keys {
rev = bytesToRev(key)
if _, ok := keep[rev]; !ok {
tx.UnsafeDelete(keyBucketName, key)
}
}
if len(keys) < int(batchsize) {
rbytes := make([]byte, 8+1+8)
revToBytes(revision{main: compactMainRev}, rbytes)
tx.UnsafePut(metaBucketName, finishedCompactKeyName, rbytes)
tx.Unlock()
plog.Printf("finished scheduled compaction at %d (took %v)", compactMainRev, time.Since(totalStart))
return true
}
// update last
revToBytes(revision{main: rev.main, sub: rev.sub + 1}, last)
tx.Unlock()
dbCompactionPauseDurations.Observe(float64(time.Since(start) / time.Millisecond))
select {
case <-time.After(100 * time.Millisecond):
case <-s.stopc:
return false
}
}
}

253
vendor/github.com/coreos/etcd/mvcc/kvstore_txn.go generated vendored Normal file
View file

@ -0,0 +1,253 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
)
type storeTxnRead struct {
s *store
tx backend.ReadTx
firstRev int64
rev int64
}
func (s *store) Read() TxnRead {
s.mu.RLock()
tx := s.b.ReadTx()
s.revMu.RLock()
tx.Lock()
firstRev, rev := s.compactMainRev, s.currentRev
s.revMu.RUnlock()
return newMetricsTxnRead(&storeTxnRead{s, tx, firstRev, rev})
}
func (tr *storeTxnRead) FirstRev() int64 { return tr.firstRev }
func (tr *storeTxnRead) Rev() int64 { return tr.rev }
func (tr *storeTxnRead) Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error) {
return tr.rangeKeys(key, end, tr.Rev(), ro)
}
func (tr *storeTxnRead) End() {
tr.tx.Unlock()
tr.s.mu.RUnlock()
}
type storeTxnWrite struct {
*storeTxnRead
tx backend.BatchTx
// beginRev is the revision where the txn begins; it will write to the next revision.
beginRev int64
changes []mvccpb.KeyValue
}
func (s *store) Write() TxnWrite {
s.mu.RLock()
tx := s.b.BatchTx()
tx.Lock()
tw := &storeTxnWrite{
storeTxnRead: &storeTxnRead{s, tx, 0, 0},
tx: tx,
beginRev: s.currentRev,
changes: make([]mvccpb.KeyValue, 0, 4),
}
return newMetricsTxnWrite(tw)
}
func (tw *storeTxnWrite) Rev() int64 { return tw.beginRev }
func (tw *storeTxnWrite) Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error) {
rev := tw.beginRev
if len(tw.changes) > 0 {
rev++
}
return tw.rangeKeys(key, end, rev, ro)
}
func (tw *storeTxnWrite) DeleteRange(key, end []byte) (int64, int64) {
if n := tw.deleteRange(key, end); n != 0 || len(tw.changes) > 0 {
return n, int64(tw.beginRev + 1)
}
return 0, int64(tw.beginRev)
}
func (tw *storeTxnWrite) Put(key, value []byte, lease lease.LeaseID) int64 {
tw.put(key, value, lease)
return int64(tw.beginRev + 1)
}
func (tw *storeTxnWrite) End() {
// only update index if the txn modifies the mvcc state.
if len(tw.changes) != 0 {
tw.s.saveIndex(tw.tx)
// hold revMu lock to prevent new read txns from opening until writeback.
tw.s.revMu.Lock()
tw.s.currentRev++
}
tw.tx.Unlock()
if len(tw.changes) != 0 {
tw.s.revMu.Unlock()
}
tw.s.mu.RUnlock()
}
func (tr *storeTxnRead) rangeKeys(key, end []byte, curRev int64, ro RangeOptions) (*RangeResult, error) {
rev := ro.Rev
if rev > curRev {
return &RangeResult{KVs: nil, Count: -1, Rev: curRev}, ErrFutureRev
}
if rev <= 0 {
rev = curRev
}
if rev < tr.s.compactMainRev {
return &RangeResult{KVs: nil, Count: -1, Rev: 0}, ErrCompacted
}
_, revpairs := tr.s.kvindex.Range(key, end, int64(rev))
if len(revpairs) == 0 {
return &RangeResult{KVs: nil, Count: 0, Rev: curRev}, nil
}
if ro.Count {
return &RangeResult{KVs: nil, Count: len(revpairs), Rev: curRev}, nil
}
var kvs []mvccpb.KeyValue
for _, revpair := range revpairs {
start, end := revBytesRange(revpair)
_, vs := tr.tx.UnsafeRange(keyBucketName, start, end, 0)
if len(vs) != 1 {
plog.Fatalf("range cannot find rev (%d,%d)", revpair.main, revpair.sub)
}
var kv mvccpb.KeyValue
if err := kv.Unmarshal(vs[0]); err != nil {
plog.Fatalf("cannot unmarshal event: %v", err)
}
kvs = append(kvs, kv)
if ro.Limit > 0 && len(kvs) >= int(ro.Limit) {
break
}
}
return &RangeResult{KVs: kvs, Count: len(revpairs), Rev: curRev}, nil
}
func (tw *storeTxnWrite) put(key, value []byte, leaseID lease.LeaseID) {
rev := tw.beginRev + 1
c := rev
oldLease := lease.NoLease
// if the key exists before, use its previous created and
// get its previous leaseID
_, created, ver, err := tw.s.kvindex.Get(key, rev)
if err == nil {
c = created.main
oldLease = tw.s.le.GetLease(lease.LeaseItem{Key: string(key)})
}
ibytes := newRevBytes()
idxRev := revision{main: rev, sub: int64(len(tw.changes))}
revToBytes(idxRev, ibytes)
ver = ver + 1
kv := mvccpb.KeyValue{
Key: key,
Value: value,
CreateRevision: c,
ModRevision: rev,
Version: ver,
Lease: int64(leaseID),
}
d, err := kv.Marshal()
if err != nil {
plog.Fatalf("cannot marshal event: %v", err)
}
tw.tx.UnsafeSeqPut(keyBucketName, ibytes, d)
tw.s.kvindex.Put(key, idxRev)
tw.changes = append(tw.changes, kv)
if oldLease != lease.NoLease {
if tw.s.le == nil {
panic("no lessor to detach lease")
}
err = tw.s.le.Detach(oldLease, []lease.LeaseItem{{Key: string(key)}})
if err != nil {
plog.Errorf("unexpected error from lease detach: %v", err)
}
}
if leaseID != lease.NoLease {
if tw.s.le == nil {
panic("no lessor to attach lease")
}
err = tw.s.le.Attach(leaseID, []lease.LeaseItem{{Key: string(key)}})
if err != nil {
panic("unexpected error from lease Attach")
}
}
}
func (tw *storeTxnWrite) deleteRange(key, end []byte) int64 {
rrev := tw.beginRev
if len(tw.changes) > 0 {
rrev += 1
}
keys, revs := tw.s.kvindex.Range(key, end, rrev)
if len(keys) == 0 {
return 0
}
for i, key := range keys {
tw.delete(key, revs[i])
}
return int64(len(keys))
}
func (tw *storeTxnWrite) delete(key []byte, rev revision) {
ibytes := newRevBytes()
idxRev := revision{main: tw.beginRev + 1, sub: int64(len(tw.changes))}
revToBytes(idxRev, ibytes)
ibytes = appendMarkTombstone(ibytes)
kv := mvccpb.KeyValue{Key: key}
d, err := kv.Marshal()
if err != nil {
plog.Fatalf("cannot marshal event: %v", err)
}
tw.tx.UnsafeSeqPut(keyBucketName, ibytes, d)
err = tw.s.kvindex.Tombstone(key, idxRev)
if err != nil {
plog.Fatalf("cannot tombstone an existing key (%s): %v", string(key), err)
}
tw.changes = append(tw.changes, kv)
item := lease.LeaseItem{Key: string(key)}
leaseID := tw.s.le.GetLease(item)
if leaseID != lease.NoLease {
err = tw.s.le.Detach(leaseID, []lease.LeaseItem{item})
if err != nil {
plog.Errorf("cannot detach %v", err)
}
}
}
func (tw *storeTxnWrite) Changes() []mvccpb.KeyValue { return tw.changes }

174
vendor/github.com/coreos/etcd/mvcc/metrics.go generated vendored Normal file
View file

@ -0,0 +1,174 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"sync"
"github.com/prometheus/client_golang/prometheus"
)
var (
rangeCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "range_total",
Help: "Total number of ranges seen by this member.",
})
putCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "put_total",
Help: "Total number of puts seen by this member.",
})
deleteCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "delete_total",
Help: "Total number of deletes seen by this member.",
})
txnCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "txn_total",
Help: "Total number of txns seen by this member.",
})
keysGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "keys_total",
Help: "Total number of keys.",
})
watchStreamGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "watch_stream_total",
Help: "Total number of watch streams.",
})
watcherGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "watcher_total",
Help: "Total number of watchers.",
})
slowWatcherGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "slow_watcher_total",
Help: "Total number of unsynced slow watchers.",
})
totalEventsCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "events_total",
Help: "Total number of events sent by this member.",
})
pendingEventsGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "pending_events_total",
Help: "Total number of pending events to be sent.",
})
indexCompactionPauseDurations = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "index_compaction_pause_duration_milliseconds",
Help: "Bucketed histogram of index compaction pause duration.",
// 0.5ms -> 1second
Buckets: prometheus.ExponentialBuckets(0.5, 2, 12),
})
dbCompactionPauseDurations = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "db_compaction_pause_duration_milliseconds",
Help: "Bucketed histogram of db compaction pause duration.",
// 1ms -> 4second
Buckets: prometheus.ExponentialBuckets(1, 2, 13),
})
dbCompactionTotalDurations = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "db_compaction_total_duration_milliseconds",
Help: "Bucketed histogram of db compaction total duration.",
// 100ms -> 800second
Buckets: prometheus.ExponentialBuckets(100, 2, 14),
})
dbTotalSize = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "db_total_size_in_bytes",
Help: "Total size of the underlying database in bytes.",
},
func() float64 {
reportDbTotalSizeInBytesMu.RLock()
defer reportDbTotalSizeInBytesMu.RUnlock()
return reportDbTotalSizeInBytes()
},
)
// overridden by mvcc initialization
reportDbTotalSizeInBytesMu sync.RWMutex
reportDbTotalSizeInBytes func() float64 = func() float64 { return 0 }
)
func init() {
prometheus.MustRegister(rangeCounter)
prometheus.MustRegister(putCounter)
prometheus.MustRegister(deleteCounter)
prometheus.MustRegister(txnCounter)
prometheus.MustRegister(keysGauge)
prometheus.MustRegister(watchStreamGauge)
prometheus.MustRegister(watcherGauge)
prometheus.MustRegister(slowWatcherGauge)
prometheus.MustRegister(totalEventsCounter)
prometheus.MustRegister(pendingEventsGauge)
prometheus.MustRegister(indexCompactionPauseDurations)
prometheus.MustRegister(dbCompactionPauseDurations)
prometheus.MustRegister(dbCompactionTotalDurations)
prometheus.MustRegister(dbTotalSize)
}
// ReportEventReceived reports that an event is received.
// This function should be called when the external systems received an
// event from mvcc.Watcher.
func ReportEventReceived(n int) {
pendingEventsGauge.Sub(float64(n))
totalEventsCounter.Add(float64(n))
}

67
vendor/github.com/coreos/etcd/mvcc/metrics_txn.go generated vendored Normal file
View file

@ -0,0 +1,67 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"github.com/coreos/etcd/lease"
)
type metricsTxnWrite struct {
TxnWrite
ranges uint
puts uint
deletes uint
}
func newMetricsTxnRead(tr TxnRead) TxnRead {
return &metricsTxnWrite{&txnReadWrite{tr}, 0, 0, 0}
}
func newMetricsTxnWrite(tw TxnWrite) TxnWrite {
return &metricsTxnWrite{tw, 0, 0, 0}
}
func (tw *metricsTxnWrite) Range(key, end []byte, ro RangeOptions) (*RangeResult, error) {
tw.ranges++
return tw.TxnWrite.Range(key, end, ro)
}
func (tw *metricsTxnWrite) DeleteRange(key, end []byte) (n, rev int64) {
tw.deletes++
return tw.TxnWrite.DeleteRange(key, end)
}
func (tw *metricsTxnWrite) Put(key, value []byte, lease lease.LeaseID) (rev int64) {
tw.puts++
return tw.TxnWrite.Put(key, value, lease)
}
func (tw *metricsTxnWrite) End() {
defer tw.TxnWrite.End()
if sum := tw.ranges + tw.puts + tw.deletes; sum != 1 {
if sum > 1 {
txnCounter.Inc()
}
return
}
switch {
case tw.ranges == 1:
rangeCounter.Inc()
case tw.puts == 1:
putCounter.Inc()
case tw.deletes == 1:
deleteCounter.Inc()
}
}

67
vendor/github.com/coreos/etcd/mvcc/revision.go generated vendored Normal file
View file

@ -0,0 +1,67 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import "encoding/binary"
// revBytesLen is the byte length of a normal revision.
// First 8 bytes is the revision.main in big-endian format. The 9th byte
// is a '_'. The last 8 bytes is the revision.sub in big-endian format.
const revBytesLen = 8 + 1 + 8
// A revision indicates modification of the key-value space.
// The set of changes that share same main revision changes the key-value space atomically.
type revision struct {
// main is the main revision of a set of changes that happen atomically.
main int64
// sub is the the sub revision of a change in a set of changes that happen
// atomically. Each change has different increasing sub revision in that
// set.
sub int64
}
func (a revision) GreaterThan(b revision) bool {
if a.main > b.main {
return true
}
if a.main < b.main {
return false
}
return a.sub > b.sub
}
func newRevBytes() []byte {
return make([]byte, revBytesLen, markedRevBytesLen)
}
func revToBytes(rev revision, bytes []byte) {
binary.BigEndian.PutUint64(bytes, uint64(rev.main))
bytes[8] = '_'
binary.BigEndian.PutUint64(bytes[9:], uint64(rev.sub))
}
func bytesToRev(bytes []byte) revision {
return revision{
main: int64(binary.BigEndian.Uint64(bytes[0:8])),
sub: int64(binary.BigEndian.Uint64(bytes[9:])),
}
}
type revisions []revision
func (a revisions) Len() int { return len(a) }
func (a revisions) Less(i, j int) bool { return a[j].GreaterThan(a[i]) }
func (a revisions) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

56
vendor/github.com/coreos/etcd/mvcc/util.go generated vendored Normal file
View file

@ -0,0 +1,56 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"encoding/binary"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
)
func UpdateConsistentIndex(be backend.Backend, index uint64) {
tx := be.BatchTx()
tx.Lock()
defer tx.Unlock()
var oldi uint64
_, vs := tx.UnsafeRange(metaBucketName, consistentIndexKeyName, nil, 0)
if len(vs) != 0 {
oldi = binary.BigEndian.Uint64(vs[0])
}
if index <= oldi {
return
}
bs := make([]byte, 8)
binary.BigEndian.PutUint64(bs, index)
tx.UnsafePut(metaBucketName, consistentIndexKeyName, bs)
}
func WriteKV(be backend.Backend, kv mvccpb.KeyValue) {
ibytes := newRevBytes()
revToBytes(revision{main: kv.ModRevision}, ibytes)
d, err := kv.Marshal()
if err != nil {
plog.Fatalf("cannot marshal event: %v", err)
}
be.BatchTx().Lock()
be.BatchTx().UnsafePut(keyBucketName, ibytes, d)
be.BatchTx().Unlock()
}

522
vendor/github.com/coreos/etcd/mvcc/watchable_store.go generated vendored Normal file
View file

@ -0,0 +1,522 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"sync"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
)
const (
// chanBufLen is the length of the buffered chan
// for sending out watched events.
// TODO: find a good buf value. 1024 is just a random one that
// seems to be reasonable.
chanBufLen = 1024
// maxWatchersPerSync is the number of watchers to sync in a single batch
maxWatchersPerSync = 512
)
type watchable interface {
watch(key, end []byte, startRev int64, id WatchID, ch chan<- WatchResponse, fcs ...FilterFunc) (*watcher, cancelFunc)
progress(w *watcher)
rev() int64
}
type watchableStore struct {
*store
// mu protects watcher groups and batches. It should never be locked
// before locking store.mu to avoid deadlock.
mu sync.RWMutex
// victims are watcher batches that were blocked on the watch channel
victims []watcherBatch
victimc chan struct{}
// contains all unsynced watchers that needs to sync with events that have happened
unsynced watcherGroup
// contains all synced watchers that are in sync with the progress of the store.
// The key of the map is the key that the watcher watches on.
synced watcherGroup
stopc chan struct{}
wg sync.WaitGroup
}
// cancelFunc updates unsynced and synced maps when running
// cancel operations.
type cancelFunc func()
func New(b backend.Backend, le lease.Lessor, ig ConsistentIndexGetter) ConsistentWatchableKV {
return newWatchableStore(b, le, ig)
}
func newWatchableStore(b backend.Backend, le lease.Lessor, ig ConsistentIndexGetter) *watchableStore {
s := &watchableStore{
store: NewStore(b, le, ig),
victimc: make(chan struct{}, 1),
unsynced: newWatcherGroup(),
synced: newWatcherGroup(),
stopc: make(chan struct{}),
}
s.store.ReadView = &readView{s}
s.store.WriteView = &writeView{s}
if s.le != nil {
// use this store as the deleter so revokes trigger watch events
s.le.SetRangeDeleter(func() lease.TxnDelete { return s.Write() })
}
s.wg.Add(2)
go s.syncWatchersLoop()
go s.syncVictimsLoop()
return s
}
func (s *watchableStore) Close() error {
close(s.stopc)
s.wg.Wait()
return s.store.Close()
}
func (s *watchableStore) NewWatchStream() WatchStream {
watchStreamGauge.Inc()
return &watchStream{
watchable: s,
ch: make(chan WatchResponse, chanBufLen),
cancels: make(map[WatchID]cancelFunc),
watchers: make(map[WatchID]*watcher),
}
}
func (s *watchableStore) watch(key, end []byte, startRev int64, id WatchID, ch chan<- WatchResponse, fcs ...FilterFunc) (*watcher, cancelFunc) {
wa := &watcher{
key: key,
end: end,
minRev: startRev,
id: id,
ch: ch,
fcs: fcs,
}
s.mu.Lock()
s.revMu.RLock()
synced := startRev > s.store.currentRev || startRev == 0
if synced {
wa.minRev = s.store.currentRev + 1
if startRev > wa.minRev {
wa.minRev = startRev
}
}
if synced {
s.synced.add(wa)
} else {
slowWatcherGauge.Inc()
s.unsynced.add(wa)
}
s.revMu.RUnlock()
s.mu.Unlock()
watcherGauge.Inc()
return wa, func() { s.cancelWatcher(wa) }
}
// cancelWatcher removes references of the watcher from the watchableStore
func (s *watchableStore) cancelWatcher(wa *watcher) {
for {
s.mu.Lock()
if s.unsynced.delete(wa) {
slowWatcherGauge.Dec()
break
} else if s.synced.delete(wa) {
break
} else if wa.compacted {
break
}
if !wa.victim {
panic("watcher not victim but not in watch groups")
}
var victimBatch watcherBatch
for _, wb := range s.victims {
if wb[wa] != nil {
victimBatch = wb
break
}
}
if victimBatch != nil {
slowWatcherGauge.Dec()
delete(victimBatch, wa)
break
}
// victim being processed so not accessible; retry
s.mu.Unlock()
time.Sleep(time.Millisecond)
}
watcherGauge.Dec()
s.mu.Unlock()
}
func (s *watchableStore) Restore(b backend.Backend) error {
s.mu.Lock()
defer s.mu.Unlock()
err := s.store.Restore(b)
if err != nil {
return err
}
for wa := range s.synced.watchers {
s.unsynced.watchers.add(wa)
}
s.synced = newWatcherGroup()
return nil
}
// syncWatchersLoop syncs the watcher in the unsynced map every 100ms.
func (s *watchableStore) syncWatchersLoop() {
defer s.wg.Done()
for {
s.mu.RLock()
st := time.Now()
lastUnsyncedWatchers := s.unsynced.size()
s.mu.RUnlock()
unsyncedWatchers := 0
if lastUnsyncedWatchers > 0 {
unsyncedWatchers = s.syncWatchers()
}
syncDuration := time.Since(st)
waitDuration := 100 * time.Millisecond
// more work pending?
if unsyncedWatchers != 0 && lastUnsyncedWatchers > unsyncedWatchers {
// be fair to other store operations by yielding time taken
waitDuration = syncDuration
}
select {
case <-time.After(waitDuration):
case <-s.stopc:
return
}
}
}
// syncVictimsLoop tries to write precomputed watcher responses to
// watchers that had a blocked watcher channel
func (s *watchableStore) syncVictimsLoop() {
defer s.wg.Done()
for {
for s.moveVictims() != 0 {
// try to update all victim watchers
}
s.mu.RLock()
isEmpty := len(s.victims) == 0
s.mu.RUnlock()
var tickc <-chan time.Time
if !isEmpty {
tickc = time.After(10 * time.Millisecond)
}
select {
case <-tickc:
case <-s.victimc:
case <-s.stopc:
return
}
}
}
// moveVictims tries to update watches with already pending event data
func (s *watchableStore) moveVictims() (moved int) {
s.mu.Lock()
victims := s.victims
s.victims = nil
s.mu.Unlock()
var newVictim watcherBatch
for _, wb := range victims {
// try to send responses again
for w, eb := range wb {
// watcher has observed the store up to, but not including, w.minRev
rev := w.minRev - 1
if w.send(WatchResponse{WatchID: w.id, Events: eb.evs, Revision: rev}) {
pendingEventsGauge.Add(float64(len(eb.evs)))
} else {
if newVictim == nil {
newVictim = make(watcherBatch)
}
newVictim[w] = eb
continue
}
moved++
}
// assign completed victim watchers to unsync/sync
s.mu.Lock()
s.store.revMu.RLock()
curRev := s.store.currentRev
for w, eb := range wb {
if newVictim != nil && newVictim[w] != nil {
// couldn't send watch response; stays victim
continue
}
w.victim = false
if eb.moreRev != 0 {
w.minRev = eb.moreRev
}
if w.minRev <= curRev {
s.unsynced.add(w)
} else {
slowWatcherGauge.Dec()
s.synced.add(w)
}
}
s.store.revMu.RUnlock()
s.mu.Unlock()
}
if len(newVictim) > 0 {
s.mu.Lock()
s.victims = append(s.victims, newVictim)
s.mu.Unlock()
}
return moved
}
// syncWatchers syncs unsynced watchers by:
// 1. choose a set of watchers from the unsynced watcher group
// 2. iterate over the set to get the minimum revision and remove compacted watchers
// 3. use minimum revision to get all key-value pairs and send those events to watchers
// 4. remove synced watchers in set from unsynced group and move to synced group
func (s *watchableStore) syncWatchers() int {
s.mu.Lock()
defer s.mu.Unlock()
if s.unsynced.size() == 0 {
return 0
}
s.store.revMu.RLock()
defer s.store.revMu.RUnlock()
// in order to find key-value pairs from unsynced watchers, we need to
// find min revision index, and these revisions can be used to
// query the backend store of key-value pairs
curRev := s.store.currentRev
compactionRev := s.store.compactMainRev
wg, minRev := s.unsynced.choose(maxWatchersPerSync, curRev, compactionRev)
minBytes, maxBytes := newRevBytes(), newRevBytes()
revToBytes(revision{main: minRev}, minBytes)
revToBytes(revision{main: curRev + 1}, maxBytes)
// UnsafeRange returns keys and values. And in boltdb, keys are revisions.
// values are actual key-value pairs in backend.
tx := s.store.b.ReadTx()
tx.Lock()
revs, vs := tx.UnsafeRange(keyBucketName, minBytes, maxBytes, 0)
evs := kvsToEvents(wg, revs, vs)
tx.Unlock()
var victims watcherBatch
wb := newWatcherBatch(wg, evs)
for w := range wg.watchers {
w.minRev = curRev + 1
eb, ok := wb[w]
if !ok {
// bring un-notified watcher to synced
s.synced.add(w)
s.unsynced.delete(w)
continue
}
if eb.moreRev != 0 {
w.minRev = eb.moreRev
}
if w.send(WatchResponse{WatchID: w.id, Events: eb.evs, Revision: curRev}) {
pendingEventsGauge.Add(float64(len(eb.evs)))
} else {
if victims == nil {
victims = make(watcherBatch)
}
w.victim = true
}
if w.victim {
victims[w] = eb
} else {
if eb.moreRev != 0 {
// stay unsynced; more to read
continue
}
s.synced.add(w)
}
s.unsynced.delete(w)
}
s.addVictim(victims)
vsz := 0
for _, v := range s.victims {
vsz += len(v)
}
slowWatcherGauge.Set(float64(s.unsynced.size() + vsz))
return s.unsynced.size()
}
// kvsToEvents gets all events for the watchers from all key-value pairs
func kvsToEvents(wg *watcherGroup, revs, vals [][]byte) (evs []mvccpb.Event) {
for i, v := range vals {
var kv mvccpb.KeyValue
if err := kv.Unmarshal(v); err != nil {
plog.Panicf("cannot unmarshal event: %v", err)
}
if !wg.contains(string(kv.Key)) {
continue
}
ty := mvccpb.PUT
if isTombstone(revs[i]) {
ty = mvccpb.DELETE
// patch in mod revision so watchers won't skip
kv.ModRevision = bytesToRev(revs[i]).main
}
evs = append(evs, mvccpb.Event{Kv: &kv, Type: ty})
}
return evs
}
// notify notifies the fact that given event at the given rev just happened to
// watchers that watch on the key of the event.
func (s *watchableStore) notify(rev int64, evs []mvccpb.Event) {
var victim watcherBatch
for w, eb := range newWatcherBatch(&s.synced, evs) {
if eb.revs != 1 {
plog.Panicf("unexpected multiple revisions in notification")
}
if w.send(WatchResponse{WatchID: w.id, Events: eb.evs, Revision: rev}) {
pendingEventsGauge.Add(float64(len(eb.evs)))
} else {
// move slow watcher to victims
w.minRev = rev + 1
if victim == nil {
victim = make(watcherBatch)
}
w.victim = true
victim[w] = eb
s.synced.delete(w)
slowWatcherGauge.Inc()
}
}
s.addVictim(victim)
}
func (s *watchableStore) addVictim(victim watcherBatch) {
if victim == nil {
return
}
s.victims = append(s.victims, victim)
select {
case s.victimc <- struct{}{}:
default:
}
}
func (s *watchableStore) rev() int64 { return s.store.Rev() }
func (s *watchableStore) progress(w *watcher) {
s.mu.RLock()
defer s.mu.RUnlock()
if _, ok := s.synced.watchers[w]; ok {
w.send(WatchResponse{WatchID: w.id, Revision: s.rev()})
// If the ch is full, this watcher is receiving events.
// We do not need to send progress at all.
}
}
type watcher struct {
// the watcher key
key []byte
// end indicates the end of the range to watch.
// If end is set, the watcher is on a range.
end []byte
// victim is set when ch is blocked and undergoing victim processing
victim bool
// compacted is set when the watcher is removed because of compaction
compacted bool
// minRev is the minimum revision update the watcher will accept
minRev int64
id WatchID
fcs []FilterFunc
// a chan to send out the watch response.
// The chan might be shared with other watchers.
ch chan<- WatchResponse
}
func (w *watcher) send(wr WatchResponse) bool {
progressEvent := len(wr.Events) == 0
if len(w.fcs) != 0 {
ne := make([]mvccpb.Event, 0, len(wr.Events))
for i := range wr.Events {
filtered := false
for _, filter := range w.fcs {
if filter(wr.Events[i]) {
filtered = true
break
}
}
if !filtered {
ne = append(ne, wr.Events[i])
}
}
wr.Events = ne
}
// if all events are filtered out, we should send nothing.
if !progressEvent && len(wr.Events) == 0 {
return true
}
select {
case w.ch <- wr:
return true
default:
return false
}
}

View file

@ -0,0 +1,53 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"github.com/coreos/etcd/mvcc/mvccpb"
)
func (tw *watchableStoreTxnWrite) End() {
changes := tw.Changes()
if len(changes) == 0 {
tw.TxnWrite.End()
return
}
rev := tw.Rev() + 1
evs := make([]mvccpb.Event, len(changes))
for i, change := range changes {
evs[i].Kv = &changes[i]
if change.CreateRevision == 0 {
evs[i].Type = mvccpb.DELETE
evs[i].Kv.ModRevision = rev
} else {
evs[i].Type = mvccpb.PUT
}
}
// end write txn under watchable store lock so the updates are visible
// when asynchronous event posting checks the current store revision
tw.s.mu.Lock()
tw.s.notify(rev, evs)
tw.TxnWrite.End()
tw.s.mu.Unlock()
}
type watchableStoreTxnWrite struct {
TxnWrite
s *watchableStore
}
func (s *watchableStore) Write() TxnWrite { return &watchableStoreTxnWrite{s.store.Write(), s} }

171
vendor/github.com/coreos/etcd/mvcc/watcher.go generated vendored Normal file
View file

@ -0,0 +1,171 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"bytes"
"errors"
"sync"
"github.com/coreos/etcd/mvcc/mvccpb"
)
var (
ErrWatcherNotExist = errors.New("mvcc: watcher does not exist")
)
type WatchID int64
// FilterFunc returns true if the given event should be filtered out.
type FilterFunc func(e mvccpb.Event) bool
type WatchStream interface {
// Watch creates a watcher. The watcher watches the events happening or
// happened on the given key or range [key, end) from the given startRev.
//
// The whole event history can be watched unless compacted.
// If `startRev` <=0, watch observes events after currentRev.
//
// The returned `id` is the ID of this watcher. It appears as WatchID
// in events that are sent to the created watcher through stream channel.
//
Watch(key, end []byte, startRev int64, fcs ...FilterFunc) WatchID
// Chan returns a chan. All watch response will be sent to the returned chan.
Chan() <-chan WatchResponse
// RequestProgress requests the progress of the watcher with given ID. The response
// will only be sent if the watcher is currently synced.
// The responses will be sent through the WatchRespone Chan attached
// with this stream to ensure correct ordering.
// The responses contains no events. The revision in the response is the progress
// of the watchers since the watcher is currently synced.
RequestProgress(id WatchID)
// Cancel cancels a watcher by giving its ID. If watcher does not exist, an error will be
// returned.
Cancel(id WatchID) error
// Close closes Chan and release all related resources.
Close()
// Rev returns the current revision of the KV the stream watches on.
Rev() int64
}
type WatchResponse struct {
// WatchID is the WatchID of the watcher this response sent to.
WatchID WatchID
// Events contains all the events that needs to send.
Events []mvccpb.Event
// Revision is the revision of the KV when the watchResponse is created.
// For a normal response, the revision should be the same as the last
// modified revision inside Events. For a delayed response to a unsynced
// watcher, the revision is greater than the last modified revision
// inside Events.
Revision int64
// CompactRevision is set when the watcher is cancelled due to compaction.
CompactRevision int64
}
// watchStream contains a collection of watchers that share
// one streaming chan to send out watched events and other control events.
type watchStream struct {
watchable watchable
ch chan WatchResponse
mu sync.Mutex // guards fields below it
// nextID is the ID pre-allocated for next new watcher in this stream
nextID WatchID
closed bool
cancels map[WatchID]cancelFunc
watchers map[WatchID]*watcher
}
// Watch creates a new watcher in the stream and returns its WatchID.
// TODO: return error if ws is closed?
func (ws *watchStream) Watch(key, end []byte, startRev int64, fcs ...FilterFunc) WatchID {
// prevent wrong range where key >= end lexicographically
// watch request with 'WithFromKey' has empty-byte range end
if len(end) != 0 && bytes.Compare(key, end) != -1 {
return -1
}
ws.mu.Lock()
defer ws.mu.Unlock()
if ws.closed {
return -1
}
id := ws.nextID
ws.nextID++
w, c := ws.watchable.watch(key, end, startRev, id, ws.ch, fcs...)
ws.cancels[id] = c
ws.watchers[id] = w
return id
}
func (ws *watchStream) Chan() <-chan WatchResponse {
return ws.ch
}
func (ws *watchStream) Cancel(id WatchID) error {
ws.mu.Lock()
cancel, ok := ws.cancels[id]
ok = ok && !ws.closed
if ok {
delete(ws.cancels, id)
delete(ws.watchers, id)
}
ws.mu.Unlock()
if !ok {
return ErrWatcherNotExist
}
cancel()
return nil
}
func (ws *watchStream) Close() {
ws.mu.Lock()
defer ws.mu.Unlock()
for _, cancel := range ws.cancels {
cancel()
}
ws.closed = true
close(ws.ch)
watchStreamGauge.Dec()
}
func (ws *watchStream) Rev() int64 {
ws.mu.Lock()
defer ws.mu.Unlock()
return ws.watchable.rev()
}
func (ws *watchStream) RequestProgress(id WatchID) {
ws.mu.Lock()
w, ok := ws.watchers[id]
ws.mu.Unlock()
if !ok {
return
}
ws.watchable.progress(w)
}

283
vendor/github.com/coreos/etcd/mvcc/watcher_group.go generated vendored Normal file
View file

@ -0,0 +1,283 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"math"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/coreos/etcd/pkg/adt"
)
var (
// watchBatchMaxRevs is the maximum distinct revisions that
// may be sent to an unsynced watcher at a time. Declared as
// var instead of const for testing purposes.
watchBatchMaxRevs = 1000
)
type eventBatch struct {
// evs is a batch of revision-ordered events
evs []mvccpb.Event
// revs is the minimum unique revisions observed for this batch
revs int
// moreRev is first revision with more events following this batch
moreRev int64
}
func (eb *eventBatch) add(ev mvccpb.Event) {
if eb.revs > watchBatchMaxRevs {
// maxed out batch size
return
}
if len(eb.evs) == 0 {
// base case
eb.revs = 1
eb.evs = append(eb.evs, ev)
return
}
// revision accounting
ebRev := eb.evs[len(eb.evs)-1].Kv.ModRevision
evRev := ev.Kv.ModRevision
if evRev > ebRev {
eb.revs++
if eb.revs > watchBatchMaxRevs {
eb.moreRev = evRev
return
}
}
eb.evs = append(eb.evs, ev)
}
type watcherBatch map[*watcher]*eventBatch
func (wb watcherBatch) add(w *watcher, ev mvccpb.Event) {
eb := wb[w]
if eb == nil {
eb = &eventBatch{}
wb[w] = eb
}
eb.add(ev)
}
// newWatcherBatch maps watchers to their matched events. It enables quick
// events look up by watcher.
func newWatcherBatch(wg *watcherGroup, evs []mvccpb.Event) watcherBatch {
if len(wg.watchers) == 0 {
return nil
}
wb := make(watcherBatch)
for _, ev := range evs {
for w := range wg.watcherSetByKey(string(ev.Kv.Key)) {
if ev.Kv.ModRevision >= w.minRev {
// don't double notify
wb.add(w, ev)
}
}
}
return wb
}
type watcherSet map[*watcher]struct{}
func (w watcherSet) add(wa *watcher) {
if _, ok := w[wa]; ok {
panic("add watcher twice!")
}
w[wa] = struct{}{}
}
func (w watcherSet) union(ws watcherSet) {
for wa := range ws {
w.add(wa)
}
}
func (w watcherSet) delete(wa *watcher) {
if _, ok := w[wa]; !ok {
panic("removing missing watcher!")
}
delete(w, wa)
}
type watcherSetByKey map[string]watcherSet
func (w watcherSetByKey) add(wa *watcher) {
set := w[string(wa.key)]
if set == nil {
set = make(watcherSet)
w[string(wa.key)] = set
}
set.add(wa)
}
func (w watcherSetByKey) delete(wa *watcher) bool {
k := string(wa.key)
if v, ok := w[k]; ok {
if _, ok := v[wa]; ok {
delete(v, wa)
if len(v) == 0 {
// remove the set; nothing left
delete(w, k)
}
return true
}
}
return false
}
// watcherGroup is a collection of watchers organized by their ranges
type watcherGroup struct {
// keyWatchers has the watchers that watch on a single key
keyWatchers watcherSetByKey
// ranges has the watchers that watch a range; it is sorted by interval
ranges adt.IntervalTree
// watchers is the set of all watchers
watchers watcherSet
}
func newWatcherGroup() watcherGroup {
return watcherGroup{
keyWatchers: make(watcherSetByKey),
watchers: make(watcherSet),
}
}
// add puts a watcher in the group.
func (wg *watcherGroup) add(wa *watcher) {
wg.watchers.add(wa)
if wa.end == nil {
wg.keyWatchers.add(wa)
return
}
// interval already registered?
ivl := adt.NewStringAffineInterval(string(wa.key), string(wa.end))
if iv := wg.ranges.Find(ivl); iv != nil {
iv.Val.(watcherSet).add(wa)
return
}
// not registered, put in interval tree
ws := make(watcherSet)
ws.add(wa)
wg.ranges.Insert(ivl, ws)
}
// contains is whether the given key has a watcher in the group.
func (wg *watcherGroup) contains(key string) bool {
_, ok := wg.keyWatchers[key]
return ok || wg.ranges.Intersects(adt.NewStringAffinePoint(key))
}
// size gives the number of unique watchers in the group.
func (wg *watcherGroup) size() int { return len(wg.watchers) }
// delete removes a watcher from the group.
func (wg *watcherGroup) delete(wa *watcher) bool {
if _, ok := wg.watchers[wa]; !ok {
return false
}
wg.watchers.delete(wa)
if wa.end == nil {
wg.keyWatchers.delete(wa)
return true
}
ivl := adt.NewStringAffineInterval(string(wa.key), string(wa.end))
iv := wg.ranges.Find(ivl)
if iv == nil {
return false
}
ws := iv.Val.(watcherSet)
delete(ws, wa)
if len(ws) == 0 {
// remove interval missing watchers
if ok := wg.ranges.Delete(ivl); !ok {
panic("could not remove watcher from interval tree")
}
}
return true
}
// choose selects watchers from the watcher group to update
func (wg *watcherGroup) choose(maxWatchers int, curRev, compactRev int64) (*watcherGroup, int64) {
if len(wg.watchers) < maxWatchers {
return wg, wg.chooseAll(curRev, compactRev)
}
ret := newWatcherGroup()
for w := range wg.watchers {
if maxWatchers <= 0 {
break
}
maxWatchers--
ret.add(w)
}
return &ret, ret.chooseAll(curRev, compactRev)
}
func (wg *watcherGroup) chooseAll(curRev, compactRev int64) int64 {
minRev := int64(math.MaxInt64)
for w := range wg.watchers {
if w.minRev > curRev {
panic("watcher current revision should not exceed current revision")
}
if w.minRev < compactRev {
select {
case w.ch <- WatchResponse{WatchID: w.id, CompactRevision: compactRev}:
w.compacted = true
wg.delete(w)
default:
// retry next time
}
continue
}
if minRev > w.minRev {
minRev = w.minRev
}
}
return minRev
}
// watcherSetByKey gets the set of watchers that receive events on the given key.
func (wg *watcherGroup) watcherSetByKey(key string) watcherSet {
wkeys := wg.keyWatchers[key]
wranges := wg.ranges.Stab(adt.NewStringAffinePoint(key))
// zero-copy cases
switch {
case len(wranges) == 0:
// no need to merge ranges or copy; reuse single-key set
return wkeys
case len(wranges) == 0 && len(wkeys) == 0:
return nil
case len(wranges) == 1 && len(wkeys) == 0:
return wranges[0].Val.(watcherSet)
}
// copy case
ret := make(watcherSet)
ret.union(wg.keyWatchers[key])
for _, item := range wranges {
ret.union(item.Val.(watcherSet))
}
return ret
}

150
vendor/github.com/docker/cli/cli/cobra.go generated vendored Normal file
View file

@ -0,0 +1,150 @@
package cli
import (
"fmt"
"strings"
"github.com/docker/docker/pkg/term"
"github.com/pkg/errors"
"github.com/spf13/cobra"
)
// SetupRootCommand sets default usage, help, and error handling for the
// root command.
func SetupRootCommand(rootCmd *cobra.Command) {
cobra.AddTemplateFunc("hasSubCommands", hasSubCommands)
cobra.AddTemplateFunc("hasManagementSubCommands", hasManagementSubCommands)
cobra.AddTemplateFunc("operationSubCommands", operationSubCommands)
cobra.AddTemplateFunc("managementSubCommands", managementSubCommands)
cobra.AddTemplateFunc("wrappedFlagUsages", wrappedFlagUsages)
rootCmd.SetUsageTemplate(usageTemplate)
rootCmd.SetHelpTemplate(helpTemplate)
rootCmd.SetFlagErrorFunc(FlagErrorFunc)
rootCmd.SetHelpCommand(helpCommand)
rootCmd.PersistentFlags().BoolP("help", "h", false, "Print usage")
rootCmd.PersistentFlags().MarkShorthandDeprecated("help", "please use --help")
}
// FlagErrorFunc prints an error message which matches the format of the
// docker/cli/cli error messages
func FlagErrorFunc(cmd *cobra.Command, err error) error {
if err == nil {
return nil
}
usage := ""
if cmd.HasSubCommands() {
usage = "\n\n" + cmd.UsageString()
}
return StatusError{
Status: fmt.Sprintf("%s\nSee '%s --help'.%s", err, cmd.CommandPath(), usage),
StatusCode: 125,
}
}
var helpCommand = &cobra.Command{
Use: "help [command]",
Short: "Help about the command",
PersistentPreRun: func(cmd *cobra.Command, args []string) {},
PersistentPostRun: func(cmd *cobra.Command, args []string) {},
RunE: func(c *cobra.Command, args []string) error {
cmd, args, e := c.Root().Find(args)
if cmd == nil || e != nil || len(args) > 0 {
return errors.Errorf("unknown help topic: %v", strings.Join(args, " "))
}
helpFunc := cmd.HelpFunc()
helpFunc(cmd, args)
return nil
},
}
func hasSubCommands(cmd *cobra.Command) bool {
return len(operationSubCommands(cmd)) > 0
}
func hasManagementSubCommands(cmd *cobra.Command) bool {
return len(managementSubCommands(cmd)) > 0
}
func operationSubCommands(cmd *cobra.Command) []*cobra.Command {
cmds := []*cobra.Command{}
for _, sub := range cmd.Commands() {
if sub.IsAvailableCommand() && !sub.HasSubCommands() {
cmds = append(cmds, sub)
}
}
return cmds
}
func wrappedFlagUsages(cmd *cobra.Command) string {
width := 80
if ws, err := term.GetWinsize(0); err == nil {
width = int(ws.Width)
}
return cmd.Flags().FlagUsagesWrapped(width - 1)
}
func managementSubCommands(cmd *cobra.Command) []*cobra.Command {
cmds := []*cobra.Command{}
for _, sub := range cmd.Commands() {
if sub.IsAvailableCommand() && sub.HasSubCommands() {
cmds = append(cmds, sub)
}
}
return cmds
}
var usageTemplate = `Usage:
{{- if not .HasSubCommands}} {{.UseLine}}{{end}}
{{- if .HasSubCommands}} {{ .CommandPath}} COMMAND{{end}}
{{ .Short | trim }}
{{- if gt .Aliases 0}}
Aliases:
{{.NameAndAliases}}
{{- end}}
{{- if .HasExample}}
Examples:
{{ .Example }}
{{- end}}
{{- if .HasFlags}}
Options:
{{ wrappedFlagUsages . | trimRightSpace}}
{{- end}}
{{- if hasManagementSubCommands . }}
Management Commands:
{{- range managementSubCommands . }}
{{rpad .Name .NamePadding }} {{.Short}}
{{- end}}
{{- end}}
{{- if hasSubCommands .}}
Commands:
{{- range operationSubCommands . }}
{{rpad .Name .NamePadding }} {{.Short}}
{{- end}}
{{- end}}
{{- if .HasSubCommands }}
Run '{{.CommandPath}} COMMAND --help' for more information on a command.
{{- end}}
`
var helpTemplate = `
{{if or .Runnable .HasSubCommands}}{{.UsageString}}{{end}}`

305
vendor/github.com/docker/cli/cli/command/cli.go generated vendored Normal file
View file

@ -0,0 +1,305 @@
package command
import (
"fmt"
"io"
"net/http"
"os"
"runtime"
"github.com/docker/cli/cli"
cliconfig "github.com/docker/cli/cli/config"
"github.com/docker/cli/cli/config/configfile"
"github.com/docker/cli/cli/config/credentials"
cliflags "github.com/docker/cli/cli/flags"
dopts "github.com/docker/cli/opts"
"github.com/docker/docker/api"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/versions"
"github.com/docker/docker/client"
"github.com/docker/go-connections/sockets"
"github.com/docker/go-connections/tlsconfig"
"github.com/docker/notary/passphrase"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"golang.org/x/net/context"
)
// Streams is an interface which exposes the standard input and output streams
type Streams interface {
In() *InStream
Out() *OutStream
Err() io.Writer
}
// Cli represents the docker command line client.
type Cli interface {
Client() client.APIClient
Out() *OutStream
Err() io.Writer
In() *InStream
SetIn(in *InStream)
ConfigFile() *configfile.ConfigFile
CredentialsStore(serverAddress string) credentials.Store
}
// DockerCli is an instance the docker command line client.
// Instances of the client can be returned from NewDockerCli.
type DockerCli struct {
configFile *configfile.ConfigFile
in *InStream
out *OutStream
err io.Writer
client client.APIClient
defaultVersion string
server ServerInfo
}
// DefaultVersion returns api.defaultVersion or DOCKER_API_VERSION if specified.
func (cli *DockerCli) DefaultVersion() string {
return cli.defaultVersion
}
// Client returns the APIClient
func (cli *DockerCli) Client() client.APIClient {
return cli.client
}
// Out returns the writer used for stdout
func (cli *DockerCli) Out() *OutStream {
return cli.out
}
// Err returns the writer used for stderr
func (cli *DockerCli) Err() io.Writer {
return cli.err
}
// SetIn sets the reader used for stdin
func (cli *DockerCli) SetIn(in *InStream) {
cli.in = in
}
// In returns the reader used for stdin
func (cli *DockerCli) In() *InStream {
return cli.in
}
// ShowHelp shows the command help.
func ShowHelp(err io.Writer) func(*cobra.Command, []string) error {
return func(cmd *cobra.Command, args []string) error {
cmd.SetOutput(err)
cmd.HelpFunc()(cmd, args)
return nil
}
}
// ConfigFile returns the ConfigFile
func (cli *DockerCli) ConfigFile() *configfile.ConfigFile {
return cli.configFile
}
// ServerInfo returns the server version details for the host this client is
// connected to
func (cli *DockerCli) ServerInfo() ServerInfo {
return cli.server
}
// GetAllCredentials returns all of the credentials stored in all of the
// configured credential stores.
func (cli *DockerCli) GetAllCredentials() (map[string]types.AuthConfig, error) {
auths := make(map[string]types.AuthConfig)
for registry := range cli.configFile.CredentialHelpers {
helper := cli.CredentialsStore(registry)
newAuths, err := helper.GetAll()
if err != nil {
return nil, err
}
addAll(auths, newAuths)
}
defaultStore := cli.CredentialsStore("")
newAuths, err := defaultStore.GetAll()
if err != nil {
return nil, err
}
addAll(auths, newAuths)
return auths, nil
}
func addAll(to, from map[string]types.AuthConfig) {
for reg, ac := range from {
to[reg] = ac
}
}
// CredentialsStore returns a new credentials store based
// on the settings provided in the configuration file. Empty string returns
// the default credential store.
func (cli *DockerCli) CredentialsStore(serverAddress string) credentials.Store {
if helper := getConfiguredCredentialStore(cli.configFile, serverAddress); helper != "" {
return credentials.NewNativeStore(cli.configFile, helper)
}
return credentials.NewFileStore(cli.configFile)
}
// getConfiguredCredentialStore returns the credential helper configured for the
// given registry, the default credsStore, or the empty string if neither are
// configured.
func getConfiguredCredentialStore(c *configfile.ConfigFile, serverAddress string) string {
if c.CredentialHelpers != nil && serverAddress != "" {
if helper, exists := c.CredentialHelpers[serverAddress]; exists {
return helper
}
}
return c.CredentialsStore
}
// Initialize the dockerCli runs initialization that must happen after command
// line flags are parsed.
func (cli *DockerCli) Initialize(opts *cliflags.ClientOptions) error {
cli.configFile = LoadDefaultConfigFile(cli.err)
var err error
cli.client, err = NewAPIClientFromFlags(opts.Common, cli.configFile)
if tlsconfig.IsErrEncryptedKey(err) {
var (
passwd string
giveup bool
)
passRetriever := passphrase.PromptRetrieverWithInOut(cli.In(), cli.Out(), nil)
for attempts := 0; tlsconfig.IsErrEncryptedKey(err); attempts++ {
// some code and comments borrowed from notary/trustmanager/keystore.go
passwd, giveup, err = passRetriever("private", "encrypted TLS private", false, attempts)
// Check if the passphrase retriever got an error or if it is telling us to give up
if giveup || err != nil {
return errors.Wrap(err, "private key is encrypted, but could not get passphrase")
}
opts.Common.TLSOptions.Passphrase = passwd
cli.client, err = NewAPIClientFromFlags(opts.Common, cli.configFile)
}
}
if err != nil {
return err
}
cli.defaultVersion = cli.client.ClientVersion()
if ping, err := cli.client.Ping(context.Background()); err == nil {
cli.server = ServerInfo{
HasExperimental: ping.Experimental,
OSType: ping.OSType,
}
// since the new header was added in 1.25, assume server is 1.24 if header is not present.
if ping.APIVersion == "" {
ping.APIVersion = "1.24"
}
// if server version is lower than the current cli, downgrade
if versions.LessThan(ping.APIVersion, cli.client.ClientVersion()) {
cli.client.UpdateClientVersion(ping.APIVersion)
}
}
return nil
}
// ServerInfo stores details about the supported features and platform of the
// server
type ServerInfo struct {
HasExperimental bool
OSType string
}
// NewDockerCli returns a DockerCli instance with IO output and error streams set by in, out and err.
func NewDockerCli(in io.ReadCloser, out, err io.Writer) *DockerCli {
return &DockerCli{in: NewInStream(in), out: NewOutStream(out), err: err}
}
// LoadDefaultConfigFile attempts to load the default config file and returns
// an initialized ConfigFile struct if none is found.
func LoadDefaultConfigFile(err io.Writer) *configfile.ConfigFile {
configFile, e := cliconfig.Load(cliconfig.Dir())
if e != nil {
fmt.Fprintf(err, "WARNING: Error loading config file:%v\n", e)
}
if !configFile.ContainsAuth() {
credentials.DetectDefaultStore(configFile)
}
return configFile
}
// NewAPIClientFromFlags creates a new APIClient from command line flags
func NewAPIClientFromFlags(opts *cliflags.CommonOptions, configFile *configfile.ConfigFile) (client.APIClient, error) {
host, err := getServerHost(opts.Hosts, opts.TLSOptions)
if err != nil {
return &client.Client{}, err
}
customHeaders := configFile.HTTPHeaders
if customHeaders == nil {
customHeaders = map[string]string{}
}
customHeaders["User-Agent"] = UserAgent()
verStr := api.DefaultVersion
if tmpStr := os.Getenv("DOCKER_API_VERSION"); tmpStr != "" {
verStr = tmpStr
}
httpClient, err := newHTTPClient(host, opts.TLSOptions)
if err != nil {
return &client.Client{}, err
}
return client.NewClient(host, verStr, httpClient, customHeaders)
}
func getServerHost(hosts []string, tlsOptions *tlsconfig.Options) (host string, err error) {
switch len(hosts) {
case 0:
host = os.Getenv("DOCKER_HOST")
case 1:
host = hosts[0]
default:
return "", errors.New("Please specify only one -H")
}
host, err = dopts.ParseHost(tlsOptions != nil, host)
return
}
func newHTTPClient(host string, tlsOptions *tlsconfig.Options) (*http.Client, error) {
if tlsOptions == nil {
// let the api client configure the default transport.
return nil, nil
}
opts := *tlsOptions
opts.ExclusiveRootPools = true
config, err := tlsconfig.Client(opts)
if err != nil {
return nil, err
}
tr := &http.Transport{
TLSClientConfig: config,
}
proto, addr, _, err := client.ParseHost(host)
if err != nil {
return nil, err
}
sockets.ConfigureTransport(tr, proto, addr)
return &http.Client{
Transport: tr,
CheckRedirect: client.CheckRedirect,
}, nil
}
// UserAgent returns the user agent string used for making API requests
func UserAgent() string {
return "Docker-Client/" + cli.Version + " (" + runtime.GOOS + ")"
}

View file

@ -0,0 +1,47 @@
package command
import (
"sync"
"github.com/Sirupsen/logrus"
eventtypes "github.com/docker/docker/api/types/events"
)
// EventHandler is abstract interface for user to customize
// own handle functions of each type of events
type EventHandler interface {
Handle(action string, h func(eventtypes.Message))
Watch(c <-chan eventtypes.Message)
}
// InitEventHandler initializes and returns an EventHandler
func InitEventHandler() EventHandler {
return &eventHandler{handlers: make(map[string]func(eventtypes.Message))}
}
type eventHandler struct {
handlers map[string]func(eventtypes.Message)
mu sync.Mutex
}
func (w *eventHandler) Handle(action string, h func(eventtypes.Message)) {
w.mu.Lock()
w.handlers[action] = h
w.mu.Unlock()
}
// Watch ranges over the passed in event chan and processes the events based on the
// handlers created for a given action.
// To stop watching, close the event chan.
func (w *eventHandler) Watch(c <-chan eventtypes.Message) {
for e := range c {
w.mu.Lock()
h, exists := w.handlers[e.Action]
w.mu.Unlock()
if !exists {
continue
}
logrus.Debugf("event handler: received event: %v", e)
go h(e)
}
}

500
vendor/github.com/docker/cli/cli/command/image/build.go generated vendored Normal file
View file

@ -0,0 +1,500 @@
package image
import (
"archive/tar"
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"regexp"
"runtime"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
"github.com/docker/cli/cli/command/image/build"
"github.com/docker/cli/opts"
"github.com/docker/distribution/reference"
"github.com/docker/docker/api"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/pkg/archive"
"github.com/docker/docker/pkg/jsonmessage"
"github.com/docker/docker/pkg/progress"
"github.com/docker/docker/pkg/streamformatter"
"github.com/docker/docker/pkg/urlutil"
runconfigopts "github.com/docker/docker/runconfig/opts"
units "github.com/docker/go-units"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"golang.org/x/net/context"
)
type buildOptions struct {
context string
dockerfileName string
tags opts.ListOpts
labels opts.ListOpts
buildArgs opts.ListOpts
extraHosts opts.ListOpts
ulimits *opts.UlimitOpt
memory opts.MemBytes
memorySwap opts.MemSwapBytes
shmSize opts.MemBytes
cpuShares int64
cpuPeriod int64
cpuQuota int64
cpuSetCpus string
cpuSetMems string
cgroupParent string
isolation string
quiet bool
noCache bool
rm bool
forceRm bool
pull bool
cacheFrom []string
compress bool
securityOpt []string
networkMode string
squash bool
target string
imageIDFile string
}
// dockerfileFromStdin returns true when the user specified that the Dockerfile
// should be read from stdin instead of a file
func (o buildOptions) dockerfileFromStdin() bool {
return o.dockerfileName == "-"
}
// contextFromStdin returns true when the user specified that the build context
// should be read from stdin
func (o buildOptions) contextFromStdin() bool {
return o.context == "-"
}
// NewBuildCommand creates a new `docker build` command
func NewBuildCommand(dockerCli *command.DockerCli) *cobra.Command {
ulimits := make(map[string]*units.Ulimit)
options := buildOptions{
tags: opts.NewListOpts(validateTag),
buildArgs: opts.NewListOpts(opts.ValidateEnv),
ulimits: opts.NewUlimitOpt(&ulimits),
labels: opts.NewListOpts(opts.ValidateEnv),
extraHosts: opts.NewListOpts(opts.ValidateExtraHost),
}
cmd := &cobra.Command{
Use: "build [OPTIONS] PATH | URL | -",
Short: "Build an image from a Dockerfile",
Args: cli.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
options.context = args[0]
return runBuild(dockerCli, options)
},
}
flags := cmd.Flags()
flags.VarP(&options.tags, "tag", "t", "Name and optionally a tag in the 'name:tag' format")
flags.Var(&options.buildArgs, "build-arg", "Set build-time variables")
flags.Var(options.ulimits, "ulimit", "Ulimit options")
flags.StringVarP(&options.dockerfileName, "file", "f", "", "Name of the Dockerfile (Default is 'PATH/Dockerfile')")
flags.VarP(&options.memory, "memory", "m", "Memory limit")
flags.Var(&options.memorySwap, "memory-swap", "Swap limit equal to memory plus swap: '-1' to enable unlimited swap")
flags.Var(&options.shmSize, "shm-size", "Size of /dev/shm")
flags.Int64VarP(&options.cpuShares, "cpu-shares", "c", 0, "CPU shares (relative weight)")
flags.Int64Var(&options.cpuPeriod, "cpu-period", 0, "Limit the CPU CFS (Completely Fair Scheduler) period")
flags.Int64Var(&options.cpuQuota, "cpu-quota", 0, "Limit the CPU CFS (Completely Fair Scheduler) quota")
flags.StringVar(&options.cpuSetCpus, "cpuset-cpus", "", "CPUs in which to allow execution (0-3, 0,1)")
flags.StringVar(&options.cpuSetMems, "cpuset-mems", "", "MEMs in which to allow execution (0-3, 0,1)")
flags.StringVar(&options.cgroupParent, "cgroup-parent", "", "Optional parent cgroup for the container")
flags.StringVar(&options.isolation, "isolation", "", "Container isolation technology")
flags.Var(&options.labels, "label", "Set metadata for an image")
flags.BoolVar(&options.noCache, "no-cache", false, "Do not use cache when building the image")
flags.BoolVar(&options.rm, "rm", true, "Remove intermediate containers after a successful build")
flags.BoolVar(&options.forceRm, "force-rm", false, "Always remove intermediate containers")
flags.BoolVarP(&options.quiet, "quiet", "q", false, "Suppress the build output and print image ID on success")
flags.BoolVar(&options.pull, "pull", false, "Always attempt to pull a newer version of the image")
flags.StringSliceVar(&options.cacheFrom, "cache-from", []string{}, "Images to consider as cache sources")
flags.BoolVar(&options.compress, "compress", false, "Compress the build context using gzip")
flags.StringSliceVar(&options.securityOpt, "security-opt", []string{}, "Security options")
flags.StringVar(&options.networkMode, "network", "default", "Set the networking mode for the RUN instructions during build")
flags.SetAnnotation("network", "version", []string{"1.25"})
flags.Var(&options.extraHosts, "add-host", "Add a custom host-to-IP mapping (host:ip)")
flags.StringVar(&options.target, "target", "", "Set the target build stage to build.")
flags.StringVar(&options.imageIDFile, "iidfile", "", "Write the image ID to the file")
command.AddTrustVerificationFlags(flags)
flags.BoolVar(&options.squash, "squash", false, "Squash newly built layers into a single new layer")
flags.SetAnnotation("squash", "experimental", nil)
flags.SetAnnotation("squash", "version", []string{"1.25"})
return cmd
}
// lastProgressOutput is the same as progress.Output except
// that it only output with the last update. It is used in
// non terminal scenarios to suppress verbose messages
type lastProgressOutput struct {
output progress.Output
}
// WriteProgress formats progress information from a ProgressReader.
func (out *lastProgressOutput) WriteProgress(prog progress.Progress) error {
if !prog.LastUpdate {
return nil
}
return out.output.WriteProgress(prog)
}
// nolint: gocyclo
func runBuild(dockerCli *command.DockerCli, options buildOptions) error {
var (
buildCtx io.ReadCloser
dockerfileCtx io.ReadCloser
err error
contextDir string
tempDir string
relDockerfile string
progBuff io.Writer
buildBuff io.Writer
)
if options.dockerfileFromStdin() {
if options.contextFromStdin() {
return errors.New("invalid argument: can't use stdin for both build context and dockerfile")
}
dockerfileCtx = dockerCli.In()
}
specifiedContext := options.context
progBuff = dockerCli.Out()
buildBuff = dockerCli.Out()
if options.quiet {
progBuff = bytes.NewBuffer(nil)
buildBuff = bytes.NewBuffer(nil)
}
if options.imageIDFile != "" {
// Avoid leaving a stale file if we eventually fail
if err := os.Remove(options.imageIDFile); err != nil && !os.IsNotExist(err) {
return errors.Wrap(err, "Removing image ID file")
}
}
switch {
case options.contextFromStdin():
buildCtx, relDockerfile, err = build.GetContextFromReader(dockerCli.In(), options.dockerfileName)
case isLocalDir(specifiedContext):
contextDir, relDockerfile, err = build.GetContextFromLocalDir(specifiedContext, options.dockerfileName)
case urlutil.IsGitURL(specifiedContext):
tempDir, relDockerfile, err = build.GetContextFromGitURL(specifiedContext, options.dockerfileName)
case urlutil.IsURL(specifiedContext):
buildCtx, relDockerfile, err = build.GetContextFromURL(progBuff, specifiedContext, options.dockerfileName)
default:
return errors.Errorf("unable to prepare context: path %q not found", specifiedContext)
}
if err != nil {
if options.quiet && urlutil.IsURL(specifiedContext) {
fmt.Fprintln(dockerCli.Err(), progBuff)
}
return errors.Errorf("unable to prepare context: %s", err)
}
if tempDir != "" {
defer os.RemoveAll(tempDir)
contextDir = tempDir
}
if buildCtx == nil {
excludes, err := build.ReadDockerignore(contextDir)
if err != nil {
return err
}
if err := build.ValidateContextDirectory(contextDir, excludes); err != nil {
return errors.Errorf("error checking context: '%s'.", err)
}
// And canonicalize dockerfile name to a platform-independent one
relDockerfile, err = archive.CanonicalTarNameForPath(relDockerfile)
if err != nil {
return errors.Errorf("cannot canonicalize dockerfile path %s: %v", relDockerfile, err)
}
excludes = build.TrimBuildFilesFromExcludes(excludes, relDockerfile, options.dockerfileFromStdin())
compression := archive.Uncompressed
if options.compress {
compression = archive.Gzip
}
buildCtx, err = archive.TarWithOptions(contextDir, &archive.TarOptions{
Compression: compression,
ExcludePatterns: excludes,
})
if err != nil {
return err
}
}
// replace Dockerfile if added dynamically
if dockerfileCtx != nil {
buildCtx, relDockerfile, err = build.AddDockerfileToBuildContext(dockerfileCtx, buildCtx)
if err != nil {
return err
}
}
ctx := context.Background()
var resolvedTags []*resolvedTag
if command.IsTrusted() {
translator := func(ctx context.Context, ref reference.NamedTagged) (reference.Canonical, error) {
return TrustedReference(ctx, dockerCli, ref, nil)
}
// Wrap the tar archive to replace the Dockerfile entry with the rewritten
// Dockerfile which uses trusted pulls.
buildCtx = replaceDockerfileTarWrapper(ctx, buildCtx, relDockerfile, translator, &resolvedTags)
}
// Setup an upload progress bar
progressOutput := streamformatter.NewProgressOutput(progBuff)
if !dockerCli.Out().IsTerminal() {
progressOutput = &lastProgressOutput{output: progressOutput}
}
var body io.Reader = progress.NewProgressReader(buildCtx, progressOutput, 0, "", "Sending build context to Docker daemon")
authConfigs, _ := dockerCli.GetAllCredentials()
buildOptions := types.ImageBuildOptions{
Memory: options.memory.Value(),
MemorySwap: options.memorySwap.Value(),
Tags: options.tags.GetAll(),
SuppressOutput: options.quiet,
NoCache: options.noCache,
Remove: options.rm,
ForceRemove: options.forceRm,
PullParent: options.pull,
Isolation: container.Isolation(options.isolation),
CPUSetCPUs: options.cpuSetCpus,
CPUSetMems: options.cpuSetMems,
CPUShares: options.cpuShares,
CPUQuota: options.cpuQuota,
CPUPeriod: options.cpuPeriod,
CgroupParent: options.cgroupParent,
Dockerfile: relDockerfile,
ShmSize: options.shmSize.Value(),
Ulimits: options.ulimits.GetList(),
BuildArgs: runconfigopts.ConvertKVStringsToMapWithNil(options.buildArgs.GetAll()),
AuthConfigs: authConfigs,
Labels: runconfigopts.ConvertKVStringsToMap(options.labels.GetAll()),
CacheFrom: options.cacheFrom,
SecurityOpt: options.securityOpt,
NetworkMode: options.networkMode,
Squash: options.squash,
ExtraHosts: options.extraHosts.GetAll(),
Target: options.target,
}
response, err := dockerCli.Client().ImageBuild(ctx, body, buildOptions)
if err != nil {
if options.quiet {
fmt.Fprintf(dockerCli.Err(), "%s", progBuff)
}
return err
}
defer response.Body.Close()
imageID := ""
aux := func(auxJSON *json.RawMessage) {
var result types.BuildResult
if err := json.Unmarshal(*auxJSON, &result); err != nil {
fmt.Fprintf(dockerCli.Err(), "Failed to parse aux message: %s", err)
} else {
imageID = result.ID
}
}
err = jsonmessage.DisplayJSONMessagesStream(response.Body, buildBuff, dockerCli.Out().FD(), dockerCli.Out().IsTerminal(), aux)
if err != nil {
if jerr, ok := err.(*jsonmessage.JSONError); ok {
// If no error code is set, default to 1
if jerr.Code == 0 {
jerr.Code = 1
}
if options.quiet {
fmt.Fprintf(dockerCli.Err(), "%s%s", progBuff, buildBuff)
}
return cli.StatusError{Status: jerr.Message, StatusCode: jerr.Code}
}
return err
}
// Windows: show error message about modified file permissions if the
// daemon isn't running Windows.
if response.OSType != "windows" && runtime.GOOS == "windows" && !options.quiet {
fmt.Fprintln(dockerCli.Out(), "SECURITY WARNING: You are building a Docker "+
"image from Windows against a non-Windows Docker host. All files and "+
"directories added to build context will have '-rwxr-xr-x' permissions. "+
"It is recommended to double check and reset permissions for sensitive "+
"files and directories.")
}
// Everything worked so if -q was provided the output from the daemon
// should be just the image ID and we'll print that to stdout.
if options.quiet {
imageID = fmt.Sprintf("%s", buildBuff)
fmt.Fprintf(dockerCli.Out(), imageID)
}
if options.imageIDFile != "" {
if imageID == "" {
return errors.Errorf("Server did not provide an image ID. Cannot write %s", options.imageIDFile)
}
if err := ioutil.WriteFile(options.imageIDFile, []byte(imageID), 0666); err != nil {
return err
}
}
if command.IsTrusted() {
// Since the build was successful, now we must tag any of the resolved
// images from the above Dockerfile rewrite.
for _, resolved := range resolvedTags {
if err := TagTrusted(ctx, dockerCli, resolved.digestRef, resolved.tagRef); err != nil {
return err
}
}
}
return nil
}
func isLocalDir(c string) bool {
_, err := os.Stat(c)
return err == nil
}
type translatorFunc func(context.Context, reference.NamedTagged) (reference.Canonical, error)
// validateTag checks if the given image name can be resolved.
func validateTag(rawRepo string) (string, error) {
_, err := reference.ParseNormalizedNamed(rawRepo)
if err != nil {
return "", err
}
return rawRepo, nil
}
var dockerfileFromLinePattern = regexp.MustCompile(`(?i)^[\s]*FROM[ \f\r\t\v]+(?P<image>[^ \f\r\t\v\n#]+)`)
// resolvedTag records the repository, tag, and resolved digest reference
// from a Dockerfile rewrite.
type resolvedTag struct {
digestRef reference.Canonical
tagRef reference.NamedTagged
}
// rewriteDockerfileFrom rewrites the given Dockerfile by resolving images in
// "FROM <image>" instructions to a digest reference. `translator` is a
// function that takes a repository name and tag reference and returns a
// trusted digest reference.
func rewriteDockerfileFrom(ctx context.Context, dockerfile io.Reader, translator translatorFunc) (newDockerfile []byte, resolvedTags []*resolvedTag, err error) {
scanner := bufio.NewScanner(dockerfile)
buf := bytes.NewBuffer(nil)
// Scan the lines of the Dockerfile, looking for a "FROM" line.
for scanner.Scan() {
line := scanner.Text()
matches := dockerfileFromLinePattern.FindStringSubmatch(line)
if matches != nil && matches[1] != api.NoBaseImageSpecifier {
// Replace the line with a resolved "FROM repo@digest"
var ref reference.Named
ref, err = reference.ParseNormalizedNamed(matches[1])
if err != nil {
return nil, nil, err
}
ref = reference.TagNameOnly(ref)
if ref, ok := ref.(reference.NamedTagged); ok && command.IsTrusted() {
trustedRef, err := translator(ctx, ref)
if err != nil {
return nil, nil, err
}
line = dockerfileFromLinePattern.ReplaceAllLiteralString(line, fmt.Sprintf("FROM %s", reference.FamiliarString(trustedRef)))
resolvedTags = append(resolvedTags, &resolvedTag{
digestRef: trustedRef,
tagRef: ref,
})
}
}
_, err := fmt.Fprintln(buf, line)
if err != nil {
return nil, nil, err
}
}
return buf.Bytes(), resolvedTags, scanner.Err()
}
// replaceDockerfileTarWrapper wraps the given input tar archive stream and
// replaces the entry with the given Dockerfile name with the contents of the
// new Dockerfile. Returns a new tar archive stream with the replaced
// Dockerfile.
func replaceDockerfileTarWrapper(ctx context.Context, inputTarStream io.ReadCloser, dockerfileName string, translator translatorFunc, resolvedTags *[]*resolvedTag) io.ReadCloser {
pipeReader, pipeWriter := io.Pipe()
go func() {
tarReader := tar.NewReader(inputTarStream)
tarWriter := tar.NewWriter(pipeWriter)
defer inputTarStream.Close()
for {
hdr, err := tarReader.Next()
if err == io.EOF {
// Signals end of archive.
tarWriter.Close()
pipeWriter.Close()
return
}
if err != nil {
pipeWriter.CloseWithError(err)
return
}
content := io.Reader(tarReader)
if hdr.Name == dockerfileName {
// This entry is the Dockerfile. Since the tar archive was
// generated from a directory on the local filesystem, the
// Dockerfile will only appear once in the archive.
var newDockerfile []byte
newDockerfile, *resolvedTags, err = rewriteDockerfileFrom(ctx, content, translator)
if err != nil {
pipeWriter.CloseWithError(err)
return
}
hdr.Size = int64(len(newDockerfile))
content = bytes.NewBuffer(newDockerfile)
}
if err := tarWriter.WriteHeader(hdr); err != nil {
pipeWriter.CloseWithError(err)
return
}
if _, err := io.Copy(tarWriter, content); err != nil {
pipeWriter.CloseWithError(err)
return
}
}
}()
return pipeReader
}

34
vendor/github.com/docker/cli/cli/command/image/cmd.go generated vendored Normal file
View file

@ -0,0 +1,34 @@
package image
import (
"github.com/spf13/cobra"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
)
// NewImageCommand returns a cobra command for `image` subcommands
// nolint: interfacer
func NewImageCommand(dockerCli *command.DockerCli) *cobra.Command {
cmd := &cobra.Command{
Use: "image",
Short: "Manage images",
Args: cli.NoArgs,
RunE: command.ShowHelp(dockerCli.Err()),
}
cmd.AddCommand(
NewBuildCommand(dockerCli),
NewHistoryCommand(dockerCli),
NewImportCommand(dockerCli),
NewLoadCommand(dockerCli),
NewPullCommand(dockerCli),
NewPushCommand(dockerCli),
NewSaveCommand(dockerCli),
NewTagCommand(dockerCli),
newListCommand(dockerCli),
newRemoveCommand(dockerCli),
newInspectCommand(dockerCli),
NewPruneCommand(dockerCli),
)
return cmd
}

View file

@ -0,0 +1,64 @@
package image
import (
"golang.org/x/net/context"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
"github.com/docker/cli/cli/command/formatter"
"github.com/spf13/cobra"
)
type historyOptions struct {
image string
human bool
quiet bool
noTrunc bool
format string
}
// NewHistoryCommand creates a new `docker history` command
func NewHistoryCommand(dockerCli command.Cli) *cobra.Command {
var opts historyOptions
cmd := &cobra.Command{
Use: "history [OPTIONS] IMAGE",
Short: "Show the history of an image",
Args: cli.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
opts.image = args[0]
return runHistory(dockerCli, opts)
},
}
flags := cmd.Flags()
flags.BoolVarP(&opts.human, "human", "H", true, "Print sizes and dates in human readable format")
flags.BoolVarP(&opts.quiet, "quiet", "q", false, "Only show numeric IDs")
flags.BoolVar(&opts.noTrunc, "no-trunc", false, "Don't truncate output")
flags.StringVar(&opts.format, "format", "", "Pretty-print images using a Go template")
return cmd
}
func runHistory(dockerCli command.Cli, opts historyOptions) error {
ctx := context.Background()
history, err := dockerCli.Client().ImageHistory(ctx, opts.image)
if err != nil {
return err
}
format := opts.format
if len(format) == 0 {
format = formatter.TableFormatKey
}
historyCtx := formatter.Context{
Output: dockerCli.Out(),
Format: formatter.NewHistoryFormat(format, opts.quiet, opts.human),
Trunc: !opts.noTrunc,
}
return formatter.HistoryWrite(historyCtx, opts.human, history)
}

View file

@ -0,0 +1,87 @@
package image
import (
"io"
"os"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
dockeropts "github.com/docker/cli/opts"
"github.com/docker/docker/api/types"
"github.com/docker/docker/pkg/jsonmessage"
"github.com/docker/docker/pkg/urlutil"
"github.com/spf13/cobra"
"golang.org/x/net/context"
)
type importOptions struct {
source string
reference string
changes dockeropts.ListOpts
message string
}
// NewImportCommand creates a new `docker import` command
func NewImportCommand(dockerCli command.Cli) *cobra.Command {
var options importOptions
cmd := &cobra.Command{
Use: "import [OPTIONS] file|URL|- [REPOSITORY[:TAG]]",
Short: "Import the contents from a tarball to create a filesystem image",
Args: cli.RequiresMinArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
options.source = args[0]
if len(args) > 1 {
options.reference = args[1]
}
return runImport(dockerCli, options)
},
}
flags := cmd.Flags()
options.changes = dockeropts.NewListOpts(nil)
flags.VarP(&options.changes, "change", "c", "Apply Dockerfile instruction to the created image")
flags.StringVarP(&options.message, "message", "m", "", "Set commit message for imported image")
return cmd
}
func runImport(dockerCli command.Cli, options importOptions) error {
var (
in io.Reader
srcName = options.source
)
if options.source == "-" {
in = dockerCli.In()
} else if !urlutil.IsURL(options.source) {
srcName = "-"
file, err := os.Open(options.source)
if err != nil {
return err
}
defer file.Close()
in = file
}
source := types.ImageImportSource{
Source: in,
SourceName: srcName,
}
importOptions := types.ImageImportOptions{
Message: options.message,
Changes: options.changes.GetAll(),
}
clnt := dockerCli.Client()
responseBody, err := clnt.ImageImport(context.Background(), source, options.reference, importOptions)
if err != nil {
return err
}
defer responseBody.Close()
return jsonmessage.DisplayJSONMessagesToStream(responseBody, dockerCli.Out(), nil)
}

View file

@ -0,0 +1,44 @@
package image
import (
"golang.org/x/net/context"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
"github.com/docker/cli/cli/command/inspect"
"github.com/spf13/cobra"
)
type inspectOptions struct {
format string
refs []string
}
// newInspectCommand creates a new cobra.Command for `docker image inspect`
func newInspectCommand(dockerCli command.Cli) *cobra.Command {
var opts inspectOptions
cmd := &cobra.Command{
Use: "inspect [OPTIONS] IMAGE [IMAGE...]",
Short: "Display detailed information on one or more images",
Args: cli.RequiresMinArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
opts.refs = args
return runInspect(dockerCli, opts)
},
}
flags := cmd.Flags()
flags.StringVarP(&opts.format, "format", "f", "", "Format the output using the given Go template")
return cmd
}
func runInspect(dockerCli command.Cli, opts inspectOptions) error {
client := dockerCli.Client()
ctx := context.Background()
getRefFunc := func(ref string) (interface{}, []byte, error) {
return client.ImageInspectWithRaw(ctx, ref)
}
return inspect.Inspect(dockerCli.Out(), opts.refs, opts.format, getRefFunc)
}

95
vendor/github.com/docker/cli/cli/command/image/list.go generated vendored Normal file
View file

@ -0,0 +1,95 @@
package image
import (
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
"github.com/docker/cli/cli/command/formatter"
"github.com/docker/cli/opts"
"github.com/docker/docker/api/types"
"github.com/spf13/cobra"
"golang.org/x/net/context"
)
type imagesOptions struct {
matchName string
quiet bool
all bool
noTrunc bool
showDigests bool
format string
filter opts.FilterOpt
}
// NewImagesCommand creates a new `docker images` command
func NewImagesCommand(dockerCli command.Cli) *cobra.Command {
options := imagesOptions{filter: opts.NewFilterOpt()}
cmd := &cobra.Command{
Use: "images [OPTIONS] [REPOSITORY[:TAG]]",
Short: "List images",
Args: cli.RequiresMaxArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
if len(args) > 0 {
options.matchName = args[0]
}
return runImages(dockerCli, options)
},
}
flags := cmd.Flags()
flags.BoolVarP(&options.quiet, "quiet", "q", false, "Only show numeric IDs")
flags.BoolVarP(&options.all, "all", "a", false, "Show all images (default hides intermediate images)")
flags.BoolVar(&options.noTrunc, "no-trunc", false, "Don't truncate output")
flags.BoolVar(&options.showDigests, "digests", false, "Show digests")
flags.StringVar(&options.format, "format", "", "Pretty-print images using a Go template")
flags.VarP(&options.filter, "filter", "f", "Filter output based on conditions provided")
return cmd
}
func newListCommand(dockerCli command.Cli) *cobra.Command {
cmd := *NewImagesCommand(dockerCli)
cmd.Aliases = []string{"images", "list"}
cmd.Use = "ls [OPTIONS] [REPOSITORY[:TAG]]"
return &cmd
}
func runImages(dockerCli command.Cli, options imagesOptions) error {
ctx := context.Background()
filters := options.filter.Value()
if options.matchName != "" {
filters.Add("reference", options.matchName)
}
listOptions := types.ImageListOptions{
All: options.all,
Filters: filters,
}
images, err := dockerCli.Client().ImageList(ctx, listOptions)
if err != nil {
return err
}
format := options.format
if len(format) == 0 {
if len(dockerCli.ConfigFile().ImagesFormat) > 0 && !options.quiet {
format = dockerCli.ConfigFile().ImagesFormat
} else {
format = formatter.TableFormatKey
}
}
imageCtx := formatter.ImageContext{
Context: formatter.Context{
Output: dockerCli.Out(),
Format: formatter.NewImageFormat(format, options.quiet, options.showDigests),
Trunc: !options.noTrunc,
},
Digest: options.showDigests,
}
return formatter.ImageWrite(imageCtx, images)
}

77
vendor/github.com/docker/cli/cli/command/image/load.go generated vendored Normal file
View file

@ -0,0 +1,77 @@
package image
import (
"io"
"golang.org/x/net/context"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
"github.com/docker/docker/pkg/jsonmessage"
"github.com/docker/docker/pkg/system"
"github.com/pkg/errors"
"github.com/spf13/cobra"
)
type loadOptions struct {
input string
quiet bool
}
// NewLoadCommand creates a new `docker load` command
func NewLoadCommand(dockerCli command.Cli) *cobra.Command {
var opts loadOptions
cmd := &cobra.Command{
Use: "load [OPTIONS]",
Short: "Load an image from a tar archive or STDIN",
Args: cli.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
return runLoad(dockerCli, opts)
},
}
flags := cmd.Flags()
flags.StringVarP(&opts.input, "input", "i", "", "Read from tar archive file, instead of STDIN")
flags.BoolVarP(&opts.quiet, "quiet", "q", false, "Suppress the load output")
return cmd
}
func runLoad(dockerCli command.Cli, opts loadOptions) error {
var input io.Reader = dockerCli.In()
if opts.input != "" {
// We use system.OpenSequential to use sequential file access on Windows, avoiding
// depleting the standby list un-necessarily. On Linux, this equates to a regular os.Open.
file, err := system.OpenSequential(opts.input)
if err != nil {
return err
}
defer file.Close()
input = file
}
// To avoid getting stuck, verify that a tar file is given either in
// the input flag or through stdin and if not display an error message and exit.
if opts.input == "" && dockerCli.In().IsTerminal() {
return errors.Errorf("requested load from stdin, but stdin is empty")
}
if !dockerCli.Out().IsTerminal() {
opts.quiet = true
}
response, err := dockerCli.Client().ImageLoad(context.Background(), input, opts.quiet)
if err != nil {
return err
}
defer response.Body.Close()
if response.Body != nil && response.JSON {
return jsonmessage.DisplayJSONMessagesToStream(response.Body, dockerCli.Out(), nil)
}
_, err = io.Copy(dockerCli.Out(), response.Body)
return err
}

View file

@ -0,0 +1,95 @@
package image
import (
"fmt"
"golang.org/x/net/context"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
"github.com/docker/cli/opts"
units "github.com/docker/go-units"
"github.com/spf13/cobra"
)
type pruneOptions struct {
force bool
all bool
filter opts.FilterOpt
}
// NewPruneCommand returns a new cobra prune command for images
func NewPruneCommand(dockerCli command.Cli) *cobra.Command {
options := pruneOptions{filter: opts.NewFilterOpt()}
cmd := &cobra.Command{
Use: "prune [OPTIONS]",
Short: "Remove unused images",
Args: cli.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
spaceReclaimed, output, err := runPrune(dockerCli, options)
if err != nil {
return err
}
if output != "" {
fmt.Fprintln(dockerCli.Out(), output)
}
fmt.Fprintln(dockerCli.Out(), "Total reclaimed space:", units.HumanSize(float64(spaceReclaimed)))
return nil
},
Tags: map[string]string{"version": "1.25"},
}
flags := cmd.Flags()
flags.BoolVarP(&options.force, "force", "f", false, "Do not prompt for confirmation")
flags.BoolVarP(&options.all, "all", "a", false, "Remove all unused images, not just dangling ones")
flags.Var(&options.filter, "filter", "Provide filter values (e.g. 'until=<timestamp>')")
return cmd
}
const (
allImageWarning = `WARNING! This will remove all images without at least one container associated to them.
Are you sure you want to continue?`
danglingWarning = `WARNING! This will remove all dangling images.
Are you sure you want to continue?`
)
func runPrune(dockerCli command.Cli, options pruneOptions) (spaceReclaimed uint64, output string, err error) {
pruneFilters := options.filter.Value()
pruneFilters.Add("dangling", fmt.Sprintf("%v", !options.all))
pruneFilters = command.PruneFilters(dockerCli, pruneFilters)
warning := danglingWarning
if options.all {
warning = allImageWarning
}
if !options.force && !command.PromptForConfirmation(dockerCli.In(), dockerCli.Out(), warning) {
return
}
report, err := dockerCli.Client().ImagesPrune(context.Background(), pruneFilters)
if err != nil {
return
}
if len(report.ImagesDeleted) > 0 {
output = "Deleted Images:\n"
for _, st := range report.ImagesDeleted {
if st.Untagged != "" {
output += fmt.Sprintln("untagged:", st.Untagged)
} else {
output += fmt.Sprintln("deleted:", st.Deleted)
}
}
spaceReclaimed = report.SpaceReclaimed
}
return
}
// RunPrune calls the Image Prune API
// This returns the amount of space reclaimed and a detailed output string
func RunPrune(dockerCli command.Cli, all bool, filter opts.FilterOpt) (uint64, string, error) {
return runPrune(dockerCli, pruneOptions{force: true, all: all, filter: filter})
}

85
vendor/github.com/docker/cli/cli/command/image/pull.go generated vendored Normal file
View file

@ -0,0 +1,85 @@
package image
import (
"fmt"
"strings"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
"github.com/docker/distribution/reference"
"github.com/docker/docker/registry"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"golang.org/x/net/context"
)
type pullOptions struct {
remote string
all bool
}
// NewPullCommand creates a new `docker pull` command
func NewPullCommand(dockerCli command.Cli) *cobra.Command {
var opts pullOptions
cmd := &cobra.Command{
Use: "pull [OPTIONS] NAME[:TAG|@DIGEST]",
Short: "Pull an image or a repository from a registry",
Args: cli.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
opts.remote = args[0]
return runPull(dockerCli, opts)
},
}
flags := cmd.Flags()
flags.BoolVarP(&opts.all, "all-tags", "a", false, "Download all tagged images in the repository")
command.AddTrustVerificationFlags(flags)
return cmd
}
func runPull(dockerCli command.Cli, opts pullOptions) error {
distributionRef, err := reference.ParseNormalizedNamed(opts.remote)
if err != nil {
return err
}
if opts.all && !reference.IsNameOnly(distributionRef) {
return errors.New("tag can't be used with --all-tags/-a")
}
if !opts.all && reference.IsNameOnly(distributionRef) {
distributionRef = reference.TagNameOnly(distributionRef)
if tagged, ok := distributionRef.(reference.Tagged); ok {
fmt.Fprintf(dockerCli.Out(), "Using default tag: %s\n", tagged.Tag())
}
}
// Resolve the Repository name from fqn to RepositoryInfo
repoInfo, err := registry.ParseRepositoryInfo(distributionRef)
if err != nil {
return err
}
ctx := context.Background()
authConfig := command.ResolveAuthConfig(ctx, dockerCli, repoInfo.Index)
requestPrivilege := command.RegistryAuthenticationPrivilegedFunc(dockerCli, repoInfo.Index, "pull")
// Check if reference has a digest
_, isCanonical := distributionRef.(reference.Canonical)
if command.IsTrusted() && !isCanonical {
err = trustedPull(ctx, dockerCli, repoInfo, distributionRef, authConfig, requestPrivilege)
} else {
err = imagePullPrivileged(ctx, dockerCli, authConfig, reference.FamiliarString(distributionRef), requestPrivilege, opts.all)
}
if err != nil {
if strings.Contains(err.Error(), "when fetching 'plugin'") {
return errors.New(err.Error() + " - Use `docker plugin install`")
}
return err
}
return nil
}

Some files were not shown because too many files have changed in this diff Show more