Vendor integration dependencies.

2017-02-07 22:33:23 +01:00 · 2017-02-07 22:33:23 +01:00 · 55b57c736b
commit 55b57c736b
parent dd5e3fba01
2451 changed files with 731611 additions and 0 deletions
--- a/integration/vendor/github.com/vbatts/tar-split/tar/asm/assemble.go
+++ b/integration/vendor/github.com/vbatts/tar-split/tar/asm/assemble.go
@ -0,0 +1,130 @@
+package asm
+
+import (
+	"bytes"
+	"fmt"
+	"hash"
+	"hash/crc64"
+	"io"
+	"sync"
+
+	"github.com/vbatts/tar-split/tar/storage"
+)
+
+// NewOutputTarStream returns an io.ReadCloser from which an assembled tar
+// archive stream can be read.
+//
+// fg supplies the file payloads by name, and up supplies the raw byte segments
+// and the ordering metadata. Together they allow the archive to be reassembled
+// precisely.
+//
+// If either fg or up is nil, nil is returned. Otherwise the archive is
+// produced by a background goroutine writing into a pipe; any error from
+// WriteOutputTarStream is delivered to the reader side of that pipe.
+func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser {
+	// Both arguments are interfaces, so guard against nil values up front.
+	if fg == nil || up == nil {
+		return nil
+	}
+	reader, writer := io.Pipe()
+	go func() {
+		if err := WriteOutputTarStream(fg, up, writer); err != nil {
+			writer.CloseWithError(err)
+			return
+		}
+		writer.Close()
+	}()
+	return reader
+}
+
+// WriteOutputTarStream writes the assembled tar archive to w.
+//
+// Entries are read from up until it reports io.EOF. Segment entries are
+// written verbatim; file entries with a non-zero size are fetched from fg,
+// copied to w, and checked against the crc64 checksum stored in the entry.
+// If fg or up is nil, nothing is written and nil is returned.
+func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error {
+	// Both arguments are interfaces, so guard against nil values up front.
+	if fg == nil || up == nil {
+		return nil
+	}
+	// The hashing state and the copy buffer are created lazily, on the first
+	// file entry with a payload, and then reused for the following ones.
+	var (
+		buf    []byte
+		hasher hash.Hash
+		sum    []byte
+		tee    io.Writer
+	)
+	for {
+		entry, err := up.Next()
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+
+		if entry.Type == storage.SegmentType {
+			if _, err := w.Write(entry.Payload); err != nil {
+				return err
+			}
+			continue
+		}
+		// Anything that is not a file entry, and any file entry without a
+		// payload, contributes no bytes to the output.
+		if entry.Type != storage.FileType || entry.Size == 0 {
+			continue
+		}
+
+		name := entry.GetName()
+		fh, err := fg.Get(name)
+		if err != nil {
+			return err
+		}
+		if hasher != nil {
+			hasher.Reset()
+		} else {
+			hasher = crc64.New(storage.CRCTable)
+			sum = make([]byte, 8)
+			tee = io.MultiWriter(w, hasher)
+			buf = byteBufferPool.Get().([]byte)
+			defer byteBufferPool.Put(buf)
+		}
+
+		// The payload goes to w and to the hash at the same time, so by the
+		// time a checksum mismatch is detected the bytes are already written.
+		_, err = copyWithBuffer(tee, fh, buf)
+		if err == nil && !bytes.Equal(hasher.Sum(sum[:0]), entry.Payload) {
+			// A comparable sentinel error would be nicer, but since this
+			// travels through the PipeReader the context of _which_ file
+			// failed would be lost.
+			err = fmt.Errorf("file integrity checksum failed for %q", name)
+		}
+		fh.Close()
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// byteBufferPool hands out reusable 32 KiB byte slices that serve as scratch
+// buffers when copying file payloads.
+var byteBufferPool = &sync.Pool{
+	New: func() interface{} {
+		const size = 32 * 1024
+		scratch := make([]byte, size)
+		return scratch
+	},
+}
+
+// copyWithBuffer copies from src to dst through buf until src reports io.EOF
+// or an error occurs, and returns the number of bytes written. Reaching
+// io.EOF is not reported as an error. A write that accepts fewer bytes than
+// were read, without an error of its own, yields io.ErrShortWrite.
+//
+// It is taken from the stdlib io.Copy implementation:
+// https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367
+func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
+	for {
+		n, readErr := src.Read(buf)
+		if n > 0 {
+			m, writeErr := dst.Write(buf[:n])
+			if m > 0 {
+				written += int64(m)
+			}
+			switch {
+			case writeErr != nil:
+				return written, writeErr
+			case m != n:
+				return written, io.ErrShortWrite
+			}
+		}
+		switch readErr {
+		case nil:
+			continue
+		case io.EOF:
+			return written, nil
+		default:
+			return written, readErr
+		}
+	}
+}
--- a/integration/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go
+++ b/integration/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go
@ -0,0 +1,141 @@
+package asm
+
+import (
+	"io"
+	"io/ioutil"
+
+	"github.com/vbatts/tar-split/archive/tar"
+	"github.com/vbatts/tar-split/tar/storage"
+)
+
+// NewInputTarStream wraps the Reader stream of a tar archive and provides a
+// Reader stream of the same.
+//
+// In the middle it will pack the segments and file metadata to storage.Packer
+// `p`.
+//
+// The storage.FilePutter is where payload of files in the stream are
+// stashed. If this stashing is not needed, you can provide a nil
+// storage.FilePutter. Since the checksumming is still needed, then a default
+// of NewDiscardFilePutter will be used internally
+//
+// The returned error is always nil. Failures that happen while the archive is
+// being processed are reported through the returned Reader instead, which is
+// the read side of a pipe that gets closed with the error.
+//
+// NOTE(review): p is used without a nil check; passing a nil Packer looks
+// like it would panic inside the goroutine — confirm with callers.
+func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
+	// What to do here... folks will want their own access to the Reader that is
+	// their tar archive stream, but we'll need that same stream to use our
+	// forked 'archive/tar'.
+	// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
+	// from, and we'll MITM the stream to store metadata.
+	// We'll need a storage.FilePutter too ...
+
+	// Another concern, whether to do any storage.FilePutter operations, such that we
+	// don't extract any amount of the archive. But then again, we're not making
+	// files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter.
+	// Perhaps we have a DiscardFilePutter that is a bit bucket.
+
+	// we'll return the pipe reader, since TeeReader does not buffer and will
+	// only read what the outputRdr Read's. Since Tar archives have padding on
+	// the end, we want to be the one reading the padding, even if the user's
+	// `archive/tar` doesn't care.
+	pR, pW := io.Pipe()
+	// Everything the goroutine below reads from r through outputRdr is also
+	// written to pW, and therefore becomes readable from pR.
+	outputRdr := io.TeeReader(r, pW)
+
+	// we need a putter that will generate the crc64 sums of file payloads
+	if fp == nil {
+		fp = storage.NewDiscardFilePutter()
+	}
+
+	go func() {
+		tr := tar.NewReader(outputRdr)
+		// RawAccounting belongs to the forked archive/tar reader; presumably it
+		// makes the reader retain the raw bytes it consumes so that RawBytes
+		// can return them — TODO confirm against the fork.
+		tr.RawAccounting = true
+		for {
+			hdr, err := tr.Next()
+			if err != nil {
+				if err != io.EOF {
+					pW.CloseWithError(err)
+					return
+				}
+				// even when an EOF is reached, there is often 1024 null bytes on
+				// the end of an archive. Collect them too.
+				if b := tr.RawBytes(); len(b) > 0 {
+					_, err := p.AddEntry(storage.Entry{
+						Type:    storage.SegmentType,
+						Payload: b,
+					})
+					if err != nil {
+						pW.CloseWithError(err)
+						return
+					}
+				}
+				break // not return. We need the end of the reader.
+			}
+			if hdr == nil {
+				break // not return. We need the end of the reader.
+			}
+
+			// Raw bytes gathered while reading up to this header are packed as
+			// a segment ahead of the file entry.
+			if b := tr.RawBytes(); len(b) > 0 {
+				_, err := p.AddEntry(storage.Entry{
+					Type:    storage.SegmentType,
+					Payload: b,
+				})
+				if err != nil {
+					pW.CloseWithError(err)
+					return
+				}
+			}
+
+			// Only entries with a payload are handed to the FilePutter; for
+			// the others csum stays nil.
+			var csum []byte
+			if hdr.Size > 0 {
+				var err error
+				_, csum, err = fp.Put(hdr.Name, tr)
+				if err != nil {
+					pW.CloseWithError(err)
+					return
+				}
+			}
+
+			entry := storage.Entry{
+				Type:    storage.FileType,
+				Size:    hdr.Size,
+				Payload: csum,
+			}
+			// For proper marshalling of non-utf8 characters
+			entry.SetName(hdr.Name)
+
+			// File entries added, regardless of size
+			_, err = p.AddEntry(entry)
+			if err != nil {
+				pW.CloseWithError(err)
+				return
+			}
+
+			// Any raw bytes reported after the payload has been consumed are
+			// packed as a segment following the file entry.
+			if b := tr.RawBytes(); len(b) > 0 {
+				_, err = p.AddEntry(storage.Entry{
+					Type:    storage.SegmentType,
+					Payload: b,
+				})
+				if err != nil {
+					pW.CloseWithError(err)
+					return
+				}
+			}
+		}
+
+		// it is allowable, and not uncommon that there is further padding on the
+		// end of an archive, apart from the expected 1024 null bytes.
+		remainder, err := ioutil.ReadAll(outputRdr)
+		if err != nil && err != io.EOF {
+			pW.CloseWithError(err)
+			return
+		}
+		// The remainder is packed as a final segment even when it is empty.
+		_, err = p.AddEntry(storage.Entry{
+			Type:    storage.SegmentType,
+			Payload: remainder,
+		})
+		if err != nil {
+			pW.CloseWithError(err)
+			return
+		}
+		pW.Close()
+	}()
+
+	return pR, nil
+}
--- a/integration/vendor/github.com/vbatts/tar-split/tar/asm/doc.go
+++ b/integration/vendor/github.com/vbatts/tar-split/tar/asm/doc.go
@ -0,0 +1,9 @@
+/*
+Package asm provides the API for streaming assembly and disassembly of tar
+archives.
+
+It uses `github.com/vbatts/tar-split/tar/storage` for packing and unpacking
+the metadata of a stream, and for getting and putting the payloads of the file
+entries.
+*/
+package asm
--- a/integration/vendor/github.com/vbatts/tar-split/tar/storage/doc.go
+++ b/integration/vendor/github.com/vbatts/tar-split/tar/storage/doc.go
@ -0,0 +1,12 @@
+/*
+Package storage is for metadata of a tar archive.
+
+It handles packing and unpacking the Entries of the stream. An Entry is either
+a segment of raw bytes (for the raw headers and various padding) or a marker
+for a file payload.
+
+The raw bytes are stored precisely in the packed (marshalled) Entry, whereas
+the file payload marker includes the name of the file, size, and crc64 checksum
+(for basic file integrity).
+*/
+package storage
--- a/integration/vendor/github.com/vbatts/tar-split/tar/storage/entry.go
+++ b/integration/vendor/github.com/vbatts/tar-split/tar/storage/entry.go
@ -0,0 +1,78 @@
+package storage
+
+import "unicode/utf8"
+
+// Entries is a slice of Entry that can be sorted by Position.
+type Entries []Entry
+
+// Len reports the number of entries.
+func (e Entries) Len() int {
+	return len(e)
+}
+
+// Swap exchanges the entries at indexes i and j.
+func (e Entries) Swap(i, j int) {
+	e[j], e[i] = e[i], e[j]
+}
+
+// Less reports whether the entry at i has a smaller Position than the one at j.
+func (e Entries) Less(i, j int) bool {
+	return e[j].Position > e[i].Position
+}
+
+// Type of Entry
+type Type int
+
+const (
+	// FileType represents a file payload from the tar stream.
+	//
+	// This will be used to map to relative paths on disk. Only Size > 0 will get
+	// read into a resulting output stream (due to hardlinks).
+	FileType Type = 1 + iota
+	// SegmentType represents a raw bytes segment from the archive stream. These raw
+	// byte segments consist of the raw headers and various padding.
+	//
+	// Its payload is to be marshalled base64 encoded.
+	SegmentType
+)
+
+// Entry is the structure for packing and unpacking the information read from
+// the Tar archive.
+//
+// FileType Payload checksum is using `hash/crc64` for basic file integrity,
+// _not_ for cryptography.
+// From http://www.backplane.com/matt/crc64.html, CRC32 has almost 40,000
+// collisions in a sample of 18.2 million, CRC64 had none.
+//
+// The name is kept in exactly one of Name (valid UTF-8) or NameRaw (anything
+// else); use SetName/SetNameBytes and GetName/GetNameBytes rather than the
+// fields directly.
+type Entry struct {
+	Type     Type   `json:"type"`
+	Name     string `json:"name,omitempty"`
+	NameRaw  []byte `json:"name_raw,omitempty"`
+	Size     int64  `json:"size,omitempty"`
+	Payload  []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
+	Position int    `json:"position"`
+}
+
+// SetName will check name for valid UTF-8 string, and set the appropriate
+// field. See https://github.com/vbatts/tar-split/issues/17
+//
+// The other name field is cleared, so that a name set earlier can never
+// shadow the new one when read back through GetName or GetNameBytes.
+func (e *Entry) SetName(name string) {
+	if utf8.ValidString(name) {
+		e.Name = name
+		e.NameRaw = nil
+		return
+	}
+	e.Name = ""
+	e.NameRaw = []byte(name)
+}
+
+// SetNameBytes will check name for valid UTF-8 string, and set the appropriate
+// field.
+//
+// The other name field is cleared, so that a name set earlier can never
+// shadow the new one. When name is not valid UTF-8 the slice is stored as is,
+// without copying.
+func (e *Entry) SetNameBytes(name []byte) {
+	if utf8.Valid(name) {
+		e.Name = string(name)
+		e.NameRaw = nil
+		return
+	}
+	e.Name = ""
+	e.NameRaw = name
+}
+
+// GetName returns the string for the entry's name, regardless of the field
+// it is stored in. A non-empty NameRaw takes precedence over Name.
+func (e *Entry) GetName() string {
+	if len(e.NameRaw) > 0 {
+		return string(e.NameRaw)
+	}
+	return e.Name
+}
+
+// GetNameBytes returns the bytes for the entry's name, regardless of the
+// field it is stored in. A non-empty NameRaw takes precedence over Name and is
+// returned without copying.
+func (e *Entry) GetNameBytes() []byte {
+	if len(e.NameRaw) > 0 {
+		return e.NameRaw
+	}
+	return []byte(e.Name)
+}
--- a/integration/vendor/github.com/vbatts/tar-split/tar/storage/getter.go
+++ b/integration/vendor/github.com/vbatts/tar-split/tar/storage/getter.go
@ -0,0 +1,104 @@
+package storage
+
+import (
+	"bytes"
+	"errors"
+	"hash/crc64"
+	"io"
+	"os"
+	"path/filepath"
+)
+
+// FileGetter is the interface for getting a stream of a file payload,
+// addressed by name/filename. Presumably, the names will be scoped to relative
+// file paths.
+type FileGetter interface {
+	// Get returns a stream for the provided file path. The stream is an
+	// io.ReadCloser, so it has to be closed once it is no longer needed.
+	Get(filename string) (output io.ReadCloser, err error)
+}
+
+// FilePutter is the interface for storing a stream of a file payload,
+// addressed by name/filename.
+type FilePutter interface {
+	// Put returns the size of the stream received, and the crc64 checksum for
+	// the provided stream. The implementations in this package compute the
+	// checksum with CRCTable.
+	Put(filename string, input io.Reader) (size int64, checksum []byte, err error)
+}
+
+// FileGetPutter is the interface that groups both Getting and Putting file
+// payloads, i.e. it embeds FileGetter and FilePutter.
+type FileGetPutter interface {
+	FileGetter
+	FilePutter
+}
+
+// NewPathFileGetter returns a FileGetter that resolves file names relative to
+// the directory relpath.
+func NewPathFileGetter(relpath string) FileGetter {
+	getter := pathFileGetter{root: relpath}
+	return &getter
+}
+
+// pathFileGetter is a FileGetter that opens files below a root directory.
+type pathFileGetter struct {
+	root string
+}
+
+// Get opens the file named filename, joined onto the getter's root, for
+// reading. On failure it returns a nil io.ReadCloser together with the error
+// from os.Open.
+//
+// NOTE(review): filename is joined as is; a name containing ".." elements can
+// resolve to a path outside of root. Callers handling untrusted metadata
+// should validate names before calling Get.
+func (pfg pathFileGetter) Get(filename string) (io.ReadCloser, error) {
+	f, err := os.Open(filepath.Join(pfg.root, filename))
+	if err != nil {
+		// Return a literal nil: handing back the nil *os.File directly would
+		// wrap it in a non-nil io.ReadCloser.
+		return nil, err
+	}
+	return f, nil
+}
+
+// bufferFileGetPutter is a FileGetPutter that keeps every payload in memory,
+// keyed by name.
+type bufferFileGetPutter struct {
+	files map[string][]byte
+}
+
+// Get returns a reader over the payload stored under name, or an error when
+// nothing has been stored under that name.
+func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) {
+	content, found := bfgp.files[name]
+	if !found {
+		return nil, errors.New("no such file")
+	}
+	return &readCloserWrapper{bytes.NewBuffer(content)}, nil
+}
+
+// Put reads r to the end, stores what was read under name and returns the
+// number of bytes read together with their crc64 checksum. When reading fails
+// nothing is stored.
+func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) {
+	var stored bytes.Buffer
+	hasher := crc64.New(CRCTable)
+	n, err := io.Copy(io.MultiWriter(hasher, &stored), r)
+	if err != nil {
+		return 0, nil, err
+	}
+	bfgp.files[name] = stored.Bytes()
+	return n, hasher.Sum(nil), nil
+}
+
+// readCloserWrapper turns an io.Reader into an io.ReadCloser whose Close is a
+// no-op.
+type readCloserWrapper struct {
+	io.Reader
+}
+
+// Close does nothing and always returns nil.
+func (w *readCloserWrapper) Close() error {
+	return nil
+}
+
+// NewBufferFileGetPutter returns a simple in-memory FileGetPutter.
+//
+// Every payload is held in memory, so this is memory intensive; it is
+// probably best suited for testing or light weight cases.
+func NewBufferFileGetPutter() FileGetPutter {
+	store := make(map[string][]byte)
+	return &bufferFileGetPutter{files: store}
+}
+
+// NewDiscardFilePutter returns a FilePutter that acts as a bit bucket: it
+// checksums the payloads it is given without storing them.
+func NewDiscardFilePutter() FilePutter {
+	return new(bitBucketFilePutter)
+}
+
+// bitBucketFilePutter is a FilePutter that only computes checksums.
+type bitBucketFilePutter struct{}
+
+// Put reads r to the end and returns the number of bytes read, the crc64
+// checksum of those bytes, and any error from reading. The name is not used.
+func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) {
+	hasher := crc64.New(CRCTable)
+	n, err := io.Copy(hasher, r)
+	sum := hasher.Sum(nil)
+	return n, sum, err
+}
+
+// CRCTable is the default table used for crc64 sum calculations. It is built
+// from the crc64.ISO polynomial.
+var CRCTable = crc64.MakeTable(crc64.ISO)
--- a/integration/vendor/github.com/vbatts/tar-split/tar/storage/packer.go
+++ b/integration/vendor/github.com/vbatts/tar-split/tar/storage/packer.go
@ -0,0 +1,127 @@
+package storage
+
+import (
+	"encoding/json"
+	"errors"
+	"io"
+	"path/filepath"
+	"unicode/utf8"
+)
+
+// ErrDuplicatePath occurs when a tar archive has more than one entry for the
+// same file path. The JSON packer and unpacker in this package compare the
+// paths of file entries after filepath.Clean.
+var ErrDuplicatePath = errors.New("duplicates of file paths not supported")
+
+// Packer describes the methods to pack Entries to a storage destination
+type Packer interface {
+	// AddEntry packs the Entry and returns its position. The Entry is passed
+	// by value, so the caller's copy is not modified.
+	AddEntry(e Entry) (int, error)
+}
+
+// Unpacker describes the methods to read Entries from a source
+type Unpacker interface {
+	// Next returns the next Entry being unpacked, or error, until io.EOF.
+	// io.EOF signals the regular end of the source.
+	Next() (*Entry, error)
+}
+
+/* TODO(vbatts) figure out a good model for this
+type PackUnpacker interface {
+	Packer
+	Unpacker
+}
+*/
+
+// jsonUnpacker is an Unpacker that decodes Entries from a JSON stream and
+// remembers the file names it has already returned.
+type jsonUnpacker struct {
+	seen seenNames
+	dec  *json.Decoder
+}
+
+// Next decodes and returns the next Entry. Decoder errors, including io.EOF
+// at the end of the stream, are returned unchanged. A file entry whose cleaned
+// name has been seen before yields ErrDuplicatePath.
+func (jup *jsonUnpacker) Next() (*Entry, error) {
+	entry := new(Entry)
+	if err := jup.dec.Decode(entry); err != nil {
+		return nil, err
+	}
+
+	// Only file entries carry a name that needs the duplicate check.
+	if entry.Type != FileType {
+		return entry, nil
+	}
+	cleaned := filepath.Clean(entry.GetName())
+	if _, dup := jup.seen[cleaned]; dup {
+		return nil, ErrDuplicatePath
+	}
+	jup.seen[cleaned] = struct{}{}
+	return entry, nil
+}
+
+// NewJSONUnpacker provides an Unpacker that reads Entries (SegmentType and
+// FileType) as json documents from r.
+//
+// The Entries are expected to be delimited by new lines.
+func NewJSONUnpacker(r io.Reader) Unpacker {
+	decoder := json.NewDecoder(r)
+	return &jsonUnpacker{seen: make(seenNames), dec: decoder}
+}
+
+// jsonPacker is a Packer that encodes each Entry as a JSON document, numbering
+// the entries in the order they are added.
+type jsonPacker struct {
+	w    io.Writer
+	e    *json.Encoder
+	pos  int
+	seen seenNames
+}
+
+// seenNames is a set of the cleaned file names encountered so far.
+type seenNames map[string]struct{}
+
+// AddEntry encodes e and returns the position assigned to it. On failure it
+// returns -1 and the error; a file entry whose cleaned name was added before
+// yields ErrDuplicatePath. The position counter only advances after a
+// successful encode.
+func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
+	// A Name that is not valid utf8 is moved over to the raw field first.
+	if e.Name != "" && !utf8.ValidString(e.Name) {
+		e.NameRaw, e.Name = []byte(e.Name), ""
+	}
+
+	// Reject duplicate names before anything is written.
+	if e.Type == FileType {
+		key := filepath.Clean(e.GetName())
+		if _, dup := jp.seen[key]; dup {
+			return -1, ErrDuplicatePath
+		}
+		jp.seen[key] = struct{}{}
+	}
+
+	e.Position = jp.pos
+	if err := jp.e.Encode(e); err != nil {
+		return -1, err
+	}
+
+	jp.pos++
+	return e.Position, nil
+}
+
+// NewJSONPacker provides a Packer that writes each Entry (SegmentType and
+// FileType) to w as a json document.
+//
+// The Entries are delimited by new lines.
+func NewJSONPacker(w io.Writer) Packer {
+	packer := &jsonPacker{w: w, seen: seenNames{}}
+	packer.e = json.NewEncoder(w)
+	return packer
+}
+
+/*
+TODO(vbatts) perhaps have a more compact packer/unpacker, maybe using msgpack
+(https://github.com/ugorji/go)
+
+
+Even though, since our jsonUnpacker and jsonPacker just take
+io.Reader/io.Writer, then we can get away with passing them a
+gzip.Reader/gzip.Writer
+*/