Vendor integration dependencies.

This commit is contained in:
Timo Reimann 2017-02-07 22:33:23 +01:00
parent dd5e3fba01
commit 55b57c736b
2451 changed files with 731611 additions and 0 deletions

28
integration/vendor/github.com/vbatts/tar-split/LICENSE generated vendored Normal file
View file

@ -0,0 +1,28 @@
Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,340 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package tar implements access to tar archives.
// It aims to cover most of the variations, including those produced
// by GNU and BSD tars.
//
// References:
// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
// http://www.gnu.org/software/tar/manual/html_node/Standard.html
// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
package tar
import (
"bytes"
"errors"
"fmt"
"os"
"path"
"time"
)
const (
blockSize = 512
// Types
TypeReg = '0' // regular file
TypeRegA = '\x00' // regular file
TypeLink = '1' // hard link
TypeSymlink = '2' // symbolic link
TypeChar = '3' // character device node
TypeBlock = '4' // block device node
TypeDir = '5' // directory
TypeFifo = '6' // fifo node
TypeCont = '7' // reserved
TypeXHeader = 'x' // extended header
TypeXGlobalHeader = 'g' // global extended header
TypeGNULongName = 'L' // Next file has a long name
TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name
TypeGNUSparse = 'S' // sparse file
)
// A Header represents a single header in a tar archive.
// Some fields may not be populated.
type Header struct {
Name string // name of header file entry
Mode int64 // permission and mode bits
Uid int // user id of owner
Gid int // group id of owner
Size int64 // length in bytes
ModTime time.Time // modified time
Typeflag byte // type of header entry
Linkname string // target name of link
Uname string // user name of owner
Gname string // group name of owner
Devmajor int64 // major number of character or block device
Devminor int64 // minor number of character or block device
AccessTime time.Time // access time
ChangeTime time.Time // status change time
Xattrs map[string]string
}
// File name constants from the tar spec.
const (
fileNameSize = 100 // Maximum number of bytes in a standard tar name.
fileNamePrefixSize = 155 // Maximum number of ustar extension bytes.
)
// FileInfo returns an os.FileInfo for the Header.
func (h *Header) FileInfo() os.FileInfo {
return headerFileInfo{h}
}
// headerFileInfo implements os.FileInfo.
type headerFileInfo struct {
h *Header
}
func (fi headerFileInfo) Size() int64 { return fi.h.Size }
func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() }
func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime }
func (fi headerFileInfo) Sys() interface{} { return fi.h }
// Name returns the base name of the file.
func (fi headerFileInfo) Name() string {
if fi.IsDir() {
return path.Base(path.Clean(fi.h.Name))
}
return path.Base(fi.h.Name)
}
// Mode returns the permission and mode bits for the headerFileInfo.
func (fi headerFileInfo) Mode() (mode os.FileMode) {
// Set file permission bits.
mode = os.FileMode(fi.h.Mode).Perm()
// Set setuid, setgid and sticky bits.
if fi.h.Mode&c_ISUID != 0 {
// setuid
mode |= os.ModeSetuid
}
if fi.h.Mode&c_ISGID != 0 {
// setgid
mode |= os.ModeSetgid
}
if fi.h.Mode&c_ISVTX != 0 {
// sticky
mode |= os.ModeSticky
}
// Set file mode bits.
// clear perm, setuid, setgid and sticky bits.
m := os.FileMode(fi.h.Mode) &^ 07777
if m == c_ISDIR {
// directory
mode |= os.ModeDir
}
if m == c_ISFIFO {
// named pipe (FIFO)
mode |= os.ModeNamedPipe
}
if m == c_ISLNK {
// symbolic link
mode |= os.ModeSymlink
}
if m == c_ISBLK {
// device file
mode |= os.ModeDevice
}
if m == c_ISCHR {
// Unix character device
mode |= os.ModeDevice
mode |= os.ModeCharDevice
}
if m == c_ISSOCK {
// Unix domain socket
mode |= os.ModeSocket
}
switch fi.h.Typeflag {
case TypeSymlink:
// symbolic link
mode |= os.ModeSymlink
case TypeChar:
// character device node
mode |= os.ModeDevice
mode |= os.ModeCharDevice
case TypeBlock:
// block device node
mode |= os.ModeDevice
case TypeDir:
// directory
mode |= os.ModeDir
case TypeFifo:
// fifo node
mode |= os.ModeNamedPipe
}
return mode
}
// sysStat, if non-nil, populates h from system-dependent fields of fi.
var sysStat func(fi os.FileInfo, h *Header) error
// Mode constants from the tar spec.
const (
c_ISUID = 04000 // Set uid
c_ISGID = 02000 // Set gid
c_ISVTX = 01000 // Save text (sticky bit)
c_ISDIR = 040000 // Directory
c_ISFIFO = 010000 // FIFO
c_ISREG = 0100000 // Regular file
c_ISLNK = 0120000 // Symbolic link
c_ISBLK = 060000 // Block special file
c_ISCHR = 020000 // Character special file
c_ISSOCK = 0140000 // Socket
)
// Keywords for the PAX Extended Header
const (
paxAtime = "atime"
paxCharset = "charset"
paxComment = "comment"
paxCtime = "ctime" // please note that ctime is not a valid pax header.
paxGid = "gid"
paxGname = "gname"
paxLinkpath = "linkpath"
paxMtime = "mtime"
paxPath = "path"
paxSize = "size"
paxUid = "uid"
paxUname = "uname"
paxXattr = "SCHILY.xattr."
paxNone = ""
)
// FileInfoHeader creates a partially-populated Header from fi.
// If fi describes a symlink, FileInfoHeader records link as the link target.
// If fi describes a directory, a slash is appended to the name.
// Because os.FileInfo's Name method returns only the base name of
// the file it describes, it may be necessary to modify the Name field
// of the returned header to provide the full path name of the file.
func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
if fi == nil {
return nil, errors.New("tar: FileInfo is nil")
}
fm := fi.Mode()
h := &Header{
Name: fi.Name(),
ModTime: fi.ModTime(),
Mode: int64(fm.Perm()), // or'd with c_IS* constants later
}
switch {
case fm.IsRegular():
h.Mode |= c_ISREG
h.Typeflag = TypeReg
h.Size = fi.Size()
case fi.IsDir():
h.Typeflag = TypeDir
h.Mode |= c_ISDIR
h.Name += "/"
case fm&os.ModeSymlink != 0:
h.Typeflag = TypeSymlink
h.Mode |= c_ISLNK
h.Linkname = link
case fm&os.ModeDevice != 0:
if fm&os.ModeCharDevice != 0 {
h.Mode |= c_ISCHR
h.Typeflag = TypeChar
} else {
h.Mode |= c_ISBLK
h.Typeflag = TypeBlock
}
case fm&os.ModeNamedPipe != 0:
h.Typeflag = TypeFifo
h.Mode |= c_ISFIFO
case fm&os.ModeSocket != 0:
h.Mode |= c_ISSOCK
default:
return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm)
}
if fm&os.ModeSetuid != 0 {
h.Mode |= c_ISUID
}
if fm&os.ModeSetgid != 0 {
h.Mode |= c_ISGID
}
if fm&os.ModeSticky != 0 {
h.Mode |= c_ISVTX
}
// If possible, populate additional fields from OS-specific
// FileInfo fields.
if sys, ok := fi.Sys().(*Header); ok {
// This FileInfo came from a Header (not the OS). Use the
// original Header to populate all remaining fields.
h.Uid = sys.Uid
h.Gid = sys.Gid
h.Uname = sys.Uname
h.Gname = sys.Gname
h.AccessTime = sys.AccessTime
h.ChangeTime = sys.ChangeTime
if sys.Xattrs != nil {
h.Xattrs = make(map[string]string)
for k, v := range sys.Xattrs {
h.Xattrs[k] = v
}
}
if sys.Typeflag == TypeLink {
// hard link
h.Typeflag = TypeLink
h.Size = 0
h.Linkname = sys.Linkname
}
}
if sysStat != nil {
return h, sysStat(fi, h)
}
return h, nil
}
var zeroBlock = make([]byte, blockSize)
// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values.
// We compute and return both.
func checksum(header []byte) (unsigned int64, signed int64) {
for i := 0; i < len(header); i++ {
if i == 148 {
// The chksum field (header[148:156]) is special: it should be treated as space bytes.
unsigned += ' ' * 8
signed += ' ' * 8
i += 7
continue
}
unsigned += int64(header[i])
signed += int64(int8(header[i]))
}
return
}
type slicer []byte
func (sp *slicer) next(n int) (b []byte) {
s := *sp
b, *sp = s[0:n], s[n:]
return
}
func isASCII(s string) bool {
for _, c := range s {
if c >= 0x80 {
return false
}
}
return true
}
func toASCII(s string) string {
if isASCII(s) {
return s
}
var buf bytes.Buffer
for _, c := range s {
if c < 0x80 {
buf.WriteByte(byte(c))
}
}
return buf.String()
}
// isHeaderOnlyType checks if the given type flag is of the type that has no
// data section even if a size is specified.
func isHeaderOnlyType(flag byte) bool {
switch flag {
case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
return true
default:
return false
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,20 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux dragonfly openbsd solaris
package tar
import (
"syscall"
"time"
)
func statAtime(st *syscall.Stat_t) time.Time {
return time.Unix(st.Atim.Unix())
}
func statCtime(st *syscall.Stat_t) time.Time {
return time.Unix(st.Ctim.Unix())
}

View file

@ -0,0 +1,20 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin freebsd netbsd
package tar
import (
"syscall"
"time"
)
func statAtime(st *syscall.Stat_t) time.Time {
return time.Unix(st.Atimespec.Unix())
}
func statCtime(st *syscall.Stat_t) time.Time {
return time.Unix(st.Ctimespec.Unix())
}

View file

@ -0,0 +1,32 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux darwin dragonfly freebsd openbsd netbsd solaris
package tar
import (
"os"
"syscall"
)
func init() {
sysStat = statUnix
}
func statUnix(fi os.FileInfo, h *Header) error {
sys, ok := fi.Sys().(*syscall.Stat_t)
if !ok {
return nil
}
h.Uid = int(sys.Uid)
h.Gid = int(sys.Gid)
// TODO(bradfitz): populate username & group. os/user
// doesn't cache LookupId lookups, and lacks group
// lookup functions.
h.AccessTime = statAtime(sys)
h.ChangeTime = statCtime(sys)
// TODO(bradfitz): major/minor device numbers?
return nil
}

View file

@ -0,0 +1,416 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar
// TODO(dsymonds):
// - catch more errors (no first header, etc.)
import (
"bytes"
"errors"
"fmt"
"io"
"path"
"sort"
"strconv"
"strings"
"time"
)
var (
ErrWriteTooLong = errors.New("archive/tar: write too long")
ErrFieldTooLong = errors.New("archive/tar: header field too long")
ErrWriteAfterClose = errors.New("archive/tar: write after close")
errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values")
)
// A Writer provides sequential writing of a tar archive in POSIX.1 format.
// A tar archive consists of a sequence of files.
// Call WriteHeader to begin a new file, and then call Write to supply that file's data,
// writing at most hdr.Size bytes in total.
type Writer struct {
w io.Writer
err error
nb int64 // number of unwritten bytes for current file entry
pad int64 // amount of padding to write after current file entry
closed bool
usedBinary bool // whether the binary numeric field extension was used
preferPax bool // use pax header instead of binary numeric header
hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header
paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
}
type formatter struct {
err error // Last error seen
}
// NewWriter creates a new Writer writing to w.
func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
// Flush finishes writing the current file (optional).
func (tw *Writer) Flush() error {
if tw.nb > 0 {
tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb)
return tw.err
}
n := tw.nb + tw.pad
for n > 0 && tw.err == nil {
nr := n
if nr > blockSize {
nr = blockSize
}
var nw int
nw, tw.err = tw.w.Write(zeroBlock[0:nr])
n -= int64(nw)
}
tw.nb = 0
tw.pad = 0
return tw.err
}
// Write s into b, terminating it with a NUL if there is room.
func (f *formatter) formatString(b []byte, s string) {
if len(s) > len(b) {
f.err = ErrFieldTooLong
return
}
ascii := toASCII(s)
copy(b, ascii)
if len(ascii) < len(b) {
b[len(ascii)] = 0
}
}
// Encode x as an octal ASCII string and write it into b with leading zeros.
func (f *formatter) formatOctal(b []byte, x int64) {
s := strconv.FormatInt(x, 8)
// leading zeros, but leave room for a NUL.
for len(s)+1 < len(b) {
s = "0" + s
}
f.formatString(b, s)
}
// fitsInBase256 reports whether x can be encoded into n bytes using base-256
// encoding. Unlike octal encoding, base-256 encoding does not require that the
// string ends with a NUL character. Thus, all n bytes are available for output.
//
// If operating in binary mode, this assumes strict GNU binary mode; which means
// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
// equivalent to the sign bit in two's complement form.
func fitsInBase256(n int, x int64) bool {
var binBits = uint(n-1) * 8
return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
}
// Write x into b, as binary (GNUtar/star extension).
func (f *formatter) formatNumeric(b []byte, x int64) {
if fitsInBase256(len(b), x) {
for i := len(b) - 1; i >= 0; i-- {
b[i] = byte(x)
x >>= 8
}
b[0] |= 0x80 // Highest bit indicates binary format
return
}
f.formatOctal(b, 0) // Last resort, just write zero
f.err = ErrFieldTooLong
}
var (
minTime = time.Unix(0, 0)
// There is room for 11 octal digits (33 bits) of mtime.
maxTime = minTime.Add((1<<33 - 1) * time.Second)
)
// WriteHeader writes hdr and prepares to accept the file's contents.
// WriteHeader calls Flush if it is not the first header.
// Calling after a Close will return ErrWriteAfterClose.
func (tw *Writer) WriteHeader(hdr *Header) error {
return tw.writeHeader(hdr, true)
}
// WriteHeader writes hdr and prepares to accept the file's contents.
// WriteHeader calls Flush if it is not the first header.
// Calling after a Close will return ErrWriteAfterClose.
// As this method is called internally by writePax header to allow it to
// suppress writing the pax header.
func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
if tw.closed {
return ErrWriteAfterClose
}
if tw.err == nil {
tw.Flush()
}
if tw.err != nil {
return tw.err
}
// a map to hold pax header records, if any are needed
paxHeaders := make(map[string]string)
// TODO(shanemhansen): we might want to use PAX headers for
// subsecond time resolution, but for now let's just capture
// too long fields or non ascii characters
var f formatter
var header []byte
// We need to select which scratch buffer to use carefully,
// since this method is called recursively to write PAX headers.
// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
// If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
// already being used by the non-recursive call, so we must use paxHdrBuff.
header = tw.hdrBuff[:]
if !allowPax {
header = tw.paxHdrBuff[:]
}
copy(header, zeroBlock)
s := slicer(header)
// Wrappers around formatter that automatically sets paxHeaders if the
// argument extends beyond the capacity of the input byte slice.
var formatString = func(b []byte, s string, paxKeyword string) {
needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
paxHeaders[paxKeyword] = s
return
}
f.formatString(b, s)
}
var formatNumeric = func(b []byte, x int64, paxKeyword string) {
// Try octal first.
s := strconv.FormatInt(x, 8)
if len(s) < len(b) {
f.formatOctal(b, x)
return
}
// If it is too long for octal, and PAX is preferred, use a PAX header.
if paxKeyword != paxNone && tw.preferPax {
f.formatOctal(b, 0)
s := strconv.FormatInt(x, 10)
paxHeaders[paxKeyword] = s
return
}
tw.usedBinary = true
f.formatNumeric(b, x)
}
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
pathHeaderBytes := s.next(fileNameSize)
formatString(pathHeaderBytes, hdr.Name, paxPath)
// Handle out of range ModTime carefully.
var modTime int64
if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
modTime = hdr.ModTime.Unix()
}
f.formatOctal(s.next(8), hdr.Mode) // 100:108
formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116
formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124
formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136
formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity
s.next(8) // chksum (148:156)
s.next(1)[0] = hdr.Typeflag // 156:157
formatString(s.next(100), hdr.Linkname, paxLinkpath)
copy(s.next(8), []byte("ustar\x0000")) // 257:265
formatString(s.next(32), hdr.Uname, paxUname) // 265:297
formatString(s.next(32), hdr.Gname, paxGname) // 297:329
formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337
formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345
// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
prefixHeaderBytes := s.next(155)
formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix
// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
if tw.usedBinary {
copy(header[257:265], []byte("ustar \x00"))
}
_, paxPathUsed := paxHeaders[paxPath]
// try to use a ustar header when only the name is too long
if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
prefix, suffix, ok := splitUSTARPath(hdr.Name)
if ok {
// Since we can encode in USTAR format, disable PAX header.
delete(paxHeaders, paxPath)
// Update the path fields
formatString(pathHeaderBytes, suffix, paxNone)
formatString(prefixHeaderBytes, prefix, paxNone)
}
}
// The chksum field is terminated by a NUL and a space.
// This is different from the other octal fields.
chksum, _ := checksum(header)
f.formatOctal(header[148:155], chksum) // Never fails
header[155] = ' '
// Check if there were any formatting errors.
if f.err != nil {
tw.err = f.err
return tw.err
}
if allowPax {
for k, v := range hdr.Xattrs {
paxHeaders[paxXattr+k] = v
}
}
if len(paxHeaders) > 0 {
if !allowPax {
return errInvalidHeader
}
if err := tw.writePAXHeader(hdr, paxHeaders); err != nil {
return err
}
}
tw.nb = int64(hdr.Size)
tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize
_, tw.err = tw.w.Write(header)
return tw.err
}
// splitUSTARPath splits a path according to USTAR prefix and suffix rules.
// If the path is not splittable, then it will return ("", "", false).
func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
length := len(name)
if length <= fileNameSize || !isASCII(name) {
return "", "", false
} else if length > fileNamePrefixSize+1 {
length = fileNamePrefixSize + 1
} else if name[length-1] == '/' {
length--
}
i := strings.LastIndex(name[:length], "/")
nlen := len(name) - i - 1 // nlen is length of suffix
plen := i // plen is length of prefix
if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
return "", "", false
}
return name[:i], name[i+1:], true
}
// writePaxHeader writes an extended pax header to the
// archive.
func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error {
// Prepare extended header
ext := new(Header)
ext.Typeflag = TypeXHeader
// Setting ModTime is required for reader parsing to
// succeed, and seems harmless enough.
ext.ModTime = hdr.ModTime
// The spec asks that we namespace our pseudo files
// with the current pid. However, this results in differing outputs
// for identical inputs. As such, the constant 0 is now used instead.
// golang.org/issue/12358
dir, file := path.Split(hdr.Name)
fullName := path.Join(dir, "PaxHeaders.0", file)
ascii := toASCII(fullName)
if len(ascii) > 100 {
ascii = ascii[:100]
}
ext.Name = ascii
// Construct the body
var buf bytes.Buffer
// Keys are sorted before writing to body to allow deterministic output.
var keys []string
for k := range paxHeaders {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k]))
}
ext.Size = int64(len(buf.Bytes()))
if err := tw.writeHeader(ext, false); err != nil {
return err
}
if _, err := tw.Write(buf.Bytes()); err != nil {
return err
}
if err := tw.Flush(); err != nil {
return err
}
return nil
}
// formatPAXRecord formats a single PAX record, prefixing it with the
// appropriate length.
func formatPAXRecord(k, v string) string {
const padding = 3 // Extra padding for ' ', '=', and '\n'
size := len(k) + len(v) + padding
size += len(strconv.Itoa(size))
record := fmt.Sprintf("%d %s=%s\n", size, k, v)
// Final adjustment if adding size field increased the record size.
if len(record) != size {
size = len(record)
record = fmt.Sprintf("%d %s=%s\n", size, k, v)
}
return record
}
// Write writes to the current entry in the tar archive.
// Write returns the error ErrWriteTooLong if more than
// hdr.Size bytes are written after WriteHeader.
func (tw *Writer) Write(b []byte) (n int, err error) {
if tw.closed {
err = ErrWriteAfterClose
return
}
overwrite := false
if int64(len(b)) > tw.nb {
b = b[0:tw.nb]
overwrite = true
}
n, err = tw.w.Write(b)
tw.nb -= int64(n)
if err == nil && overwrite {
err = ErrWriteTooLong
return
}
tw.err = err
return
}
// Close closes the tar archive, flushing any unwritten
// data to the underlying writer.
func (tw *Writer) Close() error {
if tw.err != nil || tw.closed {
return tw.err
}
tw.Flush()
tw.closed = true
if tw.err != nil {
return tw.err
}
// trailer: two zero blocks
for i := 0; i < 2; i++ {
_, tw.err = tw.w.Write(zeroBlock)
if tw.err != nil {
break
}
}
return tw.err
}

View file

@ -0,0 +1,130 @@
package asm
import (
"bytes"
"fmt"
"hash"
"hash/crc64"
"io"
"sync"
"github.com/vbatts/tar-split/tar/storage"
)
// NewOutputTarStream returns an io.ReadCloser that is an assembled tar archive
// stream.
//
// It takes a storage.FileGetter, for mapping the file payloads that are to be read in,
// and a storage.Unpacker, which has access to the rawbytes and file order
// metadata. With the combination of these two items, a precise assembled Tar
// archive is possible.
func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser {
// ... Since these are interfaces, this is possible, so let's not have a nil pointer
if fg == nil || up == nil {
return nil
}
pr, pw := io.Pipe()
go func() {
err := WriteOutputTarStream(fg, up, pw)
if err != nil {
pw.CloseWithError(err)
} else {
pw.Close()
}
}()
return pr
}
// WriteOutputTarStream writes assembled tar archive to a writer.
func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error {
// ... Since these are interfaces, this is possible, so let's not have a nil pointer
if fg == nil || up == nil {
return nil
}
var copyBuffer []byte
var crcHash hash.Hash
var crcSum []byte
var multiWriter io.Writer
for {
entry, err := up.Next()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
switch entry.Type {
case storage.SegmentType:
if _, err := w.Write(entry.Payload); err != nil {
return err
}
case storage.FileType:
if entry.Size == 0 {
continue
}
fh, err := fg.Get(entry.GetName())
if err != nil {
return err
}
if crcHash == nil {
crcHash = crc64.New(storage.CRCTable)
crcSum = make([]byte, 8)
multiWriter = io.MultiWriter(w, crcHash)
copyBuffer = byteBufferPool.Get().([]byte)
defer byteBufferPool.Put(copyBuffer)
} else {
crcHash.Reset()
}
if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil {
fh.Close()
return err
}
if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) {
// I would rather this be a comparable ErrInvalidChecksum or such,
// but since it's coming through the PipeReader, the context of
// _which_ file would be lost...
fh.Close()
return fmt.Errorf("file integrity checksum failed for %q", entry.GetName())
}
fh.Close()
}
}
}
var byteBufferPool = &sync.Pool{
New: func() interface{} {
return make([]byte, 32*1024)
},
}
// copyWithBuffer is taken from stdlib io.Copy implementation
// https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367
func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
for {
nr, er := src.Read(buf)
if nr > 0 {
nw, ew := dst.Write(buf[0:nr])
if nw > 0 {
written += int64(nw)
}
if ew != nil {
err = ew
break
}
if nr != nw {
err = io.ErrShortWrite
break
}
}
if er == io.EOF {
break
}
if er != nil {
err = er
break
}
}
return written, err
}

View file

@ -0,0 +1,141 @@
package asm
import (
"io"
"io/ioutil"
"github.com/vbatts/tar-split/archive/tar"
"github.com/vbatts/tar-split/tar/storage"
)
// NewInputTarStream wraps the Reader stream of a tar archive and provides a
// Reader stream of the same.
//
// In the middle it will pack the segments and file metadata to storage.Packer
// `p`.
//
// The the storage.FilePutter is where payload of files in the stream are
// stashed. If this stashing is not needed, you can provide a nil
// storage.FilePutter. Since the checksumming is still needed, then a default
// of NewDiscardFilePutter will be used internally
func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
// What to do here... folks will want their own access to the Reader that is
// their tar archive stream, but we'll need that same stream to use our
// forked 'archive/tar'.
// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
// from, and we'll MITM the stream to store metadata.
// We'll need a storage.FilePutter too ...
// Another concern, whether to do any storage.FilePutter operations, such that we
// don't extract any amount of the archive. But then again, we're not making
// files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter.
// Perhaps we have a DiscardFilePutter that is a bit bucket.
// we'll return the pipe reader, since TeeReader does not buffer and will
// only read what the outputRdr Read's. Since Tar archives have padding on
// the end, we want to be the one reading the padding, even if the user's
// `archive/tar` doesn't care.
pR, pW := io.Pipe()
outputRdr := io.TeeReader(r, pW)
// we need a putter that will generate the crc64 sums of file payloads
if fp == nil {
fp = storage.NewDiscardFilePutter()
}
go func() {
tr := tar.NewReader(outputRdr)
tr.RawAccounting = true
for {
hdr, err := tr.Next()
if err != nil {
if err != io.EOF {
pW.CloseWithError(err)
return
}
// even when an EOF is reached, there is often 1024 null bytes on
// the end of an archive. Collect them too.
if b := tr.RawBytes(); len(b) > 0 {
_, err := p.AddEntry(storage.Entry{
Type: storage.SegmentType,
Payload: b,
})
if err != nil {
pW.CloseWithError(err)
return
}
}
break // not return. We need the end of the reader.
}
if hdr == nil {
break // not return. We need the end of the reader.
}
if b := tr.RawBytes(); len(b) > 0 {
_, err := p.AddEntry(storage.Entry{
Type: storage.SegmentType,
Payload: b,
})
if err != nil {
pW.CloseWithError(err)
return
}
}
var csum []byte
if hdr.Size > 0 {
var err error
_, csum, err = fp.Put(hdr.Name, tr)
if err != nil {
pW.CloseWithError(err)
return
}
}
entry := storage.Entry{
Type: storage.FileType,
Size: hdr.Size,
Payload: csum,
}
// For proper marshalling of non-utf8 characters
entry.SetName(hdr.Name)
// File entries added, regardless of size
_, err = p.AddEntry(entry)
if err != nil {
pW.CloseWithError(err)
return
}
if b := tr.RawBytes(); len(b) > 0 {
_, err = p.AddEntry(storage.Entry{
Type: storage.SegmentType,
Payload: b,
})
if err != nil {
pW.CloseWithError(err)
return
}
}
}
// it is allowable, and not uncommon that there is further padding on the
// end of an archive, apart from the expected 1024 null bytes.
remainder, err := ioutil.ReadAll(outputRdr)
if err != nil && err != io.EOF {
pW.CloseWithError(err)
return
}
_, err = p.AddEntry(storage.Entry{
Type: storage.SegmentType,
Payload: remainder,
})
if err != nil {
pW.CloseWithError(err)
return
}
pW.Close()
}()
return pR, nil
}

View file

@ -0,0 +1,9 @@
/*
Package asm provides the API for streaming assembly and disassembly of tar
archives.
Using the `github.com/vbatts/tar-split/tar/storage` for Packing/Unpacking the
metadata for a stream, as well as an implementation of Getting/Putting the file
entries' payload.
*/
package asm

View file

@ -0,0 +1,12 @@
/*
Package storage is for metadata of a tar archive.
Packing and unpacking the Entries of the stream. The types of streams are
either segments of raw bytes (for the raw headers and various padding) and for
an entry marking a file payload.
The raw bytes are stored precisely in the packed (marshalled) Entry, whereas
the file payload marker include the name of the file, size, and crc64 checksum
(for basic file integrity).
*/
package storage

View file

@ -0,0 +1,78 @@
package storage
import "unicode/utf8"
// Entries is for sorting by Position
type Entries []Entry
func (e Entries) Len() int { return len(e) }
func (e Entries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
func (e Entries) Less(i, j int) bool { return e[i].Position < e[j].Position }
// Type of Entry
type Type int
const (
// FileType represents a file payload from the tar stream.
//
// This will be used to map to relative paths on disk. Only Size > 0 will get
// read into a resulting output stream (due to hardlinks).
FileType Type = 1 + iota
// SegmentType represents a raw bytes segment from the archive stream. These raw
// byte segments consist of the raw headers and various padding.
//
// Its payload is to be marshalled base64 encoded.
SegmentType
)
// Entry is the structure for packing and unpacking the information read from
// the Tar archive.
//
// FileType Payload checksum is using `hash/crc64` for basic file integrity,
// _not_ for cryptography.
// From http://www.backplane.com/matt/crc64.html, CRC32 has almost 40,000
// collisions in a sample of 18.2 million, CRC64 had none.
type Entry struct {
Type Type `json:"type"`
Name string `json:"name,omitempty"`
NameRaw []byte `json:"name_raw,omitempty"`
Size int64 `json:"size,omitempty"`
Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
Position int `json:"position"`
}
// SetName will check name for valid UTF-8 string, and set the appropriate
// field. See https://github.com/vbatts/tar-split/issues/17
func (e *Entry) SetName(name string) {
if utf8.ValidString(name) {
e.Name = name
} else {
e.NameRaw = []byte(name)
}
}
// SetNameBytes will check name for valid UTF-8 string, and set the appropriate
// field
func (e *Entry) SetNameBytes(name []byte) {
if utf8.Valid(name) {
e.Name = string(name)
} else {
e.NameRaw = name
}
}
// GetName returns the string for the entry's name, regardless of the field stored in
func (e *Entry) GetName() string {
if len(e.NameRaw) > 0 {
return string(e.NameRaw)
}
return e.Name
}
// GetNameBytes returns the bytes for the entry's name, regardless of the field stored in
func (e *Entry) GetNameBytes() []byte {
if len(e.NameRaw) > 0 {
return e.NameRaw
}
return []byte(e.Name)
}

View file

@ -0,0 +1,104 @@
package storage
import (
"bytes"
"errors"
"hash/crc64"
"io"
"os"
"path/filepath"
)
// FileGetter is the interface for getting a stream of a file payload,
// addressed by name/filename. Presumably, the names will be scoped to relative
// file paths.
type FileGetter interface {
// Get returns a stream for the provided file path
Get(filename string) (output io.ReadCloser, err error)
}
// FilePutter is the interface for storing a stream of a file payload,
// addressed by name/filename.
type FilePutter interface {
// Put returns the size of the stream received, and the crc64 checksum for
// the provided stream
Put(filename string, input io.Reader) (size int64, checksum []byte, err error)
}
// FileGetPutter is the interface that groups both Getting and Putting file
// payloads.
type FileGetPutter interface {
FileGetter
FilePutter
}
// NewPathFileGetter returns a FileGetter that is for files relative to path
// relpath.
func NewPathFileGetter(relpath string) FileGetter {
return &pathFileGetter{root: relpath}
}
type pathFileGetter struct {
root string
}
func (pfg pathFileGetter) Get(filename string) (io.ReadCloser, error) {
return os.Open(filepath.Join(pfg.root, filename))
}
type bufferFileGetPutter struct {
files map[string][]byte
}
func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) {
if _, ok := bfgp.files[name]; !ok {
return nil, errors.New("no such file")
}
b := bytes.NewBuffer(bfgp.files[name])
return &readCloserWrapper{b}, nil
}
func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) {
crc := crc64.New(CRCTable)
buf := bytes.NewBuffer(nil)
cw := io.MultiWriter(crc, buf)
i, err := io.Copy(cw, r)
if err != nil {
return 0, nil, err
}
bfgp.files[name] = buf.Bytes()
return i, crc.Sum(nil), nil
}
type readCloserWrapper struct {
io.Reader
}
func (w *readCloserWrapper) Close() error { return nil }
// NewBufferFileGetPutter is a simple in-memory FileGetPutter
//
// Implication is this is memory intensive...
// Probably best for testing or light weight cases.
func NewBufferFileGetPutter() FileGetPutter {
return &bufferFileGetPutter{
files: map[string][]byte{},
}
}
// NewDiscardFilePutter is a bit bucket FilePutter
func NewDiscardFilePutter() FilePutter {
return &bitBucketFilePutter{}
}
type bitBucketFilePutter struct {
}
func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) {
c := crc64.New(CRCTable)
i, err := io.Copy(c, r)
return i, c.Sum(nil), err
}
// CRCTable is the default table used for crc64 sum calculations
var CRCTable = crc64.MakeTable(crc64.ISO)

View file

@ -0,0 +1,127 @@
package storage
import (
"encoding/json"
"errors"
"io"
"path/filepath"
"unicode/utf8"
)
// ErrDuplicatePath occurs when a tar archive has more than one entry for the
// same file path
var ErrDuplicatePath = errors.New("duplicates of file paths not supported")
// Packer describes the methods to pack Entries to a storage destination
type Packer interface {
// AddEntry packs the Entry and returns its position
AddEntry(e Entry) (int, error)
}
// Unpacker describes the methods to read Entries from a source
type Unpacker interface {
// Next returns the next Entry being unpacked, or error, until io.EOF
Next() (*Entry, error)
}
/* TODO(vbatts) figure out a good model for this
type PackUnpacker interface {
Packer
Unpacker
}
*/
type jsonUnpacker struct {
seen seenNames
dec *json.Decoder
}
func (jup *jsonUnpacker) Next() (*Entry, error) {
var e Entry
err := jup.dec.Decode(&e)
if err != nil {
return nil, err
}
// check for dup name
if e.Type == FileType {
cName := filepath.Clean(e.GetName())
if _, ok := jup.seen[cName]; ok {
return nil, ErrDuplicatePath
}
jup.seen[cName] = struct{}{}
}
return &e, err
}
// NewJSONUnpacker provides an Unpacker that reads Entries (SegmentType and
// FileType) as a json document.
//
// Each Entry read are expected to be delimited by new line.
func NewJSONUnpacker(r io.Reader) Unpacker {
return &jsonUnpacker{
dec: json.NewDecoder(r),
seen: seenNames{},
}
}
type jsonPacker struct {
w io.Writer
e *json.Encoder
pos int
seen seenNames
}
type seenNames map[string]struct{}
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
// if Name is not valid utf8, switch it to raw first.
if e.Name != "" {
if !utf8.ValidString(e.Name) {
e.NameRaw = []byte(e.Name)
e.Name = ""
}
}
// check early for dup name
if e.Type == FileType {
cName := filepath.Clean(e.GetName())
if _, ok := jp.seen[cName]; ok {
return -1, ErrDuplicatePath
}
jp.seen[cName] = struct{}{}
}
e.Position = jp.pos
err := jp.e.Encode(e)
if err != nil {
return -1, err
}
// made it this far, increment now
jp.pos++
return e.Position, nil
}
// NewJSONPacker provides a Packer that writes each Entry (SegmentType and
// FileType) as a json document.
//
// The Entries are delimited by new line.
func NewJSONPacker(w io.Writer) Packer {
return &jsonPacker{
w: w,
e: json.NewEncoder(w),
seen: seenNames{},
}
}
/*
TODO(vbatts) perhaps have a more compact packer/unpacker, maybe using msgapck
(https://github.com/ugorji/go)
Even though, since our jsonUnpacker and jsonPacker just take
io.Reader/io.Writer, then we can get away with passing them a
gzip.Reader/gzip.Writer
*/