1
0
Fork 0

Vendor integration dependencies.

This commit is contained in:
Timo Reimann 2017-02-07 22:33:23 +01:00
parent dd5e3fba01
commit 55b57c736b
2451 changed files with 731611 additions and 0 deletions

View file

@ -0,0 +1,130 @@
package asm
import (
"bytes"
"fmt"
"hash"
"hash/crc64"
"io"
"sync"
"github.com/vbatts/tar-split/tar/storage"
)
// NewOutputTarStream returns an io.ReadCloser that is an assembled tar archive
// stream.
//
// It takes a storage.FileGetter, for mapping the file payloads that are to be read in,
// and a storage.Unpacker, which has access to the rawbytes and file order
// metadata. With the combination of these two items, a precise assembled Tar
// archive is possible.
func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser {
// ... Since these are interfaces, this is possible, so let's not have a nil pointer
if fg == nil || up == nil {
return nil
}
pr, pw := io.Pipe()
go func() {
err := WriteOutputTarStream(fg, up, pw)
if err != nil {
pw.CloseWithError(err)
} else {
pw.Close()
}
}()
return pr
}
// WriteOutputTarStream writes assembled tar archive to a writer.
func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error {
// ... Since these are interfaces, this is possible, so let's not have a nil pointer
if fg == nil || up == nil {
return nil
}
var copyBuffer []byte
var crcHash hash.Hash
var crcSum []byte
var multiWriter io.Writer
for {
entry, err := up.Next()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
switch entry.Type {
case storage.SegmentType:
if _, err := w.Write(entry.Payload); err != nil {
return err
}
case storage.FileType:
if entry.Size == 0 {
continue
}
fh, err := fg.Get(entry.GetName())
if err != nil {
return err
}
if crcHash == nil {
crcHash = crc64.New(storage.CRCTable)
crcSum = make([]byte, 8)
multiWriter = io.MultiWriter(w, crcHash)
copyBuffer = byteBufferPool.Get().([]byte)
defer byteBufferPool.Put(copyBuffer)
} else {
crcHash.Reset()
}
if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil {
fh.Close()
return err
}
if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) {
// I would rather this be a comparable ErrInvalidChecksum or such,
// but since it's coming through the PipeReader, the context of
// _which_ file would be lost...
fh.Close()
return fmt.Errorf("file integrity checksum failed for %q", entry.GetName())
}
fh.Close()
}
}
}
var byteBufferPool = &sync.Pool{
New: func() interface{} {
return make([]byte, 32*1024)
},
}
// copyWithBuffer is taken from stdlib io.Copy implementation
// https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367
func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
for {
nr, er := src.Read(buf)
if nr > 0 {
nw, ew := dst.Write(buf[0:nr])
if nw > 0 {
written += int64(nw)
}
if ew != nil {
err = ew
break
}
if nr != nw {
err = io.ErrShortWrite
break
}
}
if er == io.EOF {
break
}
if er != nil {
err = er
break
}
}
return written, err
}

View file

@ -0,0 +1,141 @@
package asm
import (
"io"
"io/ioutil"
"github.com/vbatts/tar-split/archive/tar"
"github.com/vbatts/tar-split/tar/storage"
)
// NewInputTarStream wraps the Reader stream of a tar archive and provides a
// Reader stream of the same.
//
// In the middle it will pack the segments and file metadata to storage.Packer
// `p`.
//
// The the storage.FilePutter is where payload of files in the stream are
// stashed. If this stashing is not needed, you can provide a nil
// storage.FilePutter. Since the checksumming is still needed, then a default
// of NewDiscardFilePutter will be used internally
func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
// What to do here... folks will want their own access to the Reader that is
// their tar archive stream, but we'll need that same stream to use our
// forked 'archive/tar'.
// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
// from, and we'll MITM the stream to store metadata.
// We'll need a storage.FilePutter too ...
// Another concern, whether to do any storage.FilePutter operations, such that we
// don't extract any amount of the archive. But then again, we're not making
// files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter.
// Perhaps we have a DiscardFilePutter that is a bit bucket.
// we'll return the pipe reader, since TeeReader does not buffer and will
// only read what the outputRdr Read's. Since Tar archives have padding on
// the end, we want to be the one reading the padding, even if the user's
// `archive/tar` doesn't care.
pR, pW := io.Pipe()
outputRdr := io.TeeReader(r, pW)
// we need a putter that will generate the crc64 sums of file payloads
if fp == nil {
fp = storage.NewDiscardFilePutter()
}
go func() {
tr := tar.NewReader(outputRdr)
tr.RawAccounting = true
for {
hdr, err := tr.Next()
if err != nil {
if err != io.EOF {
pW.CloseWithError(err)
return
}
// even when an EOF is reached, there is often 1024 null bytes on
// the end of an archive. Collect them too.
if b := tr.RawBytes(); len(b) > 0 {
_, err := p.AddEntry(storage.Entry{
Type: storage.SegmentType,
Payload: b,
})
if err != nil {
pW.CloseWithError(err)
return
}
}
break // not return. We need the end of the reader.
}
if hdr == nil {
break // not return. We need the end of the reader.
}
if b := tr.RawBytes(); len(b) > 0 {
_, err := p.AddEntry(storage.Entry{
Type: storage.SegmentType,
Payload: b,
})
if err != nil {
pW.CloseWithError(err)
return
}
}
var csum []byte
if hdr.Size > 0 {
var err error
_, csum, err = fp.Put(hdr.Name, tr)
if err != nil {
pW.CloseWithError(err)
return
}
}
entry := storage.Entry{
Type: storage.FileType,
Size: hdr.Size,
Payload: csum,
}
// For proper marshalling of non-utf8 characters
entry.SetName(hdr.Name)
// File entries added, regardless of size
_, err = p.AddEntry(entry)
if err != nil {
pW.CloseWithError(err)
return
}
if b := tr.RawBytes(); len(b) > 0 {
_, err = p.AddEntry(storage.Entry{
Type: storage.SegmentType,
Payload: b,
})
if err != nil {
pW.CloseWithError(err)
return
}
}
}
// it is allowable, and not uncommon that there is further padding on the
// end of an archive, apart from the expected 1024 null bytes.
remainder, err := ioutil.ReadAll(outputRdr)
if err != nil && err != io.EOF {
pW.CloseWithError(err)
return
}
_, err = p.AddEntry(storage.Entry{
Type: storage.SegmentType,
Payload: remainder,
})
if err != nil {
pW.CloseWithError(err)
return
}
pW.Close()
}()
return pR, nil
}

View file

@ -0,0 +1,9 @@
/*
Package asm provides the API for streaming assembly and disassembly of tar
archives.
Using the `github.com/vbatts/tar-split/tar/storage` for Packing/Unpacking the
metadata for a stream, as well as an implementation of Getting/Putting the file
entries' payload.
*/
package asm