Migrate to dep 0.4

This commit is contained in:
Ludovic Fernandez 2018-02-07 23:30:05 +01:00 committed by Traefiker
parent dbd173b4e4
commit 7b19cb5631
255 changed files with 2233 additions and 35153 deletions

View file

@ -1,132 +0,0 @@
// +build linux
package main
import (
"fmt"
"strconv"
"strings"
"syscall"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
var checkpointCommand = cli.Command{
Name: "checkpoint",
Usage: "checkpoint a running container",
ArgsUsage: `<container-id>
Where "<container-id>" is the name for the instance of the container to be
checkpointed.`,
Description: `The checkpoint command saves the state of the container instance.`,
Flags: []cli.Flag{
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"},
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properties"},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
// XXX: Currently this is untested with rootless containers.
if isRootless() {
return fmt.Errorf("runc checkpoint requires root")
}
container, err := getContainer(context)
if err != nil {
return err
}
status, err := container.Status()
if err != nil {
return err
}
if status == libcontainer.Created {
fatalf("Container cannot be checkpointed in created state")
}
defer destroy(container)
options := criuOptions(context)
// these are the mandatory criu options for a container
setPageServer(context, options)
setManageCgroupsMode(context, options)
if err := setEmptyNsMask(context, options); err != nil {
return err
}
if err := container.Checkpoint(options); err != nil {
return err
}
return nil
},
}
func getCheckpointImagePath(context *cli.Context) string {
imagePath := context.String("image-path")
if imagePath == "" {
imagePath = getDefaultImagePath(context)
}
return imagePath
}
func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
// xxx following criu opts are optional
// The dump image can be sent to a criu page server
if psOpt := context.String("page-server"); psOpt != "" {
addressPort := strings.Split(psOpt, ":")
if len(addressPort) != 2 {
fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server"))
}
portInt, err := strconv.Atoi(addressPort[1])
if err != nil {
fatal(fmt.Errorf("Invalid port number"))
}
options.PageServer = libcontainer.CriuPageServerInfo{
Address: addressPort[0],
Port: int32(portInt),
}
}
}
func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) {
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
switch cgOpt {
case "soft":
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT
case "full":
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL
case "strict":
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT
default:
fatal(fmt.Errorf("Invalid manage cgroups mode"))
}
}
}
var namespaceMapping = map[specs.LinuxNamespaceType]int{
specs.NetworkNamespace: syscall.CLONE_NEWNET,
}
func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error {
var nsmask int
for _, ns := range context.StringSlice("empty-ns") {
f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)]
if !exists {
return fmt.Errorf("namespace %q is not supported", ns)
}
nsmask |= f
}
options.EmptyNs = uint32(nsmask)
return nil
}

View file

@ -1,74 +0,0 @@
package main
import (
"os"
"github.com/urfave/cli"
)
var createCommand = cli.Command{
Name: "create",
Usage: "create a container",
ArgsUsage: `<container-id>
Where "<container-id>" is your name for the instance of the container that you
are starting. The name you provide for the container instance must be unique on
your host.`,
Description: `The create command creates an instance of a container for a bundle. The bundle
is a directory with a specification file named "` + specConfig + `" and a root
filesystem.
The specification file includes an args parameter. The args parameter is used
to specify command(s) that get run when the container is started. To change the
command(s) that get executed on start, edit the args parameter of the spec. See
"runc spec --help" for more explanation.`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "bundle, b",
Value: "",
Usage: `path to the root of the bundle directory, defaults to the current directory`,
},
cli.StringFlag{
Name: "console-socket",
Value: "",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Usage: "specify the file to write the process id to",
},
cli.BoolFlag{
Name: "no-pivot",
Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk",
},
cli.BoolFlag{
Name: "no-new-keyring",
Usage: "do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key",
},
cli.IntFlag{
Name: "preserve-fds",
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
if err := revisePidFile(context); err != nil {
return err
}
spec, err := setupSpec(context)
if err != nil {
return err
}
status, err := startContainer(context, spec, true)
if err != nil {
return err
}
// exit with the container's exit status so any external supervisor is
// notified of the exit with the correct exit status.
os.Exit(status)
return nil
},
}

View file

@ -1,84 +0,0 @@
// +build !solaris
package main
import (
"fmt"
"os"
"path/filepath"
"syscall"
"time"
"github.com/opencontainers/runc/libcontainer"
"github.com/urfave/cli"
)
func killContainer(container libcontainer.Container) error {
_ = container.Signal(syscall.SIGKILL, false)
for i := 0; i < 100; i++ {
time.Sleep(100 * time.Millisecond)
if err := container.Signal(syscall.Signal(0), false); err != nil {
destroy(container)
return nil
}
}
return fmt.Errorf("container init still running")
}
var deleteCommand = cli.Command{
Name: "delete",
Usage: "delete any resources held by the container often used with detached container",
ArgsUsage: `<container-id>
Where "<container-id>" is the name for the instance of the container.
EXAMPLE:
For example, if the container id is "ubuntu01" and runc list currently shows the
status of "ubuntu01" as "stopped" the following will delete resources held for
"ubuntu01" removing "ubuntu01" from the runc list of containers:
# runc delete ubuntu01`,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "force, f",
Usage: "Forcibly deletes the container if it is still running (uses SIGKILL)",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
id := context.Args().First()
container, err := getContainer(context)
if err != nil {
if lerr, ok := err.(libcontainer.Error); ok && lerr.Code() == libcontainer.ContainerNotExists {
// if there was an aborted start or something of the sort then the container's directory could exist but
// libcontainer does not see it because the state.json file inside that directory was never created.
path := filepath.Join(context.GlobalString("root"), id)
if e := os.RemoveAll(path); e != nil {
fmt.Fprintf(os.Stderr, "remove %s: %v\n", path, e)
}
}
return err
}
s, err := container.Status()
if err != nil {
return err
}
switch s {
case libcontainer.Stopped:
destroy(container)
case libcontainer.Created:
return killContainer(container)
default:
if context.Bool("force") {
return killContainer(container)
} else {
return fmt.Errorf("cannot delete container %s that is not stopped: %s\n", id, s)
}
}
return nil
},
}

View file

@ -1,260 +0,0 @@
// +build linux
package main
import (
"encoding/json"
"fmt"
"os"
"sync"
"time"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/urfave/cli"
)
// event struct for encoding the event data to json.
type event struct {
Type string `json:"type"`
ID string `json:"id"`
Data interface{} `json:"data,omitempty"`
}
// stats is the runc specific stats structure for stability when encoding and decoding stats.
type stats struct {
CPU cpu `json:"cpu"`
Memory memory `json:"memory"`
Pids pids `json:"pids"`
Blkio blkio `json:"blkio"`
Hugetlb map[string]hugetlb `json:"hugetlb"`
}
type hugetlb struct {
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Failcnt uint64 `json:"failcnt"`
}
type blkioEntry struct {
Major uint64 `json:"major,omitempty"`
Minor uint64 `json:"minor,omitempty"`
Op string `json:"op,omitempty"`
Value uint64 `json:"value,omitempty"`
}
type blkio struct {
IoServiceBytesRecursive []blkioEntry `json:"ioServiceBytesRecursive,omitempty"`
IoServicedRecursive []blkioEntry `json:"ioServicedRecursive,omitempty"`
IoQueuedRecursive []blkioEntry `json:"ioQueueRecursive,omitempty"`
IoServiceTimeRecursive []blkioEntry `json:"ioServiceTimeRecursive,omitempty"`
IoWaitTimeRecursive []blkioEntry `json:"ioWaitTimeRecursive,omitempty"`
IoMergedRecursive []blkioEntry `json:"ioMergedRecursive,omitempty"`
IoTimeRecursive []blkioEntry `json:"ioTimeRecursive,omitempty"`
SectorsRecursive []blkioEntry `json:"sectorsRecursive,omitempty"`
}
type pids struct {
Current uint64 `json:"current,omitempty"`
Limit uint64 `json:"limit,omitempty"`
}
type throttling struct {
Periods uint64 `json:"periods,omitempty"`
ThrottledPeriods uint64 `json:"throttledPeriods,omitempty"`
ThrottledTime uint64 `json:"throttledTime,omitempty"`
}
type cpuUsage struct {
// Units: nanoseconds.
Total uint64 `json:"total,omitempty"`
Percpu []uint64 `json:"percpu,omitempty"`
Kernel uint64 `json:"kernel"`
User uint64 `json:"user"`
}
type cpu struct {
Usage cpuUsage `json:"usage,omitempty"`
Throttling throttling `json:"throttling,omitempty"`
}
type memoryEntry struct {
Limit uint64 `json:"limit"`
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Failcnt uint64 `json:"failcnt"`
}
type memory struct {
Cache uint64 `json:"cache,omitempty"`
Usage memoryEntry `json:"usage,omitempty"`
Swap memoryEntry `json:"swap,omitempty"`
Kernel memoryEntry `json:"kernel,omitempty"`
KernelTCP memoryEntry `json:"kernelTCP,omitempty"`
Raw map[string]uint64 `json:"raw,omitempty"`
}
var eventsCommand = cli.Command{
Name: "events",
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
ArgsUsage: `<container-id>
Where "<container-id>" is the name for the instance of the container.`,
Description: `The events command displays information about the container. By default the
information is displayed once every 5 seconds.`,
Flags: []cli.Flag{
cli.DurationFlag{Name: "interval", Value: 5 * time.Second, Usage: "set the stats collection interval"},
cli.BoolFlag{Name: "stats", Usage: "display the container's stats then exit"},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
container, err := getContainer(context)
if err != nil {
return err
}
duration := context.Duration("interval")
if duration <= 0 {
return fmt.Errorf("duration interval must be greater than 0")
}
status, err := container.Status()
if err != nil {
return err
}
if status == libcontainer.Stopped {
return fmt.Errorf("container with id %s is not running", container.ID())
}
var (
stats = make(chan *libcontainer.Stats, 1)
events = make(chan *event, 1024)
group = &sync.WaitGroup{}
)
group.Add(1)
go func() {
defer group.Done()
enc := json.NewEncoder(os.Stdout)
for e := range events {
if err := enc.Encode(e); err != nil {
logrus.Error(err)
}
}
}()
if context.Bool("stats") {
s, err := container.Stats()
if err != nil {
return err
}
events <- &event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
close(events)
group.Wait()
return nil
}
go func() {
for range time.Tick(context.Duration("interval")) {
s, err := container.Stats()
if err != nil {
logrus.Error(err)
continue
}
stats <- s
}
}()
n, err := container.NotifyOOM()
if err != nil {
return err
}
for {
select {
case _, ok := <-n:
if ok {
// this means an oom event was received, if it is !ok then
// the channel was closed because the container stopped and
// the cgroups no longer exist.
events <- &event{Type: "oom", ID: container.ID()}
} else {
n = nil
}
case s := <-stats:
events <- &event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
}
if n == nil {
close(events)
break
}
}
group.Wait()
return nil
},
}
func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
cg := ls.CgroupStats
if cg == nil {
return nil
}
var s stats
s.Pids.Current = cg.PidsStats.Current
s.Pids.Limit = cg.PidsStats.Limit
s.CPU.Usage.Kernel = cg.CpuStats.CpuUsage.UsageInKernelmode
s.CPU.Usage.User = cg.CpuStats.CpuUsage.UsageInUsermode
s.CPU.Usage.Total = cg.CpuStats.CpuUsage.TotalUsage
s.CPU.Usage.Percpu = cg.CpuStats.CpuUsage.PercpuUsage
s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
s.Memory.Cache = cg.MemoryStats.Cache
s.Memory.Kernel = convertMemoryEntry(cg.MemoryStats.KernelUsage)
s.Memory.KernelTCP = convertMemoryEntry(cg.MemoryStats.KernelTCPUsage)
s.Memory.Swap = convertMemoryEntry(cg.MemoryStats.SwapUsage)
s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage)
s.Memory.Raw = cg.MemoryStats.Stats
s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive)
s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive)
s.Blkio.IoQueuedRecursive = convertBlkioEntry(cg.BlkioStats.IoQueuedRecursive)
s.Blkio.IoServiceTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceTimeRecursive)
s.Blkio.IoWaitTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoWaitTimeRecursive)
s.Blkio.IoMergedRecursive = convertBlkioEntry(cg.BlkioStats.IoMergedRecursive)
s.Blkio.IoTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoTimeRecursive)
s.Blkio.SectorsRecursive = convertBlkioEntry(cg.BlkioStats.SectorsRecursive)
s.Hugetlb = make(map[string]hugetlb)
for k, v := range cg.HugetlbStats {
s.Hugetlb[k] = convertHugtlb(v)
}
return &s
}
func convertHugtlb(c cgroups.HugetlbStats) hugetlb {
return hugetlb{
Usage: c.Usage,
Max: c.MaxUsage,
Failcnt: c.Failcnt,
}
}
func convertMemoryEntry(c cgroups.MemoryData) memoryEntry {
return memoryEntry{
Limit: c.Limit,
Usage: c.Usage,
Max: c.MaxUsage,
Failcnt: c.Failcnt,
}
}
func convertBlkioEntry(c []cgroups.BlkioStatEntry) []blkioEntry {
var out []blkioEntry
for _, e := range c {
out = append(out, blkioEntry{
Major: e.Major,
Minor: e.Minor,
Op: e.Op,
Value: e.Value,
})
}
return out
}

View file

@ -1,211 +0,0 @@
// +build linux
package main
import (
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
var execCommand = cli.Command{
Name: "exec",
Usage: "execute new process inside the container",
ArgsUsage: `<container-id> <command> [command options] || -p process.json <container-id>
Where "<container-id>" is the name for the instance of the container and
"<command>" is the command to be executed in the container.
"<command>" can't be empty unless a "-p" flag provided.
EXAMPLE:
For example, if the container is configured to run the linux ps command the
following will output a list of processes running in the container:
# runc exec <container-id> ps`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "console-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "cwd",
Usage: "current working directory in the container",
},
cli.StringSliceFlag{
Name: "env, e",
Usage: "set environment variables",
},
cli.BoolFlag{
Name: "tty, t",
Usage: "allocate a pseudo-TTY",
},
cli.StringFlag{
Name: "user, u",
Usage: "UID (format: <uid>[:<gid>])",
},
cli.StringFlag{
Name: "process, p",
Usage: "path to the process.json",
},
cli.BoolFlag{
Name: "detach,d",
Usage: "detach from the container's process",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Usage: "specify the file to write the process id to",
},
cli.StringFlag{
Name: "process-label",
Usage: "set the asm process label for the process commonly used with selinux",
},
cli.StringFlag{
Name: "apparmor",
Usage: "set the apparmor profile for the process",
},
cli.BoolFlag{
Name: "no-new-privs",
Usage: "set the no new privileges value for the process",
},
cli.StringSliceFlag{
Name: "cap, c",
Value: &cli.StringSlice{},
Usage: "add a capability to the bounding set for the process",
},
cli.BoolFlag{
Name: "no-subreaper",
Usage: "disable the use of the subreaper used to reap reparented processes",
Hidden: true,
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, minArgs); err != nil {
return err
}
if err := revisePidFile(context); err != nil {
return err
}
status, err := execProcess(context)
if err == nil {
os.Exit(status)
}
return fmt.Errorf("exec failed: %v", err)
},
SkipArgReorder: true,
}
func execProcess(context *cli.Context) (int, error) {
container, err := getContainer(context)
if err != nil {
return -1, err
}
status, err := container.Status()
if err != nil {
return -1, err
}
if status == libcontainer.Stopped {
return -1, fmt.Errorf("cannot exec a container that has stopped")
}
path := context.String("process")
if path == "" && len(context.Args()) == 1 {
return -1, fmt.Errorf("process args cannot be empty")
}
detach := context.Bool("detach")
state, err := container.State()
if err != nil {
return -1, err
}
bundle := utils.SearchLabels(state.Config.Labels, "bundle")
p, err := getProcess(context, bundle)
if err != nil {
return -1, err
}
r := &runner{
enableSubreaper: false,
shouldDestroy: false,
container: container,
consoleSocket: context.String("console-socket"),
detach: detach,
pidFile: context.String("pid-file"),
}
return r.run(p)
}
func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
if path := context.String("process"); path != "" {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
var p specs.Process
if err := json.NewDecoder(f).Decode(&p); err != nil {
return nil, err
}
return &p, validateProcessSpec(&p)
}
// process via cli flags
if err := os.Chdir(bundle); err != nil {
return nil, err
}
spec, err := loadSpec(specConfig)
if err != nil {
return nil, err
}
p := spec.Process
p.Args = context.Args()[1:]
// override the cwd, if passed
if context.String("cwd") != "" {
p.Cwd = context.String("cwd")
}
if ap := context.String("apparmor"); ap != "" {
p.ApparmorProfile = ap
}
if l := context.String("process-label"); l != "" {
p.SelinuxLabel = l
}
if caps := context.StringSlice("cap"); len(caps) > 0 {
for _, c := range caps {
p.Capabilities.Bounding = append(p.Capabilities.Bounding, c)
p.Capabilities.Inheritable = append(p.Capabilities.Inheritable, c)
p.Capabilities.Effective = append(p.Capabilities.Effective, c)
p.Capabilities.Permitted = append(p.Capabilities.Permitted, c)
p.Capabilities.Ambient = append(p.Capabilities.Ambient, c)
}
}
// append the passed env variables
p.Env = append(p.Env, context.StringSlice("env")...)
// set the tty
if context.IsSet("tty") {
p.Terminal = context.Bool("tty")
}
if context.IsSet("no-new-privs") {
p.NoNewPrivileges = context.Bool("no-new-privs")
}
// override the user, if passed
if context.String("user") != "" {
u := strings.SplitN(context.String("user"), ":", 2)
if len(u) > 1 {
gid, err := strconv.Atoi(u[1])
if err != nil {
return nil, fmt.Errorf("parsing %s as int for gid failed: %v", u[1], err)
}
p.User.GID = uint32(gid)
}
uid, err := strconv.Atoi(u[0])
if err != nil {
return nil, fmt.Errorf("parsing %s as int for uid failed: %v", u[0], err)
}
p.User.UID = uint32(uid)
}
return &p, nil
}

View file

@ -1,115 +0,0 @@
// +build linux
package main
import (
"fmt"
"strconv"
"strings"
"syscall"
"github.com/urfave/cli"
)
var signalMap = map[string]syscall.Signal{
"ABRT": syscall.SIGABRT,
"ALRM": syscall.SIGALRM,
"BUS": syscall.SIGBUS,
"CHLD": syscall.SIGCHLD,
"CLD": syscall.SIGCLD,
"CONT": syscall.SIGCONT,
"FPE": syscall.SIGFPE,
"HUP": syscall.SIGHUP,
"ILL": syscall.SIGILL,
"INT": syscall.SIGINT,
"IO": syscall.SIGIO,
"IOT": syscall.SIGIOT,
"KILL": syscall.SIGKILL,
"PIPE": syscall.SIGPIPE,
"POLL": syscall.SIGPOLL,
"PROF": syscall.SIGPROF,
"PWR": syscall.SIGPWR,
"QUIT": syscall.SIGQUIT,
"SEGV": syscall.SIGSEGV,
"STKFLT": syscall.SIGSTKFLT,
"STOP": syscall.SIGSTOP,
"SYS": syscall.SIGSYS,
"TERM": syscall.SIGTERM,
"TRAP": syscall.SIGTRAP,
"TSTP": syscall.SIGTSTP,
"TTIN": syscall.SIGTTIN,
"TTOU": syscall.SIGTTOU,
"UNUSED": syscall.SIGUNUSED,
"URG": syscall.SIGURG,
"USR1": syscall.SIGUSR1,
"USR2": syscall.SIGUSR2,
"VTALRM": syscall.SIGVTALRM,
"WINCH": syscall.SIGWINCH,
"XCPU": syscall.SIGXCPU,
"XFSZ": syscall.SIGXFSZ,
}
var killCommand = cli.Command{
Name: "kill",
Usage: "kill sends the specified signal (default: SIGTERM) to the container's init process",
ArgsUsage: `<container-id> [signal]
Where "<container-id>" is the name for the instance of the container and
"[signal]" is the signal to be sent to the init process.
EXAMPLE:
For example, if the container id is "ubuntu01" the following will send a "KILL"
signal to the init process of the "ubuntu01" container:
# runc kill ubuntu01 KILL`,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "all, a",
Usage: "send the specified signal to all processes inside the container",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, minArgs); err != nil {
return err
}
if err := checkArgs(context, 2, maxArgs); err != nil {
return err
}
container, err := getContainer(context)
if err != nil {
return err
}
sigstr := context.Args().Get(1)
if sigstr == "" {
sigstr = "SIGTERM"
}
signal, err := parseSignal(sigstr)
if err != nil {
return err
}
if err := container.Signal(signal, context.Bool("all")); err != nil {
return err
}
return nil
},
}
func parseSignal(rawSignal string) (syscall.Signal, error) {
s, err := strconv.Atoi(rawSignal)
if err == nil {
sig := syscall.Signal(s)
for _, msig := range signalMap {
if sig == msig {
return sig, nil
}
}
return -1, fmt.Errorf("unknown signal %q", rawSignal)
}
signal, ok := signalMap[strings.TrimPrefix(strings.ToUpper(rawSignal), "SIG")]
if !ok {
return -1, fmt.Errorf("unknown signal %q", rawSignal)
}
return signal, nil
}

View file

@ -1,114 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/syndtr/gocapability/capability"
)
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
var capabilityMap map[string]capability.Cap
func init() {
capabilityMap = make(map[string]capability.Cap)
last := capability.CAP_LAST_CAP
// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
if last == capability.Cap(63) {
last = capability.CAP_BLOCK_SUSPEND
}
for _, cap := range capability.List() {
if cap > last {
continue
}
capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
capabilityMap[capKey] = cap
}
}
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
bounding := []capability.Cap{}
for _, c := range capConfig.Bounding {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
bounding = append(bounding, v)
}
effective := []capability.Cap{}
for _, c := range capConfig.Effective {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
effective = append(effective, v)
}
inheritable := []capability.Cap{}
for _, c := range capConfig.Inheritable {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
inheritable = append(inheritable, v)
}
permitted := []capability.Cap{}
for _, c := range capConfig.Permitted {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
permitted = append(permitted, v)
}
ambient := []capability.Cap{}
for _, c := range capConfig.Ambient {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
ambient = append(ambient, v)
}
pid, err := capability.NewPid(os.Getpid())
if err != nil {
return nil, err
}
return &containerCapabilities{
bounding: bounding,
effective: effective,
inheritable: inheritable,
permitted: permitted,
ambient: ambient,
pid: pid,
}, nil
}
type containerCapabilities struct {
pid capability.Capabilities
bounding []capability.Cap
effective []capability.Cap
inheritable []capability.Cap
permitted []capability.Cap
ambient []capability.Cap
}
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
func (c *containerCapabilities) ApplyBoundingSet() error {
c.pid.Clear(capability.BOUNDS)
c.pid.Set(capability.BOUNDS, c.bounding...)
return c.pid.Apply(capability.BOUNDS)
}
// Apply sets all the capabilities for the current process in the config.
func (c *containerCapabilities) ApplyCaps() error {
c.pid.Clear(allCapabilityTypes)
c.pid.Set(capability.BOUNDS, c.bounding...)
c.pid.Set(capability.PERMITTED, c.permitted...)
c.pid.Set(capability.INHERITABLE, c.inheritable...)
c.pid.Set(capability.EFFECTIVE, c.effective...)
c.pid.Set(capability.AMBIENT, c.ambient...)
return c.pid.Apply(allCapabilityTypes)
}

View file

@ -1,10 +0,0 @@
// +build linux,!go1.5
package libcontainer
import "syscall"
// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building
// with earlier versions
func enableSetgroups(sys *syscall.SysProcAttr) {
}

View file

@ -1,17 +0,0 @@
package libcontainer
import (
"io"
"os"
)
// Console represents a pseudo TTY.
type Console interface {
io.ReadWriteCloser
// Path returns the filesystem path to the slave side of the pty.
Path() string
// Fd returns the fd for the master of the pty.
File() *os.File
}

View file

@ -1,13 +0,0 @@
// +build freebsd
package libcontainer
import (
"errors"
)
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View file

@ -1,157 +0,0 @@
package libcontainer
import (
"fmt"
"os"
"unsafe"
"golang.org/x/sys/unix"
)
func ConsoleFromFile(f *os.File) Console {
return &linuxConsole{
master: f,
}
}
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (Console, error) {
master, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
if err := saneTerminal(master); err != nil {
return nil, err
}
console, err := ptsname(master)
if err != nil {
return nil, err
}
if err := unlockpt(master); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
}, nil
}
// linuxConsole is a linux pseudo TTY for use within a container.
type linuxConsole struct {
master *os.File
slavePath string
}
func (c *linuxConsole) File() *os.File {
return c.master
}
func (c *linuxConsole) Path() string {
return c.slavePath
}
func (c *linuxConsole) Read(b []byte) (int, error) {
return c.master.Read(b)
}
func (c *linuxConsole) Write(b []byte) (int, error) {
return c.master.Write(b)
}
func (c *linuxConsole) Close() error {
if m := c.master; m != nil {
return m.Close()
}
return nil
}
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func (c *linuxConsole) mount() error {
oldMask := unix.Umask(0000)
defer unix.Umask(oldMask)
f, err := os.Create("/dev/console")
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return unix.Mount(c.slavePath, "/dev/console", "bind", unix.MS_BIND, "")
}
// dupStdio opens the slavePath for the console and dups the fds to the current
// processes stdio, fd 0,1,2.
func (c *linuxConsole) dupStdio() error {
slave, err := c.open(unix.O_RDWR)
if err != nil {
return err
}
fd := int(slave.Fd())
for _, i := range []int{0, 1, 2} {
if err := unix.Dup3(fd, i, 0); err != nil {
return err
}
}
return nil
}
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
func (c *linuxConsole) open(flag int) (*os.File, error) {
r, e := unix.Open(c.slavePath, flag, 0)
if e != nil {
return nil, &os.PathError{
Op: "open",
Path: c.slavePath,
Err: e,
}
}
return os.NewFile(uintptr(r), c.slavePath), nil
}
func ioctl(fd uintptr, flag, data uintptr) error {
if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
var n int32
if err := ioctl(f.Fd(), unix.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}
// saneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair
// created by us acts normally. In particular, a not-very-well-known default of
// Linux unix98 ptys is that they have +onlcr by default. While this isn't a
// problem for terminal emulators, because we relay data from the terminal we
// also relay that funky line discipline.
func saneTerminal(terminal *os.File) error {
// Go doesn't have a wrapper for any of the termios ioctls.
var termios unix.Termios
if err := ioctl(terminal.Fd(), unix.TCGETS, uintptr(unsafe.Pointer(&termios))); err != nil {
return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error())
}
// Set -onlcr so we don't have to deal with \r.
termios.Oflag &^= unix.ONLCR
if err := ioctl(terminal.Fd(), unix.TCSETS, uintptr(unsafe.Pointer(&termios))); err != nil {
return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error())
}
return nil
}

View file

@ -1,11 +0,0 @@
package libcontainer
import (
"errors"
)
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on Solaris")
}

View file

@ -1,30 +0,0 @@
package libcontainer
// newConsole returns an initialized console that can be used within a container
func newConsole() (Console, error) {
return &windowsConsole{}, nil
}
// windowsConsole is a Windows pseudo TTY for use within a container.
type windowsConsole struct {
}
func (c *windowsConsole) Fd() uintptr {
return 0
}
func (c *windowsConsole) Path() string {
return ""
}
func (c *windowsConsole) Read(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Write(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Close() error {
return nil
}

View file

@ -1,166 +0,0 @@
// Package libcontainer provides a native Go implementation for creating containers
// with namespaces, cgroups, capabilities, and filesystem access controls.
// It allows you to manage the lifecycle of the container performing additional operations
// after the container is created.
package libcontainer
import (
"os"
"time"
"github.com/opencontainers/runc/libcontainer/configs"
)
// Status is the status of a container.
type Status int
const (
// Created is the status that denotes the container exists but has not been run yet.
Created Status = iota
// Running is the status that denotes the container exists and is running.
Running
// Pausing is the status that denotes the container exists, it is in the process of being paused.
Pausing
// Paused is the status that denotes the container exists, but all its processes are paused.
Paused
// Stopped is the status that denotes the container does not have a created or running process.
Stopped
)
func (s Status) String() string {
switch s {
case Created:
return "created"
case Running:
return "running"
case Pausing:
return "pausing"
case Paused:
return "paused"
case Stopped:
return "stopped"
default:
return "unknown"
}
}
// BaseState represents the platform agnostic pieces relating to a
// running container's state
type BaseState struct {
// ID is the container ID.
ID string `json:"id"`
// InitProcessPid is the init process id in the parent namespace.
InitProcessPid int `json:"init_process_pid"`
// InitProcessStartTime is the init process start time in clock cycles since boot time.
InitProcessStartTime string `json:"init_process_start"`
// Created is the unix timestamp for the creation time of the container in UTC
Created time.Time `json:"created"`
// Config is the container's configuration.
Config configs.Config `json:"config"`
}
// BaseContainer is a libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found. BaseContainer includes methods that are platform agnostic.
type BaseContainer interface {
// Returns the ID of the container
ID() string
// Returns the current status of the container.
//
// errors:
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
Status() (Status, error)
// State returns the current container's state information.
//
// errors:
// SystemError - System error.
State() (*State, error)
// Returns the current config of the container.
Config() configs.Config
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
//
// errors:
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
// the Container state is PAUSED in which case every PID in the slice is valid.
Processes() ([]int, error)
// Returns statistics for the container.
//
// errors:
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
Stats() (*Stats, error)
// Set resources of container as configured
//
// We can use this to change resources when containers are running.
//
// errors:
// SystemError - System error.
Set(config configs.Config) error
// Start a process inside the container. Returns error if process fails to
// start. You can track process lifecycle with passed Process structure.
//
// errors:
// ContainerNotExists - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// SystemError - System error.
Start(process *Process) (err error)
// Run immediately starts the process inside the container. Returns error if process
// fails to start. It does not block waiting for the exec fifo after start returns but
// opens the fifo after start returns.
//
// errors:
// ContainerNotExists - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// SystemError - System error.
Run(process *Process) (err error)
// Destroys the container, if its in a valid state, after killing any
// remaining running processes.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
//
// Running containers must first be stopped using Signal(..).
// Paused containers must first be resumed using Resume(..).
//
// errors:
// ContainerNotStopped - Container is still running,
// ContainerPaused - Container is paused,
// SystemError - System error.
Destroy() error
// Signal sends the provided signal code to the container's initial process.
//
// If all is specified the signal is sent to all processes in the container
// including the initial process.
//
// errors:
// SystemError - System error.
Signal(s os.Signal, all bool) error
// Exec signals the container to exec the users process at the end of the init.
//
// errors:
// SystemError - System error.
Exec() error
}

File diff suppressed because it is too large Load diff

View file

@ -1,20 +0,0 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -1,20 +0,0 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -1,39 +0,0 @@
// +build linux freebsd
package libcontainer
// cgroup restoring strategy provided by criu
type cgMode uint32
const (
CRIU_CG_MODE_SOFT cgMode = 3 + iota // restore cgroup properties if only dir created by criu
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
)
type CriuPageServerInfo struct {
Address string // IP address of CRIU page server
Port int32 // port number of CRIU page server
}
type VethPairName struct {
ContainerInterfaceName string
HostInterfaceName string
}
type CriuOpts struct {
ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
ParentImage string // direcotry for storing parent image files in pre-dump and dump
LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections
ShellJob bool // allow to dump and restore shell jobs
FileLocks bool // handle file locks, for safety
PreDump bool // call criu predump to perform iterative checkpoint
PageServer CriuPageServerInfo // allow to dump to criu page server
VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode cgMode // dump or restore cgroup mode
EmptyNs uint32 // don't c/r properties for namespace from this mask
}

View file

@ -1,6 +0,0 @@
package libcontainer
// TODO Windows: This can ultimately be entirely factored out as criu is
// a Unix concept not relevant on Windows.
type CriuOpts struct {
}

View file

@ -1,70 +0,0 @@
package libcontainer
import "io"
// ErrorCode is the API error code type.
type ErrorCode int
// API error codes.
const (
// Factory errors
IdInUse ErrorCode = iota
InvalidIdFormat
// Container errors
ContainerNotExists
ContainerPaused
ContainerNotStopped
ContainerNotRunning
ContainerNotPaused
// Process errors
NoProcessOps
// Common errors
ConfigInvalid
ConsoleExists
SystemError
)
func (c ErrorCode) String() string {
switch c {
case IdInUse:
return "Id already in use"
case InvalidIdFormat:
return "Invalid format"
case ContainerPaused:
return "Container paused"
case ConfigInvalid:
return "Invalid configuration"
case SystemError:
return "System error"
case ContainerNotExists:
return "Container does not exist"
case ContainerNotStopped:
return "Container is not stopped"
case ContainerNotRunning:
return "Container is not running"
case ConsoleExists:
return "Console exists for process"
case ContainerNotPaused:
return "Container is not paused"
case NoProcessOps:
return "No process operations"
default:
return "Unknown error"
}
}
// Error is the API error type.
type Error interface {
error
// Returns an error if it failed to write the detail of the Error to w.
// The detail of the Error may include the error message and a
// representation of the stack trace.
Detail(w io.Writer) error
// Returns the error code for this error.
Code() ErrorCode
}

View file

@ -1,44 +0,0 @@
package libcontainer
import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type Factory interface {
// Creates a new container with the given id and starts the initial process inside it.
// id must be a string containing only letters, digits and underscores and must contain
// between 1 and 1024 characters, inclusive.
//
// The id must not already be in use by an existing container. Containers created using
// a factory with the same path (and filesystem) must have distinct ids.
//
// Returns the new container with a running process.
//
// errors:
// IdInUse - id is already in use by a container
// InvalidIdFormat - id has incorrect format
// ConfigInvalid - config is invalid
// Systemerror - System error
//
// On error, any partially created container parts are cleaned up (the operation is atomic).
Create(id string, config *configs.Config) (Container, error)
// Load takes an ID for an existing container and returns the container information
// from the state. This presents a read only view of the container.
//
// errors:
// Path does not exist
// System error
Load(id string) (Container, error)
// StartInitialization is an internal API to libcontainer used during the reexec of the
// container.
//
// Errors:
// Pipe connection error
// System error
StartInitialization() error
// Type returns info string about factory type (e.g. lxc, libcontainer...)
Type() string
}

View file

@ -1,337 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"regexp"
"runtime/debug"
"strconv"
"syscall"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/utils"
)
const (
stateFilename = "state.json"
execFifoFilename = "exec.fifo"
)
var (
idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
maxIdLen = 1024
)
// InitArgs returns an options func to configure a LinuxFactory with the
// provided init binary path and arguments.
func InitArgs(args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) (err error) {
if len(args) > 0 {
// Resolve relative paths to ensure that its available
// after directory changes.
if args[0], err = filepath.Abs(args[0]); err != nil {
return newGenericError(err, ConfigInvalid)
}
}
l.InitArgs = args
return nil
}
}
// SystemdCgroups is an options func to configure a LinuxFactory to return
// containers that use systemd to create and manage cgroups.
func SystemdCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &systemd.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// Cgroupfs is an options func to configure a LinuxFactory to return
// containers that use the native cgroups filesystem implementation to
// create and manage cgroups.
func Cgroupfs(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &fs.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// RootlessCgroups is an options func to configure a LinuxFactory to
// return containers that use the "rootless" cgroup manager, which will
// fail to do any operations not possible to do with an unprivileged user.
// It should only be used in conjunction with rootless containers.
func RootlessCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &rootless.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
if err != nil {
return err
}
if !mounted {
if err := syscall.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
return err
}
}
return nil
}
// CriuPath returns an option func to configure a LinuxFactory with the
// provided criupath
func CriuPath(criupath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.CriuPath = criupath
return nil
}
}
// New returns a linux based container factory based in the root directory and
// configures the factory with the provided option funcs.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
if root != "" {
if err := os.MkdirAll(root, 0700); err != nil {
return nil, newGenericError(err, SystemError)
}
}
l := &LinuxFactory{
Root: root,
InitArgs: []string{"/proc/self/exe", "init"},
Validator: validate.New(),
CriuPath: "criu",
}
Cgroupfs(l)
for _, opt := range options {
if err := opt(l); err != nil {
return nil, err
}
}
return l, nil
}
// LinuxFactory implements the default factory interface for linux based systems.
type LinuxFactory struct {
// Root directory for the factory to store state.
Root string
// InitArgs are arguments for calling the init responsibilities for spawning
// a container.
InitArgs []string
// CriuPath is the path to the criu binary used for checkpoint and restore of
// containers.
CriuPath string
// Validator provides validation to container configurations.
Validator validate.Validator
// NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
}
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
if err := l.validateID(id); err != nil {
return nil, err
}
if err := l.Validator.Validate(config); err != nil {
return nil, newGenericError(err, ConfigInvalid)
}
uid, err := config.HostRootUID()
if err != nil {
return nil, newGenericError(err, SystemError)
}
gid, err := config.HostRootGID()
if err != nil {
return nil, newGenericError(err, SystemError)
}
containerRoot := filepath.Join(l.Root, id)
if _, err := os.Stat(containerRoot); err == nil {
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
} else if !os.IsNotExist(err) {
return nil, newGenericError(err, SystemError)
}
if err := os.MkdirAll(containerRoot, 0711); err != nil {
return nil, newGenericError(err, SystemError)
}
if err := os.Chown(containerRoot, uid, gid); err != nil {
return nil, newGenericError(err, SystemError)
}
if config.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
id: id,
root: containerRoot,
config: config,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
c.state = &stoppedState{c: c}
return c, nil
}
func (l *LinuxFactory) Load(id string) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
state, err := l.loadState(containerRoot, id)
if err != nil {
return nil, err
}
r := &nonChildProcess{
processPid: state.InitProcessPid,
processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors,
}
// We have to use the RootlessManager.
if state.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
initProcess: r,
initProcessStartTime: state.InitProcessStartTime,
id: id,
config: &state.Config,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
}
c.state = &loadedState{c: c}
if err := c.refreshState(); err != nil {
return nil, err
}
return c, nil
}
func (l *LinuxFactory) Type() string {
return "libcontainer"
}
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
var (
pipefd, rootfd int
consoleSocket *os.File
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
envStateDir = os.Getenv("_LIBCONTAINER_STATEDIR")
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
)
// Get the INITPIPE.
pipefd, err = strconv.Atoi(envInitPipe)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
}
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
)
defer pipe.Close()
// Only init processes have STATEDIR.
rootfd = -1
if it == initStandard {
if rootfd, err = strconv.Atoi(envStateDir); err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_STATEDIR=%s to int: %s", envStateDir, err)
}
}
if envConsole != "" {
console, err := strconv.Atoi(envConsole)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
}
consoleSocket = os.NewFile(uintptr(console), "console-socket")
defer consoleSocket.Close()
}
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
defer func() {
// We have an error during the initialization of the container's init,
// send it back to the parent process in the form of an initError.
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
}()
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
}
}()
i, err := newContainerInit(it, pipe, consoleSocket, rootfd)
if err != nil {
return err
}
// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
return i.Init()
}
func (l *LinuxFactory) loadState(root, id string) (*State, error) {
f, err := os.Open(filepath.Join(root, stateFilename))
if err != nil {
if os.IsNotExist(err) {
return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
}
return nil, newGenericError(err, SystemError)
}
defer f.Close()
var state *State
if err := json.NewDecoder(f).Decode(&state); err != nil {
return nil, newGenericError(err, SystemError)
}
return state, nil
}
func (l *LinuxFactory) validateID(id string) error {
if !idRegex.MatchString(id) {
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
}
if len(id) > maxIdLen {
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
}
return nil
}

View file

@ -1,92 +0,0 @@
package libcontainer
import (
"fmt"
"io"
"text/template"
"time"
"github.com/opencontainers/runc/libcontainer/stacktrace"
)
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Message }}
Message: {{.Message}}
{{end}}
Frames:{{range $i, $frame := .Stack.Frames}}
---
{{$i}}: {{$frame.Function}}
Package: {{$frame.Package}}
File: {{$frame.File}}@{{$frame.Line}}{{end}}
`))
func newGenericError(err error, c ErrorCode) Error {
if le, ok := err.(Error); ok {
return le
}
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: c,
Stack: stacktrace.Capture(1),
}
if err != nil {
gerr.Message = err.Error()
}
return gerr
}
func newSystemError(err error) Error {
return createSystemError(err, "")
}
func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
return createSystemError(err, fmt.Sprintf(cause, v...))
}
func newSystemErrorWithCause(err error, cause string) Error {
return createSystemError(err, cause)
}
// createSystemError creates the specified error with the correct number of
// stack frames skipped. This is only to be called by the other functions for
// formatting the error.
func createSystemError(err error, cause string) Error {
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: SystemError,
Cause: cause,
Stack: stacktrace.Capture(2),
}
if err != nil {
gerr.Message = err.Error()
}
return gerr
}
type genericError struct {
Timestamp time.Time
ECode ErrorCode
Err error `json:"-"`
Cause string
Message string
Stack stacktrace.Stacktrace
}
func (e *genericError) Error() string {
if e.Cause == "" {
return e.Message
}
frame := e.Stack.Frames[0]
return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
}
func (e *genericError) Code() ErrorCode {
return e.ECode
}
func (e *genericError) Detail(w io.Writer) error {
return errorTemplate.Execute(w, e)
}

View file

@ -1,500 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"io"
"net"
"os"
"strings"
"syscall"
"unsafe"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/vishvananda/netlink"
)
type initType string
const (
initSetns initType = "setns"
initStandard initType = "standard"
)
type pid struct {
Pid int `json:"pid"`
}
// network is an internal struct used to setup container networks.
type network struct {
configs.Network
// TempVethPeerName is a unique temporary veth peer name that was placed into
// the container's namespace.
TempVethPeerName string `json:"temp_veth_peer_name"`
}
// initConfig is used for transferring parameters from Exec() to Init()
type initConfig struct {
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities *configs.Capabilities `json:"capabilities"`
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
Config *configs.Config `json:"config"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
CreateConsole bool `json:"create_console"`
Rootless bool `json:"rootless"`
}
type initer interface {
Init() error
}
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, stateDirFD int) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
}
if err := populateProcessEnvironment(config.Env); err != nil {
return nil, err
}
switch t {
case initSetns:
return &linuxSetnsInit{
pipe: pipe,
consoleSocket: consoleSocket,
config: config,
}, nil
case initStandard:
return &linuxStandardInit{
pipe: pipe,
consoleSocket: consoleSocket,
parentPid: syscall.Getppid(),
config: config,
stateDirFD: stateDirFD,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
}
// populateProcessEnvironment loads the provided environment variables into the
// current processes's environment.
func populateProcessEnvironment(env []string) error {
for _, pair := range env {
p := strings.SplitN(pair, "=", 2)
if len(p) < 2 {
return fmt.Errorf("invalid environment '%v'", pair)
}
if err := os.Setenv(p[0], p[1]); err != nil {
return err
}
}
return nil
}
// finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors
// before executing the command inside the namespace
func finalizeNamespace(config *initConfig) error {
// Ensure that all unwanted fds we may have accidentally
// inherited are marked close-on-exec so they stay out of the
// container
if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
return err
}
capabilities := &configs.Capabilities{}
if config.Capabilities != nil {
capabilities = config.Capabilities
} else if config.Config.Capabilities != nil {
capabilities = config.Config.Capabilities
}
w, err := newContainerCapList(capabilities)
if err != nil {
return err
}
// drop capabilities in bounding set before changing user
if err := w.ApplyBoundingSet(); err != nil {
return err
}
// preserve existing capabilities while we change users
if err := system.SetKeepCaps(); err != nil {
return err
}
if err := setupUser(config); err != nil {
return err
}
if err := system.ClearKeepCaps(); err != nil {
return err
}
if err := w.ApplyCaps(); err != nil {
return err
}
if config.Cwd != "" {
if err := syscall.Chdir(config.Cwd); err != nil {
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
}
}
return nil
}
// setupConsole sets up the console from inside the container, and sends the
// master pty fd to the config.Pipe (using cmsg). This is done to ensure that
// consoles are scoped to a container properly (see runc#814 and the many
// issues related to that). This has to be run *after* we've pivoted to the new
// rootfs (and the users' configuration is entirely set up).
func setupConsole(socket *os.File, config *initConfig, mount bool) error {
defer socket.Close()
// At this point, /dev/ptmx points to something that we would expect. We
// used to change the owner of the slave path, but since the /dev/pts mount
// can have gid=X set (at the users' option). So touching the owner of the
// slave PTY is not necessary, as the kernel will handle that for us. Note
// however, that setupUser (specifically fixStdioPermissions) *will* change
// the UID owner of the console to be the user the process will run as (so
// they can actually control their console).
console, err := newConsole()
if err != nil {
return err
}
// After we return from here, we don't need the console anymore.
defer console.Close()
linuxConsole, ok := console.(*linuxConsole)
if !ok {
return fmt.Errorf("failed to cast console to *linuxConsole")
}
// Mount the console inside our rootfs.
if mount {
if err := linuxConsole.mount(); err != nil {
return err
}
}
// While we can access console.master, using the API is a good idea.
if err := utils.SendFd(socket, linuxConsole.File()); err != nil {
return err
}
// Now, dup over all the things.
return linuxConsole.dupStdio()
}
// syncParentReady sends to the given pipe a JSON payload which indicates that
// the init is ready to Exec the child process. It then waits for the parent to
// indicate that it is cleared to Exec.
func syncParentReady(pipe io.ReadWriter) error {
// Tell parent.
if err := writeSync(pipe, procReady); err != nil {
return err
}
// Wait for parent to give the all-clear.
if err := readSync(pipe, procRun); err != nil {
return err
}
return nil
}
// syncParentHooks sends to the given pipe a JSON payload which indicates that
// the parent should execute pre-start hooks. It then waits for the parent to
// indicate that it is cleared to resume.
func syncParentHooks(pipe io.ReadWriter) error {
// Tell parent.
if err := writeSync(pipe, procHooks); err != nil {
return err
}
// Wait for parent to give the all-clear.
if err := readSync(pipe, procResume); err != nil {
return err
}
return nil
}
// setupUser changes the groups, gid, and uid for the user inside the container
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: 0,
Gid: 0,
Home: "/",
}
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
}
var addGroups []int
if len(config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}
if config.Rootless {
if execUser.Uid != 0 {
return fmt.Errorf("cannot run as a non-root user in a rootless container")
}
if execUser.Gid != 0 {
return fmt.Errorf("cannot run as a non-root group in a rootless container")
}
// We cannot set any additional groups in a rootless container and thus we
// bail if the user asked us to do so. TODO: We currently can't do this
// earlier, but if libcontainer.Process.User was typesafe this might work.
if len(addGroups) > 0 {
return fmt.Errorf("cannot set any additional groups in a rootless container")
}
}
// before we change to the container's user make sure that the processes STDIO
// is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(config, execUser); err != nil {
return err
}
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
// There's nothing we can do about /etc/group entries, so we silently
// ignore setting groups here (since the user didn't explicitly ask us to
// set the group).
if !config.Rootless {
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
}
}
if err := system.Setgid(execUser.Gid); err != nil {
return err
}
if err := system.Setuid(execUser.Uid); err != nil {
return err
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
return err
}
}
return nil
}
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
// The ownership needs to match because it is created outside of the container and needs to be
// localized.
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
var null syscall.Stat_t
if err := syscall.Stat("/dev/null", &null); err != nil {
return err
}
for _, fd := range []uintptr{
os.Stdin.Fd(),
os.Stderr.Fd(),
os.Stdout.Fd(),
} {
var s syscall.Stat_t
if err := syscall.Fstat(int(fd), &s); err != nil {
return err
}
// Skip chown of /dev/null if it was used as one of the STDIO fds.
if s.Rdev == null.Rdev {
continue
}
// Skip chown if s.Gid is actually an unmapped gid in the host. While
// this is a bit dodgy if it just so happens that the console _is_
// owned by overflow_gid, there's no way for us to disambiguate this as
// a userspace program.
if _, err := config.Config.HostGID(int(s.Gid)); err != nil {
continue
}
// We only change the uid owner (as it is possible for the mount to
// prefer a different gid, and there's no reason for us to change it).
// The reason why we don't just leave the default uid=X mount setup is
// that users expect to be able to actually use their console. Without
// this code, you couldn't effectively run as a non-root user inside a
// container and also have a console set up.
if err := syscall.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
return err
}
}
return nil
}
// setupNetwork sets up and initializes any network interface inside the container.
func setupNetwork(config *initConfig) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.initialize(config); err != nil {
return err
}
}
return nil
}
func setupRoute(config *configs.Config) error {
for _, config := range config.Routes {
_, dst, err := net.ParseCIDR(config.Destination)
if err != nil {
return err
}
src := net.ParseIP(config.Source)
if src == nil {
return fmt.Errorf("Invalid source for route: %s", config.Source)
}
gw := net.ParseIP(config.Gateway)
if gw == nil {
return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
}
l, err := netlink.LinkByName(config.InterfaceName)
if err != nil {
return err
}
route := &netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
Dst: dst,
Src: src,
Gw: gw,
LinkIndex: l.Attrs().Index,
}
if err := netlink.RouteAdd(route); err != nil {
return err
}
}
return nil
}
func setupRlimits(limits []configs.Rlimit, pid int) error {
for _, rlimit := range limits {
if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
}
}
return nil
}
const _P_PID = 1
type siginfo struct {
si_signo int32
si_errno int32
si_code int32
// below here is a union; si_pid is the only field we use
si_pid int32
// Pad to 128 bytes as detailed in blockUntilWaitable
pad [96]byte
}
// isWaitable returns true if the process has exited false otherwise.
// Its based off blockUntilWaitable in src/os/wait_waitid.go
func isWaitable(pid int) (bool, error) {
si := &siginfo{}
_, _, e := syscall.Syscall6(syscall.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), syscall.WEXITED|syscall.WNOWAIT|syscall.WNOHANG, 0, 0)
if e != 0 {
return false, os.NewSyscallError("waitid", e)
}
return si.si_pid != 0, nil
}
// isNoChildren returns true if err represents a syscall.ECHILD false otherwise
func isNoChildren(err error) bool {
switch err := err.(type) {
case syscall.Errno:
if err == syscall.ECHILD {
return true
}
case *os.SyscallError:
if err.Err == syscall.ECHILD {
return true
}
}
return false
}
// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
// If s is SIGKILL then it will wait for each process to exit.
// For all other signals it will check if the process is ready to report its
// exit status and only if it is will a wait be performed.
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
var procs []*os.Process
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Warn(err)
}
pids, err := m.GetAllPids()
if err != nil {
m.Freeze(configs.Thawed)
return err
}
for _, pid := range pids {
p, err := os.FindProcess(pid)
if err != nil {
logrus.Warn(err)
continue
}
procs = append(procs, p)
if err := p.Signal(s); err != nil {
logrus.Warn(err)
}
}
if err := m.Freeze(configs.Thawed); err != nil {
logrus.Warn(err)
}
for _, p := range procs {
if s != syscall.SIGKILL {
if ok, err := isWaitable(p.Pid); err != nil {
if !isNoChildren(err) {
logrus.Warn("signalAllProcesses: ", p.Pid, err)
}
continue
} else if !ok {
// Not ready to report so don't wait
continue
}
}
if _, err := p.Wait(); err != nil {
if !isNoChildren(err) {
logrus.Warn("wait: ", err)
}
}
}
return nil
}

View file

@ -1,91 +0,0 @@
// +build linux
package libcontainer
import (
"syscall"
"github.com/vishvananda/netlink/nl"
)
// list of known message types we want to send to bootstrap program
// The number is randomly chosen to not conflict with known netlink types
const (
InitMsg uint16 = 62000
CloneFlagsAttr uint16 = 27281
NsPathsAttr uint16 = 27282
UidmapAttr uint16 = 27283
GidmapAttr uint16 = 27284
SetgroupAttr uint16 = 27285
OomScoreAdjAttr uint16 = 27286
RootlessAttr uint16 = 27287
// When syscall.NLA_HDRLEN is in gccgo, take this out.
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
)
type Int32msg struct {
Type uint16
Value uint32
}
// Serialize serializes the message.
// Int32msg has the following representation
// | nlattr len | nlattr type |
// | uint32 value |
func (msg *Int32msg) Serialize() []byte {
buf := make([]byte, msg.Len())
native := nl.NativeEndian()
native.PutUint16(buf[0:2], uint16(msg.Len()))
native.PutUint16(buf[2:4], msg.Type)
native.PutUint32(buf[4:8], msg.Value)
return buf
}
func (msg *Int32msg) Len() int {
return syscall_NLA_HDRLEN + 4
}
// Bytemsg has the following representation
// | nlattr len | nlattr type |
// | value | pad |
type Bytemsg struct {
Type uint16
Value []byte
}
func (msg *Bytemsg) Serialize() []byte {
l := msg.Len()
buf := make([]byte, (l+syscall.NLA_ALIGNTO-1) & ^(syscall.NLA_ALIGNTO-1))
native := nl.NativeEndian()
native.PutUint16(buf[0:2], uint16(l))
native.PutUint16(buf[2:4], msg.Type)
copy(buf[4:], msg.Value)
return buf
}
func (msg *Bytemsg) Len() int {
return syscall_NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
}
type Boolmsg struct {
Type uint16
Value bool
}
func (msg *Boolmsg) Serialize() []byte {
buf := make([]byte, msg.Len())
native := nl.NativeEndian()
native.PutUint16(buf[0:2], uint16(msg.Len()))
native.PutUint16(buf[2:4], msg.Type)
if msg.Value {
buf[4] = 1
} else {
buf[4] = 0
}
return buf
}
func (msg *Boolmsg) Len() int {
return syscall_NLA_HDRLEN + 1
}

View file

@ -1,259 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"net"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/vishvananda/netlink"
)
var strategies = map[string]networkStrategy{
"veth": &veth{},
"loopback": &loopback{},
}
// networkStrategy represents a specific network configuration for
// a container's networking stack
type networkStrategy interface {
create(*network, int) error
initialize(*network) error
detach(*configs.Network) error
attach(*configs.Network) error
}
// getStrategy returns the specific network strategy for the
// provided type.
func getStrategy(tpe string) (networkStrategy, error) {
s, exists := strategies[tpe]
if !exists {
return nil, fmt.Errorf("unknown strategy type %q", tpe)
}
return s, nil
}
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
out := &NetworkInterface{Name: interfaceName}
// This can happen if the network runtime information is missing - possible if the
// container was created by an old version of libcontainer.
if interfaceName == "" {
return out, nil
}
type netStatsPair struct {
// Where to write the output.
Out *uint64
// The network stats file to read.
File string
}
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
netStats := []netStatsPair{
{Out: &out.RxBytes, File: "tx_bytes"},
{Out: &out.RxPackets, File: "tx_packets"},
{Out: &out.RxErrors, File: "tx_errors"},
{Out: &out.RxDropped, File: "tx_dropped"},
{Out: &out.TxBytes, File: "rx_bytes"},
{Out: &out.TxPackets, File: "rx_packets"},
{Out: &out.TxErrors, File: "rx_errors"},
{Out: &out.TxDropped, File: "rx_dropped"},
}
for _, netStat := range netStats {
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
if err != nil {
return nil, err
}
*(netStat.Out) = data
}
return out, nil
}
// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
if err != nil {
return 0, err
}
return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
}
// loopback is a network strategy that provides a basic loopback device
type loopback struct {
}
func (l *loopback) create(n *network, nspid int) error {
return nil
}
func (l *loopback) initialize(config *network) error {
return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}})
}
func (l *loopback) attach(n *configs.Network) (err error) {
return nil
}
func (l *loopback) detach(n *configs.Network) (err error) {
return nil
}
// veth is a network strategy that uses a bridge and creates
// a veth pair, one that is attached to the bridge on the host and the other
// is placed inside the container's namespace
type veth struct {
}
func (v *veth) detach(n *configs.Network) (err error) {
return netlink.LinkSetMaster(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
}
// attach a container network interface to an external network
func (v *veth) attach(n *configs.Network) (err error) {
brl, err := netlink.LinkByName(n.Bridge)
if err != nil {
return err
}
br, ok := brl.(*netlink.Bridge)
if !ok {
return fmt.Errorf("Wrong device type %T", brl)
}
host, err := netlink.LinkByName(n.HostInterfaceName)
if err != nil {
return err
}
if err := netlink.LinkSetMaster(host, br); err != nil {
return err
}
if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
return err
}
if n.HairpinMode {
if err := netlink.LinkSetHairpin(host, true); err != nil {
return err
}
}
if err := netlink.LinkSetUp(host); err != nil {
return err
}
return nil
}
func (v *veth) create(n *network, nspid int) (err error) {
tmpName, err := v.generateTempPeerName()
if err != nil {
return err
}
n.TempVethPeerName = tmpName
if n.Bridge == "" {
return fmt.Errorf("bridge is not specified")
}
veth := &netlink.Veth{
LinkAttrs: netlink.LinkAttrs{
Name: n.HostInterfaceName,
TxQLen: n.TxQueueLen,
},
PeerName: n.TempVethPeerName,
}
if err := netlink.LinkAdd(veth); err != nil {
return err
}
defer func() {
if err != nil {
netlink.LinkDel(veth)
}
}()
if err := v.attach(&n.Network); err != nil {
return err
}
child, err := netlink.LinkByName(n.TempVethPeerName)
if err != nil {
return err
}
return netlink.LinkSetNsPid(child, nspid)
}
func (v *veth) generateTempPeerName() (string, error) {
return utils.GenerateRandomName("veth", 7)
}
func (v *veth) initialize(config *network) error {
peer := config.TempVethPeerName
if peer == "" {
return fmt.Errorf("peer is not specified")
}
child, err := netlink.LinkByName(peer)
if err != nil {
return err
}
if err := netlink.LinkSetDown(child); err != nil {
return err
}
if err := netlink.LinkSetName(child, config.Name); err != nil {
return err
}
// get the interface again after we changed the name as the index also changes.
if child, err = netlink.LinkByName(config.Name); err != nil {
return err
}
if config.MacAddress != "" {
mac, err := net.ParseMAC(config.MacAddress)
if err != nil {
return err
}
if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
return err
}
}
ip, err := netlink.ParseAddr(config.Address)
if err != nil {
return err
}
if err := netlink.AddrAdd(child, ip); err != nil {
return err
}
if config.IPv6Address != "" {
ip6, err := netlink.ParseAddr(config.IPv6Address)
if err != nil {
return err
}
if err := netlink.AddrAdd(child, ip6); err != nil {
return err
}
}
if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
return err
}
if err := netlink.LinkSetUp(child); err != nil {
return err
}
if config.Gateway != "" {
gw := net.ParseIP(config.Gateway)
if err := netlink.RouteAdd(&netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
LinkIndex: child.Attrs().Index,
Gw: gw,
}); err != nil {
return err
}
}
if config.IPv6Gateway != "" {
gw := net.ParseIP(config.IPv6Gateway)
if err := netlink.RouteAdd(&netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
LinkIndex: child.Attrs().Index,
Gw: gw,
}); err != nil {
return err
}
}
return nil
}

View file

@ -1,89 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
const oomCgroupName = "memory"
type PressureLevel uint
const (
LowPressure PressureLevel = iota
MediumPressure
CriticalPressure
)
func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) {
evFile, err := os.Open(filepath.Join(cgDir, evName))
if err != nil {
return nil, err
}
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
if syserr != 0 {
evFile.Close()
return nil, syserr
}
eventfd := os.NewFile(fd, "eventfd")
eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
eventfd.Close()
evFile.Close()
return nil, err
}
ch := make(chan struct{})
go func() {
defer func() {
close(ch)
eventfd.Close()
evFile.Close()
}()
buf := make([]byte, 8)
for {
if _, err := eventfd.Read(buf); err != nil {
return
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
return
}
ch <- struct{}{}
}
}()
return ch, nil
}
// notifyOnOOM returns channel on which you can expect event about OOM,
// if process died without OOM this channel will be closed.
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
dir := paths[oomCgroupName]
if dir == "" {
return nil, fmt.Errorf("path %q missing", oomCgroupName)
}
return registerMemoryEvent(dir, "memory.oom_control", "")
}
func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) {
dir := paths[oomCgroupName]
if dir == "" {
return nil, fmt.Errorf("path %q missing", oomCgroupName)
}
if level > CriticalPressure {
return nil, fmt.Errorf("invalid pressure level %d", level)
}
levelStr := []string{"low", "medium", "critical"}[level]
return registerMemoryEvent(dir, "memory.pressure_level", levelStr)
}

View file

@ -1,106 +0,0 @@
package libcontainer
import (
"fmt"
"io"
"math"
"os"
"github.com/opencontainers/runc/libcontainer/configs"
)
type processOperations interface {
wait() (*os.ProcessState, error)
signal(sig os.Signal) error
pid() int
}
// Process specifies the configuration and IO for a process inside
// a container.
type Process struct {
// The command to be run followed by any arguments.
Args []string
// Env specifies the environment variables for the process.
Env []string
// User will set the uid and gid of the executing process running inside the container
// local to the container's user and group configuration.
User string
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string
// Cwd will change the processes current working directory inside the container's rootfs.
Cwd string
// Stdin is a pointer to a reader which provides the standard input stream.
Stdin io.Reader
// Stdout is a pointer to a writer which receives the standard output stream.
Stdout io.Writer
// Stderr is a pointer to a writer which receives the standard error stream.
Stderr io.Writer
// ExtraFiles specifies additional open files to be inherited by the container
ExtraFiles []*os.File
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask
Capabilities *configs.Capabilities
// AppArmorProfile specifies the profile to apply to the process and is
// changed at the time the process is execed
AppArmorProfile string
// Label specifies the label to apply to the process. It is commonly used by selinux
Label string
// NoNewPrivileges controls whether processes can gain additional privileges.
NoNewPrivileges *bool
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []configs.Rlimit
// ConsoleSocket provides the masterfd console.
ConsoleSocket *os.File
ops processOperations
}
// Wait waits for the process to exit.
// Wait releases any resources associated with the Process
func (p Process) Wait() (*os.ProcessState, error) {
if p.ops == nil {
return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
}
return p.ops.wait()
}
// Pid returns the process ID
func (p Process) Pid() (int, error) {
// math.MinInt32 is returned here, because it's invalid value
// for the kill() system call.
if p.ops == nil {
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
}
return p.ops.pid(), nil
}
// Signal sends a signal to the Process.
func (p Process) Signal(sig os.Signal) error {
if p.ops == nil {
return newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
}
return p.ops.signal(sig)
}
// IO holds the process's STDIO
type IO struct {
Stdin io.WriteCloser
Stdout io.ReadCloser
Stderr io.ReadCloser
}

View file

@ -1,483 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"
"syscall"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)
type parentProcess interface {
// pid returns the pid for the running process.
pid() int
// start starts the process execution.
start() error
// send a SIGKILL to the process and wait for the exit.
terminate() error
// wait waits on the process returning the process state.
wait() (*os.ProcessState, error)
// startTime returns the process start time.
startTime() (string, error)
signal(os.Signal) error
externalDescriptors() []string
setExternalDescriptors(fds []string)
}
type setnsProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
cgroupPaths map[string]string
config *initConfig
fds []string
process *Process
bootstrapData io.Reader
}
func (p *setnsProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
}
func (p *setnsProcess) signal(sig os.Signal) error {
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return syscall.Kill(p.pid(), s)
}
func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
err = p.cmd.Start()
p.childPipe.Close()
if err != nil {
return newSystemErrorWithCause(err, "starting setns process")
}
if p.bootstrapData != nil {
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
}
if err = p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "executing setns process")
}
// We can't join cgroups if we're in a rootless container.
if !p.config.Rootless && len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting rlimits for process")
}
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
return newSystemErrorWithCause(err, "writing config to pipe")
}
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procReady:
// This shouldn't happen.
panic("unexpected procReady in setns")
case procHooks:
// This shouldn't happen.
panic("unexpected procHooks in setns")
default:
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
}
})
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
return ierr
}
return nil
}
// execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
func (p *setnsProcess) execSetns() error {
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
return newSystemErrorWithCause(err, "waiting on setns process to finish")
}
if !status.Success() {
p.cmd.Wait()
return newSystemError(&exec.ExitError{ProcessState: status})
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return newSystemErrorWithCause(err, "reading pid from init pipe")
}
process, err := os.FindProcess(pid.Pid)
if err != nil {
return err
}
p.cmd.Process = process
p.process.ops = p
return nil
}
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
// avoid the process becoming a zombie.
func (p *setnsProcess) terminate() error {
if p.cmd.Process == nil {
return nil
}
err := p.cmd.Process.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *setnsProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
// Return actual ProcessState even on Wait error
return p.cmd.ProcessState, err
}
func (p *setnsProcess) pid() int {
return p.cmd.Process.Pid
}
func (p *setnsProcess) externalDescriptors() []string {
return p.fds
}
func (p *setnsProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
type initProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
config *initConfig
manager cgroups.Manager
container *linuxContainer
fds []string
process *Process
bootstrapData io.Reader
sharePidns bool
rootDir *os.File
}
func (p *initProcess) pid() int {
return p.cmd.Process.Pid
}
func (p *initProcess) externalDescriptors() []string {
return p.fds
}
// execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
// This is called by initProcess.start function
func (p *initProcess) execSetns() error {
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
return err
}
if !status.Success() {
p.cmd.Wait()
return &exec.ExitError{ProcessState: status}
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return err
}
process, err := os.FindProcess(pid.Pid)
if err != nil {
return err
}
p.cmd.Process = process
p.process.ops = p
return nil
}
func (p *initProcess) start() error {
defer p.parentPipe.Close()
err := p.cmd.Start()
p.process.ops = p
p.childPipe.Close()
p.rootDir.Close()
if err != nil {
p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command")
}
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
if err := p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "running exec setns process for init")
}
// Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up.
fds, err := getPipeFds(p.pid())
if err != nil {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children can escape the
// cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
}
}()
if err := p.createNetworkInterfaces(); err != nil {
return newSystemErrorWithCause(err, "creating network interfaces")
}
if err := p.sendConfig(); err != nil {
return newSystemErrorWithCause(err, "sending config to init process")
}
var (
sentRun bool
sentResume bool
)
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procReady:
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting rlimits for ready process")
}
// call prestart hooks
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"),
}
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
}
}
}
}
// Sync with child.
if err := writeSync(p.parentPipe, procRun); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'run'")
}
sentRun = true
case procHooks:
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"),
}
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
}
}
}
// Sync with child.
if err := writeSync(p.parentPipe, procResume); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'resume'")
}
sentResume = true
default:
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
}
return nil
})
if !sentRun {
return newSystemErrorWithCause(ierr, "container init")
}
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "shutting down init pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
return ierr
}
return nil
}
func (p *initProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
if err != nil {
return p.cmd.ProcessState, err
}
// we should kill all processes in cgroup when init is died if we use host PID namespace
if p.sharePidns {
signalAllProcesses(p.manager, syscall.SIGKILL)
}
return p.cmd.ProcessState, nil
}
func (p *initProcess) terminate() error {
if p.cmd.Process == nil {
return nil
}
err := p.cmd.Process.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *initProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
}
func (p *initProcess) sendConfig() error {
// send the config to the container's init process, we don't use JSON Encode
// here because there might be a problem in JSON decoder in some cases, see:
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
return utils.WriteJSON(p.parentPipe, p.config)
}
func (p *initProcess) createNetworkInterfaces() error {
for _, config := range p.config.Config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
n := &network{
Network: *config,
}
if err := strategy.create(n, p.pid()); err != nil {
return err
}
p.config.Networks = append(p.config.Networks, n)
}
return nil
}
func (p *initProcess) signal(sig os.Signal) error {
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return syscall.Kill(p.pid(), s)
}
func (p *initProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func getPipeFds(pid int) ([]string, error) {
fds := make([]string, 3)
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
for i := 0; i < 3; i++ {
// XXX: This breaks if the path is not a valid symlink (which can
// happen in certain particularly unlucky mount namespace setups).
f := filepath.Join(dirPath, strconv.Itoa(i))
target, err := os.Readlink(f)
if err != nil {
// Ignore permission errors, for rootless containers and other
// non-dumpable processes. if we can't get the fd for a particular
// file, there's not much we can do.
if os.IsPermission(err) {
continue
}
return fds, err
}
fds[i] = target
}
return fds, nil
}
// InitializeIO creates pipes for use with the process's stdio and returns the
// opposite side for each. Do not use this if you want to have a pseudoterminal
// set up for you by libcontainer (TODO: fix that too).
// TODO: This is mostly unnecessary, and should be handled by clients.
func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
var fds []uintptr
i = &IO{}
// cleanup in case of an error
defer func() {
if err != nil {
for _, fd := range fds {
syscall.Close(int(fd))
}
}
}()
// STDIN
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
fds = append(fds, r.Fd(), w.Fd())
p.Stdin, i.Stdin = r, w
// STDOUT
if r, w, err = os.Pipe(); err != nil {
return nil, err
}
fds = append(fds, r.Fd(), w.Fd())
p.Stdout, i.Stdout = w, r
// STDERR
if r, w, err = os.Pipe(); err != nil {
return nil, err
}
fds = append(fds, r.Fd(), w.Fd())
p.Stderr, i.Stderr = w, r
// change ownership of the pipes incase we are in a user namespace
for _, fd := range fds {
if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil {
return nil, err
}
}
return i, nil
}

View file

@ -1,122 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"github.com/opencontainers/runc/libcontainer/system"
)
func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
var (
err error
)
proc, err := os.FindProcess(pid)
if err != nil {
return nil, err
}
started, err := system.GetProcessStartTime(pid)
if err != nil {
return nil, err
}
return &restoredProcess{
proc: proc,
processStartTime: started,
fds: fds,
}, nil
}
type restoredProcess struct {
proc *os.Process
processStartTime string
fds []string
}
func (p *restoredProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *restoredProcess) pid() int {
return p.proc.Pid
}
func (p *restoredProcess) terminate() error {
err := p.proc.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *restoredProcess) wait() (*os.ProcessState, error) {
// TODO: how do we wait on the actual process?
// maybe use --exec-cmd in criu
st, err := p.proc.Wait()
if err != nil {
return nil, err
}
return st, nil
}
func (p *restoredProcess) startTime() (string, error) {
return p.processStartTime, nil
}
func (p *restoredProcess) signal(s os.Signal) error {
return p.proc.Signal(s)
}
func (p *restoredProcess) externalDescriptors() []string {
return p.fds
}
func (p *restoredProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
// nonChildProcess represents a process where the calling process is not
// the parent process. This process is created when a factory loads a container from
// a persisted state.
type nonChildProcess struct {
processPid int
processStartTime string
fds []string
}
func (p *nonChildProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *nonChildProcess) pid() int {
return p.processPid
}
func (p *nonChildProcess) terminate() error {
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
}
func (p *nonChildProcess) wait() (*os.ProcessState, error) {
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
}
func (p *nonChildProcess) startTime() (string, error) {
return p.processStartTime, nil
}
func (p *nonChildProcess) signal(s os.Signal) error {
proc, err := os.FindProcess(p.processPid)
if err != nil {
return err
}
return proc.Signal(s)
}
func (p *nonChildProcess) externalDescriptors() []string {
return p.fds
}
func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}

View file

@ -1,812 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/symlink"
"github.com/mrunalp/fileutils"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/selinux/go-selinux/label"
)
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// needsSetupDev returns true if /dev needs to be set up.
func needsSetupDev(config *configs.Config) bool {
for _, m := range config.Mounts {
if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
return false
}
}
return true
}
// prepareRootfs sets up the devices, mount points, and filesystems for use
// inside a new mount namespace. It doesn't set anything as ro or pivot_root,
// because console setup happens inside the caller. You must call
// finalizeRootfs in order to finish the rootfs setup.
func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
if err := prepareRoot(config); err != nil {
return newSystemErrorWithCause(err, "preparing rootfs")
}
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
for _, precmd := range m.PremountCmds {
if err := mountCmd(precmd); err != nil {
return newSystemErrorWithCause(err, "running premount command")
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
}
for _, postcmd := range m.PostmountCmds {
if err := mountCmd(postcmd); err != nil {
return newSystemErrorWithCause(err, "running postmount command")
}
}
}
if setupDev {
if err := createDevices(config); err != nil {
return newSystemErrorWithCause(err, "creating device nodes")
}
if err := setupPtmx(config); err != nil {
return newSystemErrorWithCause(err, "setting up ptmx")
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return newSystemErrorWithCause(err, "setting up /dev symlinks")
}
}
// Signal the parent to run the pre-start hooks.
// The hooks are run after the mounts are setup, but before we switch to the new
// root, so that the old root is still available in the hooks for any mount
// manipulations.
if err := syncParentHooks(pipe); err != nil {
return err
}
// The reason these operations are done here rather than in finalizeRootfs
// is because the console-handling code gets quite sticky if we have to set
// up the console before doing the pivot_root(2). This is because the
// Console API has to also work with the ExecIn case, which means that the
// API must be able to deal with being inside as well as outside the
// container. It's just cleaner to do this here (at the expense of the
// operation not being perfectly split).
if err := syscall.Chdir(config.Rootfs); err != nil {
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
} else {
err = pivotRoot(config.Rootfs)
}
if err != nil {
return newSystemErrorWithCause(err, "jailing process inside rootfs")
}
if setupDev {
if err := reOpenDevNull(); err != nil {
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
}
}
return nil
}
// finalizeRootfs actually switches the root of the process and sets anything
// to ro if necessary. You must call prepareRootfs first.
func finalizeRootfs(config *configs.Config) (err error) {
// remount dev as ro if specified
for _, m := range config.Mounts {
if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
if m.Flags&syscall.MS_RDONLY == syscall.MS_RDONLY {
if err := remountReadonly(m); err != nil {
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
}
}
break
}
}
// set rootfs ( / ) as readonly
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return newSystemErrorWithCause(err, "setting rootfs as readonly")
}
}
syscall.Umask(0022)
return nil
}
func mountCmd(cmd configs.Command) error {
command := exec.Command(cmd.Path, cmd.Args[:]...)
command.Env = cmd.Env
command.Dir = cmd.Dir
if out, err := command.CombinedOutput(); err != nil {
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
}
return nil
}
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
var (
dest = m.Destination
)
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
switch m.Device {
case "proc", "sysfs":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
// Selinux kernels do not support labeling of /proc or /sys
return mountPropagate(m, rootfs, "")
case "mqueue":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
// older kernels do not support labeling of /dev/mqueue
if err := mountPropagate(m, rootfs, ""); err != nil {
return err
}
return label.SetFileLabel(dest, mountLabel)
}
return nil
case "tmpfs":
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
tmpDir := ""
stat, err := os.Stat(dest)
if err != nil {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
if copyUp {
tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir")
if err != nil {
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
}
defer os.RemoveAll(tmpDir)
m.Destination = tmpDir
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
if copyUp {
if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
if err1 := syscall.Unmount(tmpDir, syscall.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
if err := syscall.Mount(tmpDir, dest, "", syscall.MS_MOVE, ""); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
if err1 := syscall.Unmount(tmpDir, syscall.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
}
if stat != nil {
if err = os.Chmod(dest, stat.Mode()); err != nil {
return err
}
}
return nil
case "bind":
stat, err := os.Stat(m.Source)
if err != nil {
// error out if the source of a bind mount does not exist as we will be
// unable to bind anything to it.
return err
}
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
// update the mount with the correct dest after symlinks are resolved.
m.Destination = dest
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
return err
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
// bind mount won't change mount options, we need remount to make mount options effective.
// first check that we have non-default options required before attempting a remount
if m.Flags&^(syscall.MS_REC|syscall.MS_REMOUNT|syscall.MS_BIND) != 0 {
// only remount if unique mount options are set
if err := remount(m, rootfs); err != nil {
return err
}
}
if m.Relabel != "" {
if err := label.Validate(m.Relabel); err != nil {
return err
}
shared := label.IsShared(m.Relabel)
if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
return err
}
}
case "cgroup":
binds, err := getCgroupMounts(m)
if err != nil {
return err
}
var merged []string
for _, b := range binds {
ss := filepath.Base(b.Destination)
if strings.Contains(ss, ",") {
merged = append(merged, ss)
}
}
tmpfs := &configs.Mount{
Source: "tmpfs",
Device: "tmpfs",
Destination: m.Destination,
Flags: defaultMountFlags,
Data: "mode=755",
PropagationFlags: m.PropagationFlags,
}
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
return err
}
for _, b := range binds {
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
return err
}
}
for _, mc := range merged {
for _, ss := range strings.Split(mc, ",") {
// symlink(2) is very dumb, it will just shove the path into
// the link and doesn't do any checks or relative path
// conversion. Also, don't error out if the cgroup already exists.
if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
return err
}
}
}
if m.Flags&syscall.MS_RDONLY != 0 {
// remount cgroup root as readonly
mcgrouproot := &configs.Mount{
Source: m.Destination,
Device: "bind",
Destination: m.Destination,
Flags: defaultMountFlags | syscall.MS_RDONLY | syscall.MS_BIND,
}
if err := remount(mcgrouproot, rootfs); err != nil {
return err
}
}
default:
// ensure that the destination of the mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
var err error
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
// update the mount with the correct dest after symlinks are resolved.
m.Destination = dest
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return mountPropagate(m, rootfs, mountLabel)
}
return nil
}
func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
mounts, err := cgroups.GetCgroupMounts(false)
if err != nil {
return nil, err
}
cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
var binds []*configs.Mount
for _, mm := range mounts {
dir, err := mm.GetOwnCgroup(cgroupPaths)
if err != nil {
return nil, err
}
relDir, err := filepath.Rel(mm.Root, dir)
if err != nil {
return nil, err
}
binds = append(binds, &configs.Mount{
Device: "bind",
Source: filepath.Join(mm.Mountpoint, relDir),
Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)),
Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags,
PropagationFlags: m.PropagationFlags,
})
}
return binds, nil
}
// checkMountDestination checks to ensure that the mount destination is not over the top of /proc.
// dest is required to be an abs path and have any symlinks resolved before calling this function.
func checkMountDestination(rootfs, dest string) error {
invalidDestinations := []string{
"/proc",
}
// White list, it should be sub directories of invalid destinations
validDestinations := []string{
// These entries can be bind mounted by files emulated by fuse,
// so commands like top, free displays stats in container.
"/proc/cpuinfo",
"/proc/diskstats",
"/proc/meminfo",
"/proc/stat",
"/proc/swaps",
"/proc/uptime",
"/proc/net/dev",
}
for _, valid := range validDestinations {
path, err := filepath.Rel(filepath.Join(rootfs, valid), dest)
if err != nil {
return err
}
if path == "." {
return nil
}
}
for _, invalid := range invalidDestinations {
path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
if err != nil {
return err
}
if path == "." || !strings.HasPrefix(path, "..") {
return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
}
}
return nil
}
func setupDevSymlinks(rootfs string) error {
var links = [][2]string{
{"/proc/self/fd", "/dev/fd"},
{"/proc/self/fd/0", "/dev/stdin"},
{"/proc/self/fd/1", "/dev/stdout"},
{"/proc/self/fd/2", "/dev/stderr"},
}
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
// in /dev if it exists in /proc.
if _, err := os.Stat("/proc/kcore"); err == nil {
links = append(links, [2]string{"/proc/kcore", "/dev/core"})
}
for _, link := range links {
var (
src = link[0]
dst = filepath.Join(rootfs, link[1])
)
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
return fmt.Errorf("symlink %s %s %s", src, dst, err)
}
}
return nil
}
// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs
// this method will make them point to `/dev/null` in this container's rootfs. This
// needs to be called after we chroot/pivot into the container's rootfs so that any
// symlinks are resolved locally.
func reOpenDevNull() error {
var stat, devNullStat syscall.Stat_t
file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
if err != nil {
return fmt.Errorf("Failed to open /dev/null - %s", err)
}
defer file.Close()
if err := syscall.Fstat(int(file.Fd()), &devNullStat); err != nil {
return err
}
for fd := 0; fd < 3; fd++ {
if err := syscall.Fstat(fd, &stat); err != nil {
return err
}
if stat.Rdev == devNullStat.Rdev {
// Close and re-open the fd.
if err := syscall.Dup3(int(file.Fd()), fd, 0); err != nil {
return err
}
}
}
return nil
}
// Create the device nodes in the container.
func createDevices(config *configs.Config) error {
useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
oldMask := syscall.Umask(0000)
for _, node := range config.Devices {
// containers running in a user namespace are not allowed to mknod
// devices so we can just bind mount it from the host.
if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
syscall.Umask(oldMask)
return err
}
}
syscall.Umask(oldMask)
return nil
}
func bindMountDeviceNode(dest string, node *configs.Device) error {
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "")
}
// Creates the device node in the rootfs of the container.
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
dest := filepath.Join(rootfs, node.Path)
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
return err
}
if bind {
return bindMountDeviceNode(dest, node)
}
if err := mknodDevice(dest, node); err != nil {
if os.IsExist(err) {
return nil
} else if os.IsPermission(err) {
return bindMountDeviceNode(dest, node)
}
return err
}
return nil
}
func mknodDevice(dest string, node *configs.Device) error {
fileMode := node.FileMode
switch node.Type {
case 'c', 'u':
fileMode |= syscall.S_IFCHR
case 'b':
fileMode |= syscall.S_IFBLK
case 'p':
fileMode |= syscall.S_IFIFO
default:
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
}
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
return err
}
return syscall.Chown(dest, int(node.Uid), int(node.Gid))
}
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
for _, m := range mountinfo {
if m.Mountpoint == dir {
return m
}
}
return nil
}
// Get the parent mount point of directory passed in as argument. Also return
// optional fields.
func getParentMount(rootfs string) (string, string, error) {
var path string
mountinfos, err := mount.GetMounts()
if err != nil {
return "", "", err
}
mountinfo := getMountInfo(mountinfos, rootfs)
if mountinfo != nil {
return rootfs, mountinfo.Optional, nil
}
path = rootfs
for {
path = filepath.Dir(path)
mountinfo = getMountInfo(mountinfos, path)
if mountinfo != nil {
return path, mountinfo.Optional, nil
}
if path == "/" {
break
}
}
// If we are here, we did not find parent mount. Something is wrong.
return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
}
// Make parent mount private if it was shared
func rootfsParentMountPrivate(rootfs string) error {
sharedMount := false
parentMount, optionalOpts, err := getParentMount(rootfs)
if err != nil {
return err
}
optsSplit := strings.Split(optionalOpts, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
sharedMount = true
break
}
}
// Make parent mount PRIVATE if it was shared. It is needed for two
// reasons. First of all pivot_root() will fail if parent mount is
// shared. Secondly when we bind mount rootfs it will propagate to
// parent namespace and we don't want that to happen.
if sharedMount {
return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "")
}
return nil
}
func prepareRoot(config *configs.Config) error {
flag := syscall.MS_SLAVE | syscall.MS_REC
if config.RootPropagation != 0 {
flag = config.RootPropagation
}
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
return err
}
// Make parent mount private to make sure following bind mount does
// not propagate in other namespaces. Also it will help with kernel
// check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent))
if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
return err
}
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
}
func setReadonly() error {
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}
func setupPtmx(config *configs.Config) error {
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
}
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
return nil
}
// pivotRoot will call pivot_root such that rootfs becomes the new root
// filesystem, and everything else is cleaned up.
func pivotRoot(rootfs string) error {
// While the documentation may claim otherwise, pivot_root(".", ".") is
// actually valid. What this results in is / being the new root but
// /proc/self/cwd being the old root. Since we can play around with the cwd
// with pivot_root this allows us to pivot without creating directories in
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
oldroot, err := syscall.Open("/", syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
if err != nil {
return err
}
defer syscall.Close(oldroot)
newroot, err := syscall.Open(rootfs, syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
if err != nil {
return err
}
defer syscall.Close(newroot)
// Change to the new root so that the pivot_root actually acts on it.
if err := syscall.Fchdir(newroot); err != nil {
return err
}
if err := syscall.PivotRoot(".", "."); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
// Currently our "." is oldroot (according to the current kernel code).
// However, purely for safety, we will fchdir(oldroot) since there isn't
// really any guarantee from the kernel what /proc/self/cwd will be after a
// pivot_root(2).
if err := syscall.Fchdir(oldroot); err != nil {
return err
}
// Make oldroot rprivate to make sure our unmounts don't propagate to the
// host (and thus bork the machine).
if err := syscall.Mount("", ".", "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
return err
}
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
return err
}
// Switch back to our shiny new root.
if err := syscall.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
return nil
}
func msMoveRoot(rootfs string) error {
if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
return err
}
if err := syscall.Chroot("."); err != nil {
return err
}
return syscall.Chdir("/")
}
// createIfNotExists creates a file or a directory only if it does not already exist.
func createIfNotExists(path string, isDir bool) error {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
if isDir {
return os.MkdirAll(path, 0755)
}
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return err
}
f, err := os.OpenFile(path, os.O_CREATE, 0755)
if err != nil {
return err
}
f.Close()
}
}
return nil
}
// readonlyPath will make a path read only.
func readonlyPath(path string) error {
if err := syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}
// remountReadonly will remount an existing mount point and ensure that it is read-only.
func remountReadonly(m *configs.Mount) error {
var (
dest = m.Destination
flags = m.Flags
)
for i := 0; i < 5; i++ {
if err := syscall.Mount("", dest, "", uintptr(flags|syscall.MS_REMOUNT|syscall.MS_RDONLY), ""); err != nil {
switch err {
case syscall.EBUSY:
time.Sleep(100 * time.Millisecond)
continue
default:
return err
}
}
return nil
}
return fmt.Errorf("unable to mount %s as readonly max retries reached", dest)
}
// maskPath masks the top of the specified path inside a container to avoid
// security issues from processes reading information from non-namespace aware
// mounts ( proc/kcore ).
// For files, maskPath bind mounts /dev/null over the top of the specified path.
// For directories, maskPath mounts read-only tmpfs over the top of the specified path.
func maskPath(path string) error {
if err := syscall.Mount("/dev/null", path, "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
if err == syscall.ENOTDIR {
return syscall.Mount("tmpfs", path, "tmpfs", syscall.MS_RDONLY, "")
}
return err
}
return nil
}
// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
func writeSystemProperty(key, value string) error {
keyPath := strings.Replace(key, ".", "/", -1)
return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
}
func remount(m *configs.Mount, rootfs string) error {
var (
dest = m.Destination
)
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
return err
}
return nil
}
// Do the mount operation followed by additional mounts required to take care
// of propagation flags.
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
var (
dest = m.Destination
data = label.FormatMountLabel(m.Data, mountLabel)
flags = m.Flags
)
if libcontainerUtils.CleanPath(dest) == "/dev" {
flags &= ^syscall.MS_RDONLY
}
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
if !(copyUp || strings.HasPrefix(dest, rootfs)) {
dest = filepath.Join(rootfs, dest)
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
return err
}
for _, pflag := range m.PropagationFlags {
if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil {
return err
}
}
return nil
}

View file

@ -1,11 +0,0 @@
// +build linux,go1.5
package libcontainer
import "syscall"
// Set the GidMappingsEnableSetgroups member to true, so the process's
// setgroups proc entry wont be set to 'deny' if GidMappings are set
func enableSetgroups(sys *syscall.SysProcAttr) {
sys.GidMappingsEnableSetgroups = true
}

View file

@ -1,63 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux/label"
)
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
pipe *os.File
consoleSocket *os.File
config *initConfig
}
func (l *linuxSetnsInit) getSessionRingName() string {
return fmt.Sprintf("_ses.%s", l.config.ContainerId)
}
func (l *linuxSetnsInit) Init() error {
if !l.config.Config.NoNewKeyring {
// do not inherit the parent's session keyring
if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
return err
}
}
if l.config.CreateConsole {
if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
return err
}
if err := system.Setctty(); err != nil {
return err
}
}
if l.config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err
}
}
if l.config.Config.Seccomp != nil {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err
}
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}

View file

@ -1,190 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"os/exec"
"syscall"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux/label"
)
type linuxStandardInit struct {
pipe *os.File
consoleSocket *os.File
parentPid int
stateDirFD int
config *initConfig
}
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
var newperms uint32
if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
// with user ns we need 'other' search permissions
newperms = 0x8
} else {
// without user ns we need 'UID' search permissions
newperms = 0x80000
}
// create a unique per session container name that we can
// join in setns; however, other containers can also join it
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
}
// PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value
// the kernel
const PR_SET_NO_NEW_PRIVS = 0x26
func (l *linuxStandardInit) Init() error {
if !l.config.Config.NoNewKeyring {
ringname, keepperms, newperms := l.getSessionRingParams()
// do not inherit the parent's session keyring
sessKeyId, err := keys.JoinSessionKeyring(ringname)
if err != nil {
return err
}
// make session keyring searcheable
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
return err
}
}
if err := setupNetwork(l.config); err != nil {
return err
}
if err := setupRoute(l.config.Config); err != nil {
return err
}
label.Init()
// prepareRootfs() can be executed only for a new mount namespace.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := prepareRootfs(l.pipe, l.config.Config); err != nil {
return err
}
}
// Set up the console. This has to be done *before* we finalize the rootfs,
// but *after* we've given the user the chance to set up all of the mounts
// they wanted.
if l.config.CreateConsole {
if err := setupConsole(l.consoleSocket, l.config, true); err != nil {
return err
}
if err := system.Setctty(); err != nil {
return err
}
}
// Finish the rootfs setup.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := finalizeRootfs(l.config.Config); err != nil {
return err
}
}
if hostname := l.config.Config.Hostname; hostname != "" {
if err := syscall.Sethostname([]byte(hostname)); err != nil {
return err
}
}
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err
}
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
for key, value := range l.config.Config.Sysctl {
if err := writeSystemProperty(key, value); err != nil {
return err
}
}
for _, path := range l.config.Config.ReadonlyPaths {
if err := readonlyPath(path); err != nil {
return err
}
}
for _, path := range l.config.Config.MaskPaths {
if err := maskPath(path); err != nil {
return err
}
}
pdeath, err := system.GetParentDeathSignal()
if err != nil {
return err
}
if l.config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err
}
}
// Tell our parent that we're ready to Execv. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
if err := syncParentReady(l.pipe); err != nil {
return err
}
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
// do this before dropping capabilities; otherwise do it as late as possible
// just before execve so as few syscalls take place after it as possible.
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
// finalizeNamespace can change user/group which clears the parent death
// signal, so we restore it here.
if err := pdeath.Restore(); err != nil {
return err
}
// compare the parent from the initial start of the init process and make sure that it did not change.
// if the parent changes that means it died and we were reparented to something else so we should
// just kill ourself and not cause problems for someone else.
if syscall.Getppid() != l.parentPid {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
}
// check for the arg before waiting to make sure it exists and it is returned
// as a create time error.
name, err := exec.LookPath(l.config.Args[0])
if err != nil {
return err
}
// close the pipe to signal that we have completed our init.
l.pipe.Close()
// wait for the fifo to be opened on the other side before
// exec'ing the users process.
fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0)
if err != nil {
return newSystemErrorWithCause(err, "openat exec fifo")
}
if _, err := syscall.Write(fd, []byte("0")); err != nil {
return newSystemErrorWithCause(err, "write 0 exec fifo")
}
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return newSystemErrorWithCause(err, "init seccomp")
}
}
// close the statedir fd before exec because the kernel resets dumpable in the wrong order
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
syscall.Close(l.stateDirFD)
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
return newSystemErrorWithCause(err, "exec user process")
}
return nil
}

View file

@ -1,247 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
)
func newStateTransitionError(from, to containerState) error {
return &stateTransitionError{
From: from.status().String(),
To: to.status().String(),
}
}
// stateTransitionError is returned when an invalid state transition happens from one
// state to another.
type stateTransitionError struct {
From string
To string
}
func (s *stateTransitionError) Error() string {
return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To)
}
type containerState interface {
transition(containerState) error
destroy() error
status() Status
}
func destroy(c *linuxContainer) error {
if !c.config.Namespaces.Contains(configs.NEWPID) {
if err := signalAllProcesses(c.cgroupManager, syscall.SIGKILL); err != nil {
logrus.Warn(err)
}
}
err := c.cgroupManager.Destroy()
if rerr := os.RemoveAll(c.root); err == nil {
err = rerr
}
c.initProcess = nil
if herr := runPoststopHooks(c); err == nil {
err = herr
}
c.state = &stoppedState{c: c}
return err
}
func runPoststopHooks(c *linuxContainer) error {
if c.config.Hooks != nil {
s := configs.HookState{
Version: c.config.Version,
ID: c.id,
Bundle: utils.SearchLabels(c.config.Labels, "bundle"),
}
for _, hook := range c.config.Hooks.Poststop {
if err := hook.Run(s); err != nil {
return err
}
}
}
return nil
}
// stoppedState represents a container is a stopped/destroyed state.
type stoppedState struct {
c *linuxContainer
}
func (b *stoppedState) status() Status {
return Stopped
}
func (b *stoppedState) transition(s containerState) error {
switch s.(type) {
case *runningState, *restoredState:
b.c.state = s
return nil
case *stoppedState:
return nil
}
return newStateTransitionError(b, s)
}
func (b *stoppedState) destroy() error {
return destroy(b.c)
}
// runningState represents a container that is currently running.
type runningState struct {
c *linuxContainer
}
func (r *runningState) status() Status {
return Running
}
func (r *runningState) transition(s containerState) error {
switch s.(type) {
case *stoppedState:
t, err := r.c.runType()
if err != nil {
return err
}
if t == Running {
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
}
r.c.state = s
return nil
case *pausedState:
r.c.state = s
return nil
case *runningState:
return nil
}
return newStateTransitionError(r, s)
}
func (r *runningState) destroy() error {
t, err := r.c.runType()
if err != nil {
return err
}
if t == Running {
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
}
return destroy(r.c)
}
type createdState struct {
c *linuxContainer
}
func (i *createdState) status() Status {
return Created
}
func (i *createdState) transition(s containerState) error {
switch s.(type) {
case *runningState, *pausedState, *stoppedState:
i.c.state = s
return nil
case *createdState:
return nil
}
return newStateTransitionError(i, s)
}
func (i *createdState) destroy() error {
i.c.initProcess.signal(syscall.SIGKILL)
return destroy(i.c)
}
// pausedState represents a container that is currently pause. It cannot be destroyed in a
// paused state and must transition back to running first.
type pausedState struct {
c *linuxContainer
}
func (p *pausedState) status() Status {
return Paused
}
func (p *pausedState) transition(s containerState) error {
switch s.(type) {
case *runningState, *stoppedState:
p.c.state = s
return nil
case *pausedState:
return nil
}
return newStateTransitionError(p, s)
}
func (p *pausedState) destroy() error {
t, err := p.c.runType()
if err != nil {
return err
}
if t != Running && t != Created {
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
return err
}
return destroy(p.c)
}
return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
}
// restoredState is the same as the running state but also has associated checkpoint
// information that maybe need destroyed when the container is stopped and destroy is called.
type restoredState struct {
imageDir string
c *linuxContainer
}
func (r *restoredState) status() Status {
return Running
}
func (r *restoredState) transition(s containerState) error {
switch s.(type) {
case *stoppedState, *runningState:
return nil
}
return newStateTransitionError(r, s)
}
func (r *restoredState) destroy() error {
if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil {
if !os.IsNotExist(err) {
return err
}
}
return destroy(r.c)
}
// loadedState is used whenever a container is restored, loaded, or setting additional
// processes inside and it should not be destroyed when it is exiting.
type loadedState struct {
c *linuxContainer
s Status
}
func (n *loadedState) status() Status {
return n.s
}
func (n *loadedState) transition(s containerState) error {
n.c.state = s
return nil
}
func (n *loadedState) destroy() error {
if err := n.c.refreshState(); err != nil {
return err
}
return n.c.state.destroy()
}

View file

@ -1,15 +0,0 @@
package libcontainer
type NetworkInterface struct {
// Name is the name of the network interface.
Name string
RxBytes uint64
RxPackets uint64
RxErrors uint64
RxDropped uint64
TxBytes uint64
TxPackets uint64
TxErrors uint64
TxDropped uint64
}

View file

@ -1,5 +0,0 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}

View file

@ -1,8 +0,0 @@
package libcontainer
import "github.com/opencontainers/runc/libcontainer/cgroups"
type Stats struct {
Interfaces []*NetworkInterface
CgroupStats *cgroups.Stats
}

View file

@ -1,7 +0,0 @@
package libcontainer
// Solaris - TODO
type Stats struct {
Interfaces []*NetworkInterface
}

View file

@ -1,5 +0,0 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}

View file

@ -1,107 +0,0 @@
package libcontainer
import (
"encoding/json"
"fmt"
"io"
"github.com/opencontainers/runc/libcontainer/utils"
)
type syncType string
// Constants that are used for synchronisation between the parent and child
// during container setup. They come in pairs (with procError being a generic
// response which is followed by a &genericError).
//
// [ child ] <-> [ parent ]
//
// procHooks --> [run hooks]
// <-- procResume
//
// procConsole -->
// <-- procConsoleReq
// [send(fd)] --> [recv(fd)]
// <-- procConsoleAck
//
// procReady --> [final setup]
// <-- procRun
const (
procError syncType = "procError"
procReady syncType = "procReady"
procRun syncType = "procRun"
procHooks syncType = "procHooks"
procResume syncType = "procResume"
)
type syncT struct {
Type syncType `json:"type"`
}
// writeSync is used to write to a synchronisation pipe. An error is returned
// if there was a problem writing the payload.
func writeSync(pipe io.Writer, sync syncType) error {
if err := utils.WriteJSON(pipe, syncT{sync}); err != nil {
return err
}
return nil
}
// readSync is used to read from a synchronisation pipe. An error is returned
// if we got a genericError, the pipe was closed, or we got an unexpected flag.
func readSync(pipe io.Reader, expected syncType) error {
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type == procError {
var ierr genericError
if err := json.NewDecoder(pipe).Decode(&ierr); err != nil {
return fmt.Errorf("failed reading error from parent: %v", err)
}
return &ierr
}
if procSync.Type != expected {
return fmt.Errorf("invalid synchronisation flag from parent")
}
}
return nil
}
// parseSync runs the given callback function on each syncT received from the
// child. It will return once io.EOF is returned from the given pipe.
func parseSync(pipe io.Reader, fn func(*syncT) error) error {
dec := json.NewDecoder(pipe)
for {
var sync syncT
if err := dec.Decode(&sync); err != nil {
if err == io.EOF {
break
}
return err
}
// We handle this case outside fn for cleanliness reasons.
var ierr *genericError
if sync.Type == procError {
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding proc error from init")
}
if ierr != nil {
return ierr
}
// Programmer error.
panic("No error following JSON procError payload.")
}
if err := fn(&sync); err != nil {
return err
}
}
return nil
}

View file

@ -1,175 +0,0 @@
// +build linux
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
"text/tabwriter"
"time"
"encoding/json"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/urfave/cli"
)
const formatOptions = `table or json`
// containerState represents the platform agnostic pieces relating to a
// running container's status and state
type containerState struct {
// Version is the OCI version for the container
Version string `json:"ociVersion"`
// ID is the container ID
ID string `json:"id"`
// InitProcessPid is the init process id in the parent namespace
InitProcessPid int `json:"pid"`
// Status is the current status of the container, running, paused, ...
Status string `json:"status"`
// Bundle is the path on the filesystem to the bundle
Bundle string `json:"bundle"`
// Rootfs is a path to a directory containing the container's root filesystem.
Rootfs string `json:"rootfs"`
// Created is the unix timestamp for the creation time of the container in UTC
Created time.Time `json:"created"`
// Annotations is the user defined annotations added to the config.
Annotations map[string]string `json:"annotations,omitempty"`
// The owner of the state directory (the owner of the container).
Owner string `json:"owner"`
}
var listCommand = cli.Command{
Name: "list",
Usage: "lists containers started by runc with the given root",
ArgsUsage: `
Where the given root is specified via the global option "--root"
(default: "/run/runc").
EXAMPLE 1:
To list containers created via the default "--root":
# runc list
EXAMPLE 2:
To list containers created using a non-default value for "--root":
# runc --root value list`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "format, f",
Value: "table",
Usage: `select one of: ` + formatOptions,
},
cli.BoolFlag{
Name: "quiet, q",
Usage: "display only container IDs",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 0, exactArgs); err != nil {
return err
}
s, err := getContainers(context)
if err != nil {
return err
}
if context.Bool("quiet") {
for _, item := range s {
fmt.Println(item.ID)
}
return nil
}
switch context.String("format") {
case "table":
w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0)
fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tOWNER\n")
for _, item := range s {
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\t%s\n",
item.ID,
item.InitProcessPid,
item.Status,
item.Bundle,
item.Created.Format(time.RFC3339Nano),
item.Owner)
}
if err := w.Flush(); err != nil {
return err
}
case "json":
if err := json.NewEncoder(os.Stdout).Encode(s); err != nil {
return err
}
default:
return fmt.Errorf("invalid format option")
}
return nil
},
}
func getContainers(context *cli.Context) ([]containerState, error) {
factory, err := loadFactory(context)
if err != nil {
return nil, err
}
root := context.GlobalString("root")
absRoot, err := filepath.Abs(root)
if err != nil {
return nil, err
}
list, err := ioutil.ReadDir(absRoot)
if err != nil {
fatal(err)
}
var s []containerState
for _, item := range list {
if item.IsDir() {
// This cast is safe on Linux.
stat := item.Sys().(*syscall.Stat_t)
owner, err := user.LookupUid(int(stat.Uid))
if err != nil {
owner.Name = string(stat.Uid)
}
container, err := factory.Load(item.Name())
if err != nil {
fmt.Fprintf(os.Stderr, "load container %s: %v\n", item.Name(), err)
continue
}
containerStatus, err := container.Status()
if err != nil {
fmt.Fprintf(os.Stderr, "status for %s: %v\n", item.Name(), err)
continue
}
state, err := container.State()
if err != nil {
fmt.Fprintf(os.Stderr, "state for %s: %v\n", item.Name(), err)
continue
}
pid := state.BaseState.InitProcessPid
if containerStatus == libcontainer.Stopped {
pid = 0
}
bundle, annotations := utils.Annotations(state.Config.Labels)
s = append(s, containerState{
Version: state.BaseState.Config.Version,
ID: state.BaseState.ID,
InitProcessPid: pid,
Status: containerStatus.String(),
Bundle: bundle,
Rootfs: state.BaseState.Config.Rootfs,
Created: state.BaseState.Created,
Annotations: annotations,
Owner: owner.Name,
})
}
}
return s, nil
}

View file

@ -1,149 +0,0 @@
package main
import (
"fmt"
"io"
"os"
"strings"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
// version will be populated by the Makefile, read from
// VERSION file of the source code.
var version = ""
// gitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
var gitCommit = ""
const (
specConfig = "config.json"
usage = `Open Container Initiative runtime
runc is a command line client for running applications packaged according to
the Open Container Initiative (OCI) format and is a compliant implementation of the
Open Container Initiative specification.
runc integrates well with existing process supervisors to provide a production
container runtime environment for applications. It can be used with your
existing process monitoring tools and the container will be spawned as a
direct child of the process supervisor.
Containers are configured using bundles. A bundle for a container is a directory
that includes a specification file named "` + specConfig + `" and a root filesystem.
The root filesystem contains the contents of the container.
To start a new instance of a container:
# runc run [ -b bundle ] <container-id>
Where "<container-id>" is your name for the instance of the container that you
are starting. The name you provide for the container instance must be unique on
your host. Providing the bundle directory using "-b" is optional. The default
value for "bundle" is the current directory.`
)
func main() {
app := cli.NewApp()
app.Name = "runc"
app.Usage = usage
var v []string
if version != "" {
v = append(v, version)
}
if gitCommit != "" {
v = append(v, fmt.Sprintf("commit: %s", gitCommit))
}
v = append(v, fmt.Sprintf("spec: %s", specs.Version))
app.Version = strings.Join(v, "\n")
app.Flags = []cli.Flag{
cli.BoolFlag{
Name: "debug",
Usage: "enable debug output for logging",
},
cli.StringFlag{
Name: "log",
Value: "/dev/null",
Usage: "set the log file path where internal debug information is written",
},
cli.StringFlag{
Name: "log-format",
Value: "text",
Usage: "set the format used by logs ('text' (default), or 'json')",
},
cli.StringFlag{
Name: "root",
Value: "/run/runc",
Usage: "root directory for storage of container state (this should be located in tmpfs)",
},
cli.StringFlag{
Name: "criu",
Value: "criu",
Usage: "path to the criu binary used for checkpoint and restore",
},
cli.BoolFlag{
Name: "systemd-cgroup",
Usage: "enable systemd cgroup support, expects cgroupsPath to be of form \"slice:prefix:name\" for e.g. \"system.slice:runc:434234\"",
},
}
app.Commands = []cli.Command{
checkpointCommand,
createCommand,
deleteCommand,
eventsCommand,
execCommand,
initCommand,
killCommand,
listCommand,
pauseCommand,
psCommand,
restoreCommand,
resumeCommand,
runCommand,
specCommand,
startCommand,
stateCommand,
updateCommand,
}
app.Before = func(context *cli.Context) error {
if context.GlobalBool("debug") {
logrus.SetLevel(logrus.DebugLevel)
}
if path := context.GlobalString("log"); path != "" {
f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0666)
if err != nil {
return err
}
logrus.SetOutput(f)
}
switch context.GlobalString("log-format") {
case "text":
// retain logrus's default.
case "json":
logrus.SetFormatter(new(logrus.JSONFormatter))
default:
return fmt.Errorf("unknown log-format %q", context.GlobalString("log-format"))
}
return nil
}
// If the command returns an error, cli takes upon itself to print
// the error on cli.ErrWriter and exit.
// Use our own writer here to ensure the log gets sent to the right location.
cli.ErrWriter = &FatalWriter{cli.ErrWriter}
if err := app.Run(os.Args); err != nil {
fatal(err)
}
}
type FatalWriter struct {
cliErrWriter io.Writer
}
func (f *FatalWriter) Write(p []byte) (n int, err error) {
logrus.Error(string(p))
return f.cliErrWriter.Write(p)
}

View file

@ -1,21 +0,0 @@
// +build solaris
package main
import "github.com/urfave/cli"
var (
checkpointCommand cli.Command
eventsCommand cli.Command
restoreCommand cli.Command
specCommand cli.Command
killCommand cli.Command
deleteCommand cli.Command
execCommand cli.Command
initCommand cli.Command
listCommand cli.Command
pauseCommand cli.Command
resumeCommand cli.Command
startCommand cli.Command
stateCommand cli.Command
)

View file

@ -1,33 +0,0 @@
// +build linux
package main
import (
"os"
"runtime"
"github.com/opencontainers/runc/libcontainer"
_ "github.com/opencontainers/runc/libcontainer/nsenter"
"github.com/urfave/cli"
)
func init() {
if len(os.Args) > 1 && os.Args[1] == "init" {
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
}
}
var initCommand = cli.Command{
Name: "init",
Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
Action: func(context *cli.Context) error {
factory, _ := libcontainer.New("")
if err := factory.StartInitialization(); err != nil {
// as the error is sent back to the parent there is no need to log
// or write it to stderr because the parent process will handle this
os.Exit(1)
}
panic("libcontainer: container init failed to exec")
},
}

View file

@ -1,13 +0,0 @@
// +build !linux,!solaris
package main
import "github.com/urfave/cli"
var (
checkpointCommand cli.Command
eventsCommand cli.Command
restoreCommand cli.Command
specCommand cli.Command
killCommand cli.Command
)

View file

@ -1,108 +0,0 @@
// +build linux
package main
import (
"bytes"
"fmt"
"net"
"path/filepath"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
type notifySocket struct {
socket *net.UnixConn
host string
socketPath string
}
func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *notifySocket {
if notifySocketHost == "" {
return nil
}
root := filepath.Join(context.GlobalString("root"), id)
path := filepath.Join(root, "notify.sock")
notifySocket := &notifySocket{
socket: nil,
host: notifySocketHost,
socketPath: path,
}
return notifySocket
}
func (ns *notifySocket) Close() error {
return ns.socket.Close()
}
// If systemd is supporting sd_notify protocol, this function will add support
// for sd_notify protocol from within the container.
func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) {
mount := specs.Mount{Destination: s.host, Type: "bind", Source: s.socketPath, Options: []string{"bind"}}
spec.Mounts = append(spec.Mounts, mount)
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
}
func (s *notifySocket) setupSocket() error {
addr := net.UnixAddr{
Name: s.socketPath,
Net: "unixgram",
}
socket, err := net.ListenUnixgram("unixgram", &addr)
if err != nil {
return err
}
s.socket = socket
return nil
}
// pid1 must be set only with -d, as it is used to set the new process as the main process
// for the service in systemd
func (notifySocket *notifySocket) run(pid1 int) {
buf := make([]byte, 512)
notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
if err != nil {
logrus.Error(err)
return
}
for {
r, err := notifySocket.socket.Read(buf)
if err != nil {
break
}
var out bytes.Buffer
for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
if bytes.HasPrefix(line, []byte("READY=")) {
_, err = out.Write(line)
if err != nil {
return
}
_, err = out.Write([]byte{'\n'})
if err != nil {
return
}
_, err = client.Write(out.Bytes())
if err != nil {
return
}
// now we can inform systemd to use pid1 as the pid to monitor
if pid1 > 0 {
newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
client.Write([]byte(newPid))
}
return
}
}
}
}

View file

@ -1,57 +0,0 @@
// +build linux
package main
import "github.com/urfave/cli"
var pauseCommand = cli.Command{
Name: "pause",
Usage: "pause suspends all processes inside the container",
ArgsUsage: `<container-id>
Where "<container-id>" is the name for the instance of the container to be
paused. `,
Description: `The pause command suspends all processes in the instance of the container.
Use runc list to identiy instances of containers and their current status.`,
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
container, err := getContainer(context)
if err != nil {
return err
}
if err := container.Pause(); err != nil {
return err
}
return nil
},
}
var resumeCommand = cli.Command{
Name: "resume",
Usage: "resumes all processes that have been previously paused",
ArgsUsage: `<container-id>
Where "<container-id>" is the name for the instance of the container to be
resumed.`,
Description: `The resume command resumes all processes in the instance of the container.
Use runc list to identiy instances of containers and their current status.`,
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
container, err := getContainer(context)
if err != nil {
return err
}
if err := container.Resume(); err != nil {
return err
}
return nil
},
}

View file

@ -1,109 +0,0 @@
// +build linux
package main
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"strconv"
"strings"
"github.com/urfave/cli"
)
var psCommand = cli.Command{
Name: "ps",
Usage: "ps displays the processes running inside a container",
ArgsUsage: `<container-id> [ps options]`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "format, f",
Value: "table",
Usage: `select one of: ` + formatOptions,
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, minArgs); err != nil {
return err
}
// XXX: Currently not supported with rootless containers.
if isRootless() {
return fmt.Errorf("runc ps requires root")
}
container, err := getContainer(context)
if err != nil {
return err
}
pids, err := container.Processes()
if err != nil {
return err
}
switch context.String("format") {
case "table":
case "json":
return json.NewEncoder(os.Stdout).Encode(pids)
default:
return fmt.Errorf("invalid format option")
}
// [1:] is to remove command name, ex:
// context.Args(): [containet_id ps_arg1 ps_arg2 ...]
// psArgs: [ps_arg1 ps_arg2 ...]
//
psArgs := context.Args()[1:]
if len(psArgs) == 0 {
psArgs = []string{"-ef"}
}
cmd := exec.Command("ps", psArgs...)
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("%s: %s", err, output)
}
lines := strings.Split(string(output), "\n")
pidIndex, err := getPidIndex(lines[0])
if err != nil {
return err
}
fmt.Println(lines[0])
for _, line := range lines[1:] {
if len(line) == 0 {
continue
}
fields := strings.Fields(line)
p, err := strconv.Atoi(fields[pidIndex])
if err != nil {
return fmt.Errorf("unexpected pid '%s': %s", fields[pidIndex], err)
}
for _, pid := range pids {
if pid == p {
fmt.Println(line)
break
}
}
}
return nil
},
SkipArgReorder: true,
}
func getPidIndex(title string) (int, error) {
titles := strings.Fields(title)
pidIndex := -1
for i, name := range titles {
if name == "PID" {
return i, nil
}
}
return pidIndex, fmt.Errorf("couldn't find PID field in ps output")
}

View file

@ -1,216 +0,0 @@
// +build linux
package main
import (
"fmt"
"os"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
var restoreCommand = cli.Command{
Name: "restore",
Usage: "restore a container from a previous checkpoint",
ArgsUsage: `<container-id>
Where "<container-id>" is the name for the instance of the container to be
restored.`,
Description: `Restores the saved state of the container instance that was previously saved
using the runc checkpoint command.`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "image-path",
Value: "",
Usage: "path to criu image files for restoring",
},
cli.StringFlag{
Name: "work-path",
Value: "",
Usage: "path for saving work files and logs",
},
cli.BoolFlag{
Name: "tcp-established",
Usage: "allow open tcp connections",
},
cli.BoolFlag{
Name: "ext-unix-sk",
Usage: "allow external unix sockets",
},
cli.BoolFlag{
Name: "shell-job",
Usage: "allow shell jobs",
},
cli.BoolFlag{
Name: "file-locks",
Usage: "handle file locks, for safety",
},
cli.StringFlag{
Name: "manage-cgroups-mode",
Value: "",
Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'",
},
cli.StringFlag{
Name: "bundle, b",
Value: "",
Usage: "path to the root of the bundle directory",
},
cli.BoolFlag{
Name: "detach,d",
Usage: "detach from the container's process",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Usage: "specify the file to write the process id to",
},
cli.BoolFlag{
Name: "no-subreaper",
Usage: "disable the use of the subreaper used to reap reparented processes",
},
cli.BoolFlag{
Name: "no-pivot",
Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk",
},
cli.StringSliceFlag{
Name: "empty-ns",
Usage: "create a namespace, but don't restore its properties",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
// XXX: Currently this is untested with rootless containers.
if isRootless() {
return fmt.Errorf("runc restore requires root")
}
imagePath := context.String("image-path")
id := context.Args().First()
if id == "" {
return errEmptyID
}
if imagePath == "" {
imagePath = getDefaultImagePath(context)
}
bundle := context.String("bundle")
if bundle != "" {
if err := os.Chdir(bundle); err != nil {
return err
}
}
spec, err := loadSpec(specConfig)
if err != nil {
return err
}
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
CgroupName: id,
UseSystemdCgroup: context.GlobalBool("systemd-cgroup"),
NoPivotRoot: context.Bool("no-pivot"),
Spec: spec,
})
if err != nil {
return err
}
status, err := restoreContainer(context, spec, config, imagePath)
if err == nil {
os.Exit(status)
}
return err
},
}
func restoreContainer(context *cli.Context, spec *specs.Spec, config *configs.Config, imagePath string) (int, error) {
var (
rootuid = 0
rootgid = 0
id = context.Args().First()
)
factory, err := loadFactory(context)
if err != nil {
return -1, err
}
container, err := factory.Load(id)
if err != nil {
container, err = factory.Create(id, config)
if err != nil {
return -1, err
}
}
options := criuOptions(context)
status, err := container.Status()
if err != nil {
logrus.Error(err)
}
if status == libcontainer.Running {
fatalf("Container with id %s already running", id)
}
setManageCgroupsMode(context, options)
if err = setEmptyNsMask(context, options); err != nil {
return -1, err
}
// ensure that the container is always removed if we were the process
// that created it.
detach := context.Bool("detach")
if !detach {
defer destroy(container)
}
process := &libcontainer.Process{}
tty, err := setupIO(process, rootuid, rootgid, false, detach, "")
if err != nil {
return -1, err
}
notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
if notifySocket != nil {
notifySocket.setupSpec(context, spec)
notifySocket.setupSocket()
}
handler := newSignalHandler(!context.Bool("no-subreaper"), notifySocket)
if err := container.Restore(process, options); err != nil {
return -1, err
}
// We don't need to do a tty.recvtty because config.Terminal is always false.
defer tty.Close()
if err := tty.ClosePostStart(); err != nil {
return -1, err
}
if pidFile := context.String("pid-file"); pidFile != "" {
if err := createPidFile(pidFile, process); err != nil {
_ = process.Signal(syscall.SIGKILL)
_, _ = process.Wait()
return -1, err
}
}
return handler.forward(process, tty, detach)
}
func criuOptions(context *cli.Context) *libcontainer.CriuOpts {
imagePath := getCheckpointImagePath(context)
if err := os.MkdirAll(imagePath, 0655); err != nil {
fatal(err)
}
return &libcontainer.CriuOpts{
ImagesDirectory: imagePath,
WorkDirectory: context.String("work-path"),
ParentImage: context.String("parent-path"),
LeaveRunning: context.Bool("leave-running"),
TcpEstablished: context.Bool("tcp-established"),
ExternalUnixConnections: context.Bool("ext-unix-sk"),
ShellJob: context.Bool("shell-job"),
FileLocks: context.Bool("file-locks"),
PreDump: context.Bool("pre-dump"),
}
}

View file

@ -1,49 +0,0 @@
package main
import "fmt"
const (
RLIMIT_CPU = iota // CPU time in sec
RLIMIT_FSIZE // Maximum filesize
RLIMIT_DATA // max data size
RLIMIT_STACK // max stack size
RLIMIT_CORE // max core file size
RLIMIT_RSS // max resident set size
RLIMIT_NPROC // max number of processes
RLIMIT_NOFILE // max number of open files
RLIMIT_MEMLOCK // max locked-in-memory address space
RLIMIT_AS // address space limit
RLIMIT_LOCKS // maximum file locks held
RLIMIT_SIGPENDING // max number of pending signals
RLIMIT_MSGQUEUE // maximum bytes in POSIX mqueues
RLIMIT_NICE // max nice prio allowed to raise to
RLIMIT_RTPRIO // maximum realtime priority
RLIMIT_RTTIME // timeout for RT tasks in us
)
var rlimitMap = map[string]int{
"RLIMIT_CPU": RLIMIT_CPU,
"RLIMIT_FSIZE": RLIMIT_FSIZE,
"RLIMIT_DATA": RLIMIT_DATA,
"RLIMIT_STACK": RLIMIT_STACK,
"RLIMIT_CORE": RLIMIT_CORE,
"RLIMIT_RSS": RLIMIT_RSS,
"RLIMIT_NPROC": RLIMIT_NPROC,
"RLIMIT_NOFILE": RLIMIT_NOFILE,
"RLIMIT_MEMLOCK": RLIMIT_MEMLOCK,
"RLIMIT_AS": RLIMIT_AS,
"RLIMIT_LOCKS": RLIMIT_LOCKS,
"RLIMIT_SIGPENDING": RLIMIT_SIGPENDING,
"RLIMIT_MSGQUEUE": RLIMIT_MSGQUEUE,
"RLIMIT_NICE": RLIMIT_NICE,
"RLIMIT_RTPRIO": RLIMIT_RTPRIO,
"RLIMIT_RTTIME": RLIMIT_RTTIME,
}
func strToRlimit(key string) (int, error) {
rl, ok := rlimitMap[key]
if !ok {
return 0, fmt.Errorf("wrong rlimit value: %s", key)
}
return rl, nil
}

View file

@ -1,84 +0,0 @@
// +build linux
package main
import (
"os"
"github.com/urfave/cli"
)
// default action is to start a container
var runCommand = cli.Command{
Name: "run",
Usage: "create and run a container",
ArgsUsage: `<container-id>
Where "<container-id>" is your name for the instance of the container that you
are starting. The name you provide for the container instance must be unique on
your host.`,
Description: `The run command creates an instance of a container for a bundle. The bundle
is a directory with a specification file named "` + specConfig + `" and a root
filesystem.
The specification file includes an args parameter. The args parameter is used
to specify command(s) that get run when the container is started. To change the
command(s) that get executed on start, edit the args parameter of the spec. See
"runc spec --help" for more explanation.`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "bundle, b",
Value: "",
Usage: `path to the root of the bundle directory, defaults to the current directory`,
},
cli.StringFlag{
Name: "console-socket",
Value: "",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.BoolFlag{
Name: "detach, d",
Usage: "detach from the container's process",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Usage: "specify the file to write the process id to",
},
cli.BoolFlag{
Name: "no-subreaper",
Usage: "disable the use of the subreaper used to reap reparented processes",
},
cli.BoolFlag{
Name: "no-pivot",
Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk",
},
cli.BoolFlag{
Name: "no-new-keyring",
Usage: "do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key",
},
cli.IntFlag{
Name: "preserve-fds",
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
if err := revisePidFile(context); err != nil {
return err
}
spec, err := setupSpec(context)
if err != nil {
return err
}
status, err := startContainer(context, spec, false)
if err == nil {
// exit with the container's exit status so any external supervisor is
// notified of the exit with the correct exit status.
os.Exit(status)
}
return err
},
}

View file

@ -1,135 +0,0 @@
// +build linux
package main
import (
"os"
"os/signal"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)
const signalBufferSize = 2048
// newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals
// while still forwarding all other signals to the process.
// If notifySocket is present, use it to read systemd notifications from the container and
// forward them to notifySocketHost.
func newSignalHandler(enableSubreaper bool, notifySocket *notifySocket) *signalHandler {
if enableSubreaper {
// set us as the subreaper before registering the signal handler for the container
if err := system.SetSubreaper(1); err != nil {
logrus.Warn(err)
}
}
// ensure that we have a large buffer size so that we do not miss any signals
// incase we are not processing them fast enough.
s := make(chan os.Signal, signalBufferSize)
// handle all signals for the process.
signal.Notify(s)
return &signalHandler{
signals: s,
notifySocket: notifySocket,
}
}
// exit models a process exit status with the pid and
// exit status.
type exit struct {
pid int
status int
}
type signalHandler struct {
signals chan os.Signal
notifySocket *notifySocket
}
// forward handles the main signal event loop forwarding, resizing, or reaping depending
// on the signal received.
func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach bool) (int, error) {
// make sure we know the pid of our main process so that we can return
// after it dies.
if detach && h.notifySocket == nil {
return 0, nil
}
pid1, err := process.Pid()
if err != nil {
return -1, err
}
if h.notifySocket != nil {
if detach {
h.notifySocket.run(pid1)
return 0, nil
} else {
go h.notifySocket.run(0)
}
}
// perform the initial tty resize.
tty.resize()
for s := range h.signals {
switch s {
case syscall.SIGWINCH:
tty.resize()
case syscall.SIGCHLD:
exits, err := h.reap()
if err != nil {
logrus.Error(err)
}
for _, e := range exits {
logrus.WithFields(logrus.Fields{
"pid": e.pid,
"status": e.status,
}).Debug("process exited")
if e.pid == pid1 {
// call Wait() on the process even though we already have the exit
// status because we must ensure that any of the go specific process
// fun such as flushing pipes are complete before we return.
process.Wait()
if h.notifySocket != nil {
h.notifySocket.Close()
}
return e.status, nil
}
}
default:
logrus.Debugf("sending signal to process %s", s)
if err := syscall.Kill(pid1, s.(syscall.Signal)); err != nil {
logrus.Error(err)
}
}
}
return -1, nil
}
// reap runs wait4 in a loop until we have finished processing any existing exits
// then returns all exits to the main event loop for further processing.
func (h *signalHandler) reap() (exits []exit, err error) {
var (
ws syscall.WaitStatus
rus syscall.Rusage
)
for {
pid, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, &rus)
if err != nil {
if err == syscall.ECHILD {
return exits, nil
}
return nil, err
}
if pid <= 0 {
return exits, nil
}
exits = append(exits, exit{
pid: pid,
status: utils.ExitStatus(ws),
})
}
}

View file

@ -1,155 +0,0 @@
// +build linux
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"runtime"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
var specCommand = cli.Command{
Name: "spec",
Usage: "create a new specification file",
ArgsUsage: "",
Description: `The spec command creates the new specification file named "` + specConfig + `" for
the bundle.
The spec generated is just a starter file. Editing of the spec is required to
achieve desired results. For example, the newly generated spec includes an args
parameter that is initially set to call the "sh" command when the container is
started. Calling "sh" may work for an ubuntu container or busybox, but will not
work for containers that do not include the "sh" program.
EXAMPLE:
To run docker's hello-world container one needs to set the args parameter
in the spec to call hello. This can be done using the sed command or a text
editor. The following commands create a bundle for hello-world, change the
default args parameter in the spec from "sh" to "/hello", then run the hello
command in a new hello-world container named container1:
mkdir hello
cd hello
docker pull hello-world
docker export $(docker create hello-world) > hello-world.tar
mkdir rootfs
tar -C rootfs -xf hello-world.tar
runc spec
sed -i 's;"sh";"/hello";' ` + specConfig + `
runc run container1
In the run command above, "container1" is the name for the instance of the
container that you are starting. The name you provide for the container instance
must be unique on your host.
An alternative for generating a customized spec config is to use "oci-runtime-tool", the
sub-command "oci-runtime-tool generate" has lots of options that can be used to do any
customizations as you want, see [runtime-tools](https://github.com/opencontainers/runtime-tools)
to get more information.
When starting a container through runc, runc needs root privilege. If not
already running as root, you can use sudo to give runc root privilege. For
example: "sudo runc start container1" will give runc root privilege to start the
container on your host.`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "bundle, b",
Value: "",
Usage: "path to the root of the bundle directory",
},
cli.BoolFlag{
Name: "rootless",
Usage: "generate a configuration for a rootless container",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 0, exactArgs); err != nil {
return err
}
spec := specconv.Example()
rootless := context.Bool("rootless")
if rootless {
specconv.ToRootless(spec)
}
checkNoFile := func(name string) error {
_, err := os.Stat(name)
if err == nil {
return fmt.Errorf("File %s exists. Remove it first", name)
}
if !os.IsNotExist(err) {
return err
}
return nil
}
bundle := context.String("bundle")
if bundle != "" {
if err := os.Chdir(bundle); err != nil {
return err
}
}
if err := checkNoFile(specConfig); err != nil {
return err
}
data, err := json.MarshalIndent(spec, "", "\t")
if err != nil {
return err
}
if err := ioutil.WriteFile(specConfig, data, 0666); err != nil {
return err
}
return nil
},
}
func sPtr(s string) *string { return &s }
// loadSpec loads the specification from the provided path.
func loadSpec(cPath string) (spec *specs.Spec, err error) {
cf, err := os.Open(cPath)
if err != nil {
if os.IsNotExist(err) {
return nil, fmt.Errorf("JSON specification file %s not found", cPath)
}
return nil, err
}
defer cf.Close()
if err = json.NewDecoder(cf).Decode(&spec); err != nil {
return nil, err
}
if err = validatePlatform(&spec.Platform); err != nil {
return nil, err
}
return spec, validateProcessSpec(&spec.Process)
}
func createLibContainerRlimit(rlimit specs.LinuxRlimit) (configs.Rlimit, error) {
rl, err := strToRlimit(rlimit.Type)
if err != nil {
return configs.Rlimit{}, err
}
return configs.Rlimit{
Type: rl,
Hard: rlimit.Hard,
Soft: rlimit.Soft,
}, nil
}
func validatePlatform(platform *specs.Platform) error {
if platform.OS != runtime.GOOS {
return fmt.Errorf("target os %s mismatch with current os %s", platform.OS, runtime.GOOS)
}
if platform.Arch != runtime.GOARCH {
return fmt.Errorf("target arch %s mismatch with current arch %s", platform.Arch, runtime.GOARCH)
}
return nil
}

View file

@ -1,43 +0,0 @@
package main
import (
"errors"
"fmt"
"github.com/opencontainers/runc/libcontainer"
"github.com/urfave/cli"
)
var startCommand = cli.Command{
Name: "start",
Usage: "executes the user defined process in a created container",
ArgsUsage: `<container-id>
Where "<container-id>" is your name for the instance of the container that you
are starting. The name you provide for the container instance must be unique on
your host.`,
Description: `The start command executes the user defined process in a created container.`,
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
container, err := getContainer(context)
if err != nil {
return err
}
status, err := container.Status()
if err != nil {
return err
}
switch status {
case libcontainer.Created:
return container.Exec()
case libcontainer.Stopped:
return errors.New("cannot start a container that has stopped")
case libcontainer.Running:
return errors.New("cannot start an already running container")
default:
return fmt.Errorf("cannot start a container in the %s state\n", status)
}
},
}

View file

@ -1,60 +0,0 @@
// +build linux
package main
import (
"encoding/json"
"os"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/urfave/cli"
)
var stateCommand = cli.Command{
Name: "state",
Usage: "output the state of a container",
ArgsUsage: `<container-id>
Where "<container-id>" is your name for the instance of the container.`,
Description: `The state command outputs current state information for the
instance of a container.`,
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
container, err := getContainer(context)
if err != nil {
return err
}
containerStatus, err := container.Status()
if err != nil {
return err
}
state, err := container.State()
if err != nil {
return err
}
pid := state.BaseState.InitProcessPid
if containerStatus == libcontainer.Stopped {
pid = 0
}
bundle, annotations := utils.Annotations(state.Config.Labels)
cs := containerState{
Version: state.BaseState.Config.Version,
ID: state.BaseState.ID,
InitProcessPid: pid,
Status: containerStatus.String(),
Bundle: bundle,
Rootfs: state.BaseState.Config.Rootfs,
Created: state.BaseState.Created,
Annotations: annotations,
}
data, err := json.MarshalIndent(cs, "", " ")
if err != nil {
return err
}
os.Stdout.Write(data)
return nil
},
}

View file

@ -1,134 +0,0 @@
// +build linux
package main
import (
"fmt"
"io"
"os"
"sync"
"github.com/docker/docker/pkg/term"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/utils"
)
type tty struct {
console libcontainer.Console
state *term.State
closers []io.Closer
postStart []io.Closer
wg sync.WaitGroup
consoleC chan error
}
func (t *tty) copyIO(w io.Writer, r io.ReadCloser) {
defer t.wg.Done()
io.Copy(w, r)
r.Close()
}
// setup pipes for the process so that advanced features like c/r are able to easily checkpoint
// and restore the process's IO without depending on a host specific path or device
func setupProcessPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, error) {
i, err := p.InitializeIO(rootuid, rootgid)
if err != nil {
return nil, err
}
t := &tty{
closers: []io.Closer{
i.Stdin,
i.Stdout,
i.Stderr,
},
}
// add the process's io to the post start closers if they support close
for _, cc := range []interface{}{
p.Stdin,
p.Stdout,
p.Stderr,
} {
if c, ok := cc.(io.Closer); ok {
t.postStart = append(t.postStart, c)
}
}
go func() {
io.Copy(i.Stdin, os.Stdin)
i.Stdin.Close()
}()
t.wg.Add(2)
go t.copyIO(os.Stdout, i.Stdout)
go t.copyIO(os.Stderr, i.Stderr)
return t, nil
}
func inheritStdio(process *libcontainer.Process) error {
process.Stdin = os.Stdin
process.Stdout = os.Stdout
process.Stderr = os.Stderr
return nil
}
func (t *tty) recvtty(process *libcontainer.Process, socket *os.File) error {
f, err := utils.RecvFd(socket)
if err != nil {
return err
}
console := libcontainer.ConsoleFromFile(f)
go io.Copy(console, os.Stdin)
t.wg.Add(1)
go t.copyIO(os.Stdout, console)
state, err := term.SetRawTerminal(os.Stdin.Fd())
if err != nil {
return fmt.Errorf("failed to set the terminal from the stdin: %v", err)
}
t.state = state
t.console = console
t.closers = []io.Closer{console}
return nil
}
func (t *tty) waitConsole() error {
if t.consoleC != nil {
return <-t.consoleC
}
return nil
}
// ClosePostStart closes any fds that are provided to the container and dup2'd
// so that we no longer have copy in our process.
func (t *tty) ClosePostStart() error {
for _, c := range t.postStart {
c.Close()
}
return nil
}
// Close closes all open fds for the tty and/or restores the orignal
// stdin state to what it was prior to the container execution
func (t *tty) Close() error {
// ensure that our side of the fds are always closed
for _, c := range t.postStart {
c.Close()
}
// wait for the copy routines to finish before closing the fds
t.wg.Wait()
for _, c := range t.closers {
c.Close()
}
if t.state != nil {
term.RestoreTerminal(os.Stdin.Fd(), t.state)
}
return nil
}
func (t *tty) resize() error {
if t.console == nil {
return nil
}
ws, err := term.GetWinsize(os.Stdin.Fd())
if err != nil {
return err
}
return term.SetWinsize(t.console.File().Fd(), ws)
}

View file

@ -1,250 +0,0 @@
// +build linux
package main
import (
"encoding/json"
"fmt"
"os"
"strconv"
"github.com/docker/go-units"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
func i64Ptr(i int64) *int64 { return &i }
func u64Ptr(i uint64) *uint64 { return &i }
func u16Ptr(i uint16) *uint16 { return &i }
var updateCommand = cli.Command{
Name: "update",
Usage: "update container resource constraints",
ArgsUsage: `<container-id>`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "resources, r",
Value: "",
Usage: `path to the file containing the resources to update or '-' to read from the standard input
The accepted format is as follow (unchanged values can be omitted):
{
"memory": {
"limit": 0,
"reservation": 0,
"swap": 0,
"kernel": 0,
"kernelTCP": 0
},
"cpu": {
"shares": 0,
"quota": 0,
"period": 0,
"realtimeRuntime": 0,
"realtimePeriod": 0,
"cpus": "",
"mems": ""
},
"blockIO": {
"blkioWeight": 0
}
}
Note: if data is to be read from a file or the standard input, all
other options are ignored.
`,
},
cli.IntFlag{
Name: "blkio-weight",
Usage: "Specifies per cgroup weight, range is from 10 to 1000",
},
cli.StringFlag{
Name: "cpu-period",
Usage: "CPU CFS period to be used for hardcapping (in usecs). 0 to use system default",
},
cli.StringFlag{
Name: "cpu-quota",
Usage: "CPU CFS hardcap limit (in usecs). Allowed cpu time in a given period",
},
cli.StringFlag{
Name: "cpu-share",
Usage: "CPU shares (relative weight vs. other containers)",
},
cli.StringFlag{
Name: "cpu-rt-period",
Usage: "CPU realtime period to be used for hardcapping (in usecs). 0 to use system default",
},
cli.StringFlag{
Name: "cpu-rt-runtime",
Usage: "CPU realtime hardcap limit (in usecs). Allowed cpu time in a given period",
},
cli.StringFlag{
Name: "cpuset-cpus",
Usage: "CPU(s) to use",
},
cli.StringFlag{
Name: "cpuset-mems",
Usage: "Memory node(s) to use",
},
cli.StringFlag{
Name: "kernel-memory",
Usage: "Kernel memory limit (in bytes)",
},
cli.StringFlag{
Name: "kernel-memory-tcp",
Usage: "Kernel memory limit (in bytes) for tcp buffer",
},
cli.StringFlag{
Name: "memory",
Usage: "Memory limit (in bytes)",
},
cli.StringFlag{
Name: "memory-reservation",
Usage: "Memory reservation or soft_limit (in bytes)",
},
cli.StringFlag{
Name: "memory-swap",
Usage: "Total memory usage (memory + swap); set '-1' to enable unlimited swap",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
container, err := getContainer(context)
if err != nil {
return err
}
r := specs.LinuxResources{
Memory: &specs.LinuxMemory{
Limit: u64Ptr(0),
Reservation: u64Ptr(0),
Swap: u64Ptr(0),
Kernel: u64Ptr(0),
KernelTCP: u64Ptr(0),
},
CPU: &specs.LinuxCPU{
Shares: u64Ptr(0),
Quota: i64Ptr(0),
Period: u64Ptr(0),
RealtimeRuntime: i64Ptr(0),
RealtimePeriod: u64Ptr(0),
Cpus: "",
Mems: "",
},
BlockIO: &specs.LinuxBlockIO{
Weight: u16Ptr(0),
},
}
config := container.Config()
if in := context.String("resources"); in != "" {
var (
f *os.File
err error
)
switch in {
case "-":
f = os.Stdin
default:
f, err = os.Open(in)
if err != nil {
return err
}
}
err = json.NewDecoder(f).Decode(&r)
if err != nil {
return err
}
} else {
if val := context.Int("blkio-weight"); val != 0 {
r.BlockIO.Weight = u16Ptr(uint16(val))
}
if val := context.String("cpuset-cpus"); val != "" {
r.CPU.Cpus = val
}
if val := context.String("cpuset-mems"); val != "" {
r.CPU.Mems = val
}
for _, pair := range []struct {
opt string
dest *uint64
}{
{"cpu-period", r.CPU.Period},
{"cpu-rt-period", r.CPU.RealtimePeriod},
{"cpu-share", r.CPU.Shares},
} {
if val := context.String(pair.opt); val != "" {
var err error
*pair.dest, err = strconv.ParseUint(val, 10, 64)
if err != nil {
return fmt.Errorf("invalid value for %s: %s", pair.opt, err)
}
}
}
for _, pair := range []struct {
opt string
dest *int64
}{
{"cpu-quota", r.CPU.Quota},
{"cpu-rt-runtime", r.CPU.RealtimeRuntime},
} {
if val := context.String(pair.opt); val != "" {
var err error
*pair.dest, err = strconv.ParseInt(val, 10, 64)
if err != nil {
return fmt.Errorf("invalid value for %s: %s", pair.opt, err)
}
}
}
for _, pair := range []struct {
opt string
dest *uint64
}{
{"memory", r.Memory.Limit},
{"memory-swap", r.Memory.Swap},
{"kernel-memory", r.Memory.Kernel},
{"kernel-memory-tcp", r.Memory.KernelTCP},
{"memory-reservation", r.Memory.Reservation},
} {
if val := context.String(pair.opt); val != "" {
var v int64
if val != "-1" {
v, err = units.RAMInBytes(val)
if err != nil {
return fmt.Errorf("invalid value for %s: %s", pair.opt, err)
}
} else {
v = -1
}
*pair.dest = uint64(v)
}
}
}
// Update the value
config.Cgroups.Resources.BlkioWeight = *r.BlockIO.Weight
config.Cgroups.Resources.CpuPeriod = *r.CPU.Period
config.Cgroups.Resources.CpuQuota = *r.CPU.Quota
config.Cgroups.Resources.CpuShares = *r.CPU.Shares
config.Cgroups.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod
config.Cgroups.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime
config.Cgroups.Resources.CpusetCpus = r.CPU.Cpus
config.Cgroups.Resources.CpusetMems = r.CPU.Mems
config.Cgroups.Resources.KernelMemory = *r.Memory.Kernel
config.Cgroups.Resources.KernelMemoryTCP = *r.Memory.KernelTCP
config.Cgroups.Resources.Memory = *r.Memory.Limit
config.Cgroups.Resources.MemoryReservation = *r.Memory.Reservation
config.Cgroups.Resources.MemorySwap = *r.Memory.Swap
return container.Set(config)
},
}

View file

@ -1,82 +0,0 @@
package main
import (
"fmt"
"os"
"path/filepath"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
const (
exactArgs = iota
minArgs
maxArgs
)
func checkArgs(context *cli.Context, expected, checkType int) error {
var err error
cmdName := context.Command.Name
switch checkType {
case exactArgs:
if context.NArg() != expected {
err = fmt.Errorf("%s: %q requires exactly %d argument(s)", os.Args[0], cmdName, expected)
}
case minArgs:
if context.NArg() < expected {
err = fmt.Errorf("%s: %q requires a minimum of %d argument(s)", os.Args[0], cmdName, expected)
}
case maxArgs:
if context.NArg() > expected {
err = fmt.Errorf("%s: %q requires a maximum of %d argument(s)", os.Args[0], cmdName, expected)
}
}
if err != nil {
fmt.Printf("Incorrect Usage.\n\n")
cli.ShowCommandHelp(context, cmdName)
return err
}
return nil
}
// fatal prints the error's details if it is a libcontainer specific error type
// then exits the program with an exit status of 1.
func fatal(err error) {
// make sure the error is written to the logger
logrus.Error(err)
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
// setupSpec performs initial setup based on the cli.Context for the container
func setupSpec(context *cli.Context) (*specs.Spec, error) {
bundle := context.String("bundle")
if bundle != "" {
if err := os.Chdir(bundle); err != nil {
return nil, err
}
}
spec, err := loadSpec(specConfig)
if err != nil {
return nil, err
}
return spec, nil
}
func revisePidFile(context *cli.Context) error {
pidFile := context.String("pid-file")
if pidFile == "" {
return nil
}
// convert pid-file to an absolute path so we can write to the right
// file after chdir to bundle
pidFile, err := filepath.Abs(pidFile)
if err != nil {
return err
}
return context.Set("pid-file", pidFile)
}

View file

@ -1,378 +0,0 @@
// +build linux
package main
import (
"errors"
"fmt"
"net"
"os"
"path/filepath"
"strconv"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/coreos/go-systemd/activation"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
var errEmptyID = errors.New("container id cannot be empty")
var container libcontainer.Container
// loadFactory returns the configured factory instance for execing containers.
func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
root := context.GlobalString("root")
abs, err := filepath.Abs(root)
if err != nil {
return nil, err
}
cgroupManager := libcontainer.Cgroupfs
if context.GlobalBool("systemd-cgroup") {
if systemd.UseSystemd() {
cgroupManager = libcontainer.SystemdCgroups
} else {
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
}
}
return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath(context.GlobalString("criu")))
}
// getContainer returns the specified container instance by loading it from state
// with the default factory.
func getContainer(context *cli.Context) (libcontainer.Container, error) {
id := context.Args().First()
if id == "" {
return nil, errEmptyID
}
factory, err := loadFactory(context)
if err != nil {
return nil, err
}
return factory.Load(id)
}
func fatalf(t string, v ...interface{}) {
fatal(fmt.Errorf(t, v...))
}
func getDefaultImagePath(context *cli.Context) string {
cwd, err := os.Getwd()
if err != nil {
panic(err)
}
return filepath.Join(cwd, "checkpoint")
}
// newProcess returns a new libcontainer Process with the arguments from the
// spec and stdio from the current process.
func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp := &libcontainer.Process{
Args: p.Args,
Env: p.Env,
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
Cwd: p.Cwd,
Label: p.SelinuxLabel,
NoNewPrivileges: &p.NoNewPrivileges,
AppArmorProfile: p.ApparmorProfile,
}
if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
lp.Capabilities.Effective = p.Capabilities.Effective
lp.Capabilities.Inheritable = p.Capabilities.Inheritable
lp.Capabilities.Permitted = p.Capabilities.Permitted
lp.Capabilities.Ambient = p.Capabilities.Ambient
}
for _, gid := range p.User.AdditionalGids {
lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
}
for _, rlimit := range p.Rlimits {
rl, err := createLibContainerRlimit(rlimit)
if err != nil {
return nil, err
}
lp.Rlimits = append(lp.Rlimits, rl)
}
return lp, nil
}
func destroy(container libcontainer.Container) {
if err := container.Destroy(); err != nil {
logrus.Error(err)
}
}
// setupIO modifies the given process config according to the options.
func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) {
if createTTY {
process.Stdin = nil
process.Stdout = nil
process.Stderr = nil
t := &tty{}
if !detach {
parent, child, err := utils.NewSockPair("console")
if err != nil {
return nil, err
}
process.ConsoleSocket = child
t.postStart = append(t.postStart, parent, child)
t.consoleC = make(chan error, 1)
go func() {
if err := t.recvtty(process, parent); err != nil {
t.consoleC <- err
}
t.consoleC <- nil
}()
} else {
// the caller of runc will handle receiving the console master
conn, err := net.Dial("unix", sockpath)
if err != nil {
return nil, err
}
uc, ok := conn.(*net.UnixConn)
if !ok {
return nil, fmt.Errorf("casting to UnixConn failed")
}
t.postStart = append(t.postStart, uc)
socket, err := uc.File()
if err != nil {
return nil, err
}
t.postStart = append(t.postStart, socket)
process.ConsoleSocket = socket
}
return t, nil
}
// when runc will detach the caller provides the stdio to runc via runc's 0,1,2
// and the container's process inherits runc's stdio.
if detach {
if err := inheritStdio(process); err != nil {
return nil, err
}
return &tty{}, nil
}
return setupProcessPipes(process, rootuid, rootgid)
}
// createPidFile creates a file with the processes pid inside it atomically
// it creates a temp file with the paths filename + '.' infront of it
// then renames the file
func createPidFile(path string, process *libcontainer.Process) error {
pid, err := process.Pid()
if err != nil {
return err
}
var (
tmpDir = filepath.Dir(path)
tmpName = filepath.Join(tmpDir, fmt.Sprintf(".%s", filepath.Base(path)))
)
f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
if err != nil {
return err
}
_, err = fmt.Fprintf(f, "%d", pid)
f.Close()
if err != nil {
return err
}
return os.Rename(tmpName, path)
}
// XXX: Currently we autodetect rootless mode.
func isRootless() bool {
return os.Geteuid() != 0
}
func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) {
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
CgroupName: id,
UseSystemdCgroup: context.GlobalBool("systemd-cgroup"),
NoPivotRoot: context.Bool("no-pivot"),
NoNewKeyring: context.Bool("no-new-keyring"),
Spec: spec,
Rootless: isRootless(),
})
if err != nil {
return nil, err
}
factory, err := loadFactory(context)
if err != nil {
return nil, err
}
return factory.Create(id, config)
}
type runner struct {
enableSubreaper bool
shouldDestroy bool
detach bool
listenFDs []*os.File
preserveFDs int
pidFile string
consoleSocket string
container libcontainer.Container
create bool
notifySocket *notifySocket
}
func (r *runner) run(config *specs.Process) (int, error) {
if err := r.checkTerminal(config); err != nil {
r.destroy()
return -1, err
}
process, err := newProcess(*config)
if err != nil {
r.destroy()
return -1, err
}
if len(r.listenFDs) > 0 {
process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1")
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
}
baseFd := 3 + len(process.ExtraFiles)
for i := baseFd; i < baseFd+r.preserveFDs; i++ {
process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i)))
}
rootuid, err := r.container.Config().HostRootUID()
if err != nil {
r.destroy()
return -1, err
}
rootgid, err := r.container.Config().HostRootGID()
if err != nil {
r.destroy()
return -1, err
}
var (
detach = r.detach || r.create
startFn = r.container.Start
)
if !r.create {
startFn = r.container.Run
}
// Setting up IO is a two stage process. We need to modify process to deal
// with detaching containers, and then we get a tty after the container has
// started.
handler := newSignalHandler(r.enableSubreaper, r.notifySocket)
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach, r.consoleSocket)
if err != nil {
r.destroy()
return -1, err
}
defer tty.Close()
if err = startFn(process); err != nil {
r.destroy()
return -1, err
}
if err := tty.waitConsole(); err != nil {
r.terminate(process)
r.destroy()
return -1, err
}
if err = tty.ClosePostStart(); err != nil {
r.terminate(process)
r.destroy()
return -1, err
}
if r.pidFile != "" {
if err = createPidFile(r.pidFile, process); err != nil {
r.terminate(process)
r.destroy()
return -1, err
}
}
status, err := handler.forward(process, tty, detach)
if err != nil {
r.terminate(process)
}
if detach {
return 0, nil
}
r.destroy()
return status, err
}
func (r *runner) destroy() {
if r.shouldDestroy {
destroy(r.container)
}
}
func (r *runner) terminate(p *libcontainer.Process) {
_ = p.Signal(syscall.SIGKILL)
_, _ = p.Wait()
}
func (r *runner) checkTerminal(config *specs.Process) error {
detach := r.detach || r.create
// Check command-line for sanity.
if detach && config.Terminal && r.consoleSocket == "" {
return fmt.Errorf("cannot allocate tty if runc will detach without setting console socket")
}
if (!detach || !config.Terminal) && r.consoleSocket != "" {
return fmt.Errorf("cannot use console socket if runc will not detach or allocate tty")
}
return nil
}
func validateProcessSpec(spec *specs.Process) error {
if spec.Cwd == "" {
return fmt.Errorf("Cwd property must not be empty")
}
if !filepath.IsAbs(spec.Cwd) {
return fmt.Errorf("Cwd must be an absolute path")
}
if len(spec.Args) == 0 {
return fmt.Errorf("args must not be empty")
}
return nil
}
func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, error) {
id := context.Args().First()
if id == "" {
return -1, errEmptyID
}
notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
if notifySocket != nil {
notifySocket.setupSpec(context, spec)
}
container, err := createContainer(context, id, spec)
if err != nil {
return -1, err
}
if notifySocket != nil {
notifySocket.setupSocket()
}
// Support on-demand socket activation by passing file descriptors into the container init process.
listenFDs := []*os.File{}
if os.Getenv("LISTEN_FDS") != "" {
listenFDs = activation.Files(false)
}
r := &runner{
enableSubreaper: !context.Bool("no-subreaper"),
shouldDestroy: true,
container: container,
listenFDs: listenFDs,
notifySocket: notifySocket,
consoleSocket: context.String("console-socket"),
detach: context.Bool("detach"),
pidFile: context.String("pid-file"),
preserveFDs: context.Int("preserve-fds"),
create: create,
}
return r.run(&spec.Process)
}