この記事で書くこと
前回に引き続いて crictl
からContainer Runtimeを操作してContainerを起動してみる. 前回との違いはLow level Runtime.
- High level Runtime: containerd
- Low level Runtime: runsc (gVisor)[^1]
環境構築
前回と同じ構築手順を実施します.
前回の構築手順の要約
- 環境
- OS: Ubuntu18.04
- Infra: AWS EC2 t3a.nano
- インストールおよび設定
sudo apt purge -y docker docker-engine docker.io containerd runc
sudo apt update -y
sudo apt install -y \
apt-transport-https \
ca-certificates \
curl \
gnupg-agent \
software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
| sudo apt-key add -
export OS=xUbuntu_18.04
curl -fsSL https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/$OS/Release.key \
| sudo apt-key add -
sudo add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) \
stable"
export OS=xUbuntu_18.04
echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/$OS/ /" \
| sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list
sudo apt update -y
sudo apt install -y \
containerd.io \
cri-tools \
containernetworking-plugins
cat << __EOT__ | sudo tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
__EOT__
sudo modprobe overlay
sudo modprobe br_netfilter
cat << __EOT__ | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
__EOT__
sudo sysctl --system
sudo mkdir -p /etc/containerd
sudo containerd config default | sudo tee /etc/containerd/config.toml
cat << __EOT__ | sudo tee /etc/cni/net.d/50-bridge.conflist
{
"cniVersion": "0.4.0",
"name": "cni-bridge",
"plugins": [
{
"type": "bridge",
"bridge": "cni-bridge0",
"isGateway": true,
"isMasq": true,
"hairpinMode": true,
"ipam": {
"type": "host-local",
"routes": [
{
"dst": "0.0.0.0/0"
}
],
"ranges": [
[
{
"subnet": "10.66.0.0/16",
"gateway": "10.66.0.1"
}
]
]
}
},
{
"type": "portmap",
"capabilities": {
"portMappings": true
}
},
{
"type": "firewall"
},
{
"type": "tuning"
}
]
}
__EOT__
sudo systemctl restart containerd
echo 'runtime-endpoint: unix:///run/containerd/containerd.sock' | sudo tee /etc/crictl.yaml
さらにgVisorをインストールする. 公式サイトの手順を参考にするが, ここまでの手順と重複する部分は省略できる.
gVisorインストールコマンド
# 以下2コマンドはここまでの手順と重複するので省略できる
# sudo apt update -y
# sudo apt install -y \
# apt-transport-https \
# ca-certificates \
# curl \
# gnupg-agent \
# software-properties-common
curl -fsSL https://gvisor.dev/archive.key | sudo apt-key add -
sudo add-apt-repository "deb https://storage.googleapis.com/gvisor/releases release main"
sudo apt update -y && sudo apt install -y runsc
containerdの設定ファイルにrunscを呼び出すための以下の設定を追加する.
/etc/containerd/config.toml
に以下の内容を追記する. tomlファイルなのでインデントや追記位置はどこでも構わないが, あとの追記例のように, runcの設定の後にインデントを合わせて追加するのがおすすめ.
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc]
runtime_type = "io.containerd.runsc.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc.options]
追記例
version = 2
root = "/var/lib/containerd"
state = "/run/containerd"
plugin_dir = ""
disabled_plugins = []
required_plugins = []
oom_score = 0
[grpc]
address = "/run/containerd/containerd.sock"
tcp_address = ""
tcp_tls_cert = ""
tcp_tls_key = ""
uid = 0
gid = 0
max_recv_message_size = 16777216
max_send_message_size = 16777216
[ttrpc]
address = ""
uid = 0
gid = 0
[debug]
address = ""
uid = 0
gid = 0
level = ""
[metrics]
address = ""
grpc_histogram = false
[cgroup]
path = ""
[timeouts]
"io.containerd.timeout.shim.cleanup" = "5s"
"io.containerd.timeout.shim.load" = "5s"
"io.containerd.timeout.shim.shutdown" = "3s"
"io.containerd.timeout.task.state" = "2s"
[plugins]
[plugins."io.containerd.gc.v1.scheduler"]
pause_threshold = 0.02
deletion_threshold = 0
mutation_threshold = 100
schedule_delay = "0s"
startup_delay = "100ms"
[plugins."io.containerd.grpc.v1.cri"]
disable_tcp_service = true
stream_server_address = "127.0.0.1"
stream_server_port = "0"
stream_idle_timeout = "4h0m0s"
enable_selinux = false
selinux_category_range = 1024
sandbox_image = "k8s.gcr.io/pause:3.2"
stats_collect_period = 10
systemd_cgroup = false
enable_tls_streaming = false
max_container_log_line_size = 16384
disable_cgroup = false
disable_apparmor = false
restrict_oom_score_adj = false
max_concurrent_downloads = 3
disable_proc_mount = false
unset_seccomp_profile = ""
tolerate_missing_hugetlb_controller = true
disable_hugetlb_controller = true
ignore_image_defined_volumes = false
[plugins."io.containerd.grpc.v1.cri".containerd]
snapshotter = "overlayfs"
default_runtime_name = "runc"
no_pivot = false
disable_snapshot_annotations = true
discard_unpacked_layers = false
[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
runtime_type = ""
runtime_engine = ""
runtime_root = ""
privileged_without_host_devices = false
base_runtime_spec = ""
[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
runtime_type = ""
runtime_engine = ""
runtime_root = ""
privileged_without_host_devices = false
base_runtime_spec = ""
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
runtime_engine = ""
runtime_root = ""
privileged_without_host_devices = false
base_runtime_spec = ""
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+ [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc]
+ runtime_type = "io.containerd.runsc.v1"
+ [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc.options]
[plugins."io.containerd.grpc.v1.cri".cni]
bin_dir = "/opt/cni/bin"
conf_dir = "/etc/cni/net.d"
max_conf_num = 1
conf_template = ""
[plugins."io.containerd.grpc.v1.cri".registry]
[plugins."io.containerd.grpc.v1.cri".registry.mirrors]
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
endpoint = ["https://registry-1.docker.io"]
[plugins."io.containerd.grpc.v1.cri".image_decryption]
key_model = ""
[plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
tls_cert_file = ""
tls_key_file = ""
[plugins."io.containerd.internal.v1.opt"]
path = "/opt/containerd"
[plugins."io.containerd.internal.v1.restart"]
interval = "10s"
[plugins."io.containerd.metadata.v1.bolt"]
content_sharing_policy = "shared"
[plugins."io.containerd.monitor.v1.cgroups"]
no_prometheus = false
[plugins."io.containerd.runtime.v1.linux"]
shim = "containerd-shim"
runtime = "runc"
runtime_root = ""
no_shim = false
shim_debug = false
[plugins."io.containerd.runtime.v2.task"]
platforms = ["linux/amd64"]
[plugins."io.containerd.service.v1.diff-service"]
default = ["walking"]
[plugins."io.containerd.snapshotter.v1.devmapper"]
root_path = ""
pool_name = ""
base_image_size = ""
async_remove = false
Container実行
これも前回と同様に設定ファイルを作ります.
設定ファイル
{
"metadata": {
"name": "nginx",
"namespace": "default",
"attempt": 1,
"uid": "hdishd83djaidwnduwk28bcsc"
},
"log_directory": "/tmp",
"port_mappings": [
{
"protocol": 0,
"container_port": 80,
"host_port": 80
}
],
"linux": {
"security_context": {
"privileged": true
}
}
}
{
"metadata": {
"name": "nginx"
},
"image":{
"image": "nginx:1.19"
},
"mounts": [
{
"container_path": "/usr/share/nginx/html/",
"host_path": "/home/ubuntu/html/"
}
],
"log_path":"nginx.0.log",
"linux": {
"security_context": {
"privileged": true
}
}
}
<!DOCTYPE html>
<html>
<head>
<title>Hello Container!</title>
</head>
<body>
<h1>Hello, Container world!</h1>
<img style="width: 100px;" src="https://raw.githubusercontent.com/opencontainers/artwork/master/oci/icon/color/oci-icon-color.svg">
</body>
</html>
基本的にContainer起動のコマンドも前回同様ですが, runp
コマンド実行時に --runtime=runsc
オプションを付与します.
podid="$(sudo crictl runp --runtime=runsc nginx-pod.json)"
containerid="$(sudo crictl create "${podid}" nginx-container.json nginx-pod.json)"
sudo crictl start "${containerid}"
または以下のように run
コマンドで一度に pod と containerを両方起動することもできます.
sudo crictl run --runtime=runsc nginx-container.json nginx-pod.json
実行したContainerがホストカーネルではなくユーザ空間カーネルで実行されていることを確認しましょう. 少し間接的[^2]ではありますが dmesg
を今起動したContainerでexecすることで確認してみましょう.
$ sudo crictl exec "${containerid}" dmesg
[ 0.000000] Starting gVisor...
[ 0.370653] Preparing for the zombie uprising...
[ 0.407931] Gathering forks...
[ 0.779895] Waiting for children...
[ 0.871540] Checking naughty and nice process list...
[ 0.884175] Feeding the init monster...
[ 1.382653] Checking naughty and nice process list...
[ 1.421019] Creating cloned children...
[ 1.435422] Digging up root...
[ 1.634647] Reticulating splines...
[ 2.081503] Daemonizing children...
[ 2.194071] Ready!
以下のruncの場合にはホストカーネルのdmesgと同内容が見えていることとは対照的です.
runcの場合
ubuntu@ip-172-31-17-133:~$ sudo crictl exec "${containerid}" dmesg
[ 0.000000] Linux version 5.4.0-1032-aws (buildd@lcy01-amd64-019) (gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)) #33~18.04.1-Ubuntu SMP Thu Dec 10 08:19:06 UTC 2020 (Ubuntu 5.4.0-1032.33~18.04.1-aws 5.4.73)
[ 0.000000] Command line: BOOT_IMAGE=/boot/vmlinuz-5.4.0-1032-aws root=UUID=1b28661d-dff9-40d3-970a-8798394d54f6 ro console=tty1 console=ttyS0 nvme_core.io_timeout=4294967295
[ 0.000000] KERNEL supported cpus:
[ 0.000000] Intel GenuineIntel
[ 0.000000] AMD AuthenticAMD
[ 0.000000] Hygon HygonGenuine
[ 0.000000] Centaur CentaurHauls
[ 0.000000] zhaoxin Shanghai
... (後略) ...
また, 別の違いとしてホスト側から見たときにContainer内のプロセスがそのままは見えないという点もあります.
$ sudo ps axfwww
... (略) ...
9620 ? Sl 0:00 /usr/bin/containerd-shim-runsc-v1 -namespace k8s.io -address /run/containerd/containerd.sock -publish-binary /usr/bin/containerd
9645 ? Sl 0:00 \_ runsc-gofer --root=/run/containerd/runsc/k8s.io --log=/run/containerd/io.containerd.runtime.v2.task/k8s.io/3cb91766dc375dea6a318a246fb79bb7b13479286d86ebccf52000d3b03b41b0/log.json --log-format=json --log-fd=3 gofer --bundle /run/containerd/io.containerd.runtime.v2.task/k8s.io/3cb91766dc375dea6a318a246fb79bb7b13479286d86ebccf52000d3b03b41b0 --spec-fd=4 --mounts-fd=5 --io-fds=6 --io-fds=7 --apply-caps=false --setup-root=false
9646 ? Ssl 0:00 \_ runsc-sandbox --root=/run/containerd/runsc/k8s.io --log=/run/containerd/io.containerd.runtime.v2.task/k8s.io/3cb91766dc375dea6a318a246fb79bb7b13479286d86ebccf52000d3b03b41b0/log.json --log-format=json --log-fd=3 boot --bundle=/run/containerd/io.containerd.runtime.v2.task/k8s.io/3cb91766dc375dea6a318a246fb79bb7b13479286d86ebccf52000d3b03b41b0 --controller-fd=4 --mounts-fd=5 --spec-fd=6 --start-sync-fd=7 --io-fds=8 --io-fds=9 --stdio-fds=10 --stdio-fds=11 --stdio-fds=12 --cpu-num 2 --user-log-fd 13 3cb91766dc375dea6a318a246fb79bb7b13479286d86ebccf52000d3b03b41b0
9667 ? tsl 0:00 | \_ [exe]
9696 ? tl 0:00 | \_ [exe]
9764 ? tl 0:00 | \_ [exe]
9778 ? tl 0:00 | \_ [exe]
9783 ? tl 0:00 | \_ [exe]
9790 ? tl 0:00 | \_ [exe]
9809 ? tl 0:00 | \_ [exe]
9828 ? tl 0:00 | \_ [exe]
9831 ? tl 0:00 | \_ [exe]
9842 ? tl 0:00 | \_ [exe]
9876 ? tl 0:00 | \_ [exe]
9693 ? Sl 0:00 \_ runsc --root=/run/containerd/runsc/k8s.io --log=/run/containerd/io.containerd.runtime.v2.task/k8s.io/3cb91766dc375dea6a318a246fb79bb7b13479286d86ebccf52000d3b03b41b0/log.json --log-format=json wait 3cb91766dc375dea6a318a246fb79bb7b13479286d86ebccf52000d3b03b41b0
9717 ? Sl 0:00 /usr/bin/containerd-shim-runsc-v1 -namespace k8s.io -address /run/containerd/containerd.sock -publish-binary /usr/bin/containerd
9736 ? Sl 0:00 \_ runsc-gofer --root=/run/containerd/runsc/k8s.io --log=/run/containerd/io.containerd.runtime.v2.task/k8s.io/e13b8c85c8b7c9775de3d55b828a847de79849e8c78188413926e8b230731fb9/log.json --log-format=json --log-fd=3 gofer --bundle /run/containerd/io.containerd.runtime.v2.task/k8s.io/e13b8c85c8b7c9775de3d55b828a847de79849e8c78188413926e8b230731fb9 --spec-fd=4 --mounts-fd=5 --io-fds=6 --io-fds=7 --io-fds=8 --io-fds=9 --io-fds=10 --apply-caps=false --setup-root=false
9768 ? Sl 0:00 \_ runsc --root=/run/containerd/runsc/k8s.io --log=/run/containerd/io.containerd.runtime.v2.task/k8s.io/e13b8c85c8b7c9775de3d55b828a847de79849e8c78188413926e8b230731fb9/log.json --log-format=json wait e13b8c85c8b7c9775de3d55b828a847de79849e8c78188413926e8b230731fb9
ホスト側はContainerに対して特権があるので, このことは「ホストを侵害されてもgVisorがContainerへの侵害を防御してくれる」ということでは ありません. Container内のプロセスが名前空間だけでなくカーネルレベルで隔離されているのでContainerからホストへの侵害がしにくくなっている仕組みをホスト側から見ていることになります.
まとめ
containerdとrunsc(gVisor)をCRI経由で操作してみることができました. runcよりも高いContainer隔離レベルでContainerが実行されるgVisorの特徴も確認することができました.
今回は, gVisorで動作確認が取れているnginxを使用しましたが, 任意のContainer image(アプリケーション)が動作する保証はないので, その点は注意して利用する必要があります.