#!/bin/sh
# Abort on the first failing command (-e) and on unset variables (-u), and
# trace every command as it runs (-x).
set -eux
# Refuse to continue unless we run as uid 0; the script tells the user to
# re-run it through sudo.
#
# The command substitution is quoted on purpose: if id(1) printed nothing, an
# unquoted test would collapse into a malformed expression that "if" treats as
# false, and the script would carry on as though it were root.
if [ "$(id -u)" != 0 ]; then
  echo "use sudo" >&2
  exit 1
fi
# Report the mount namespace we start out in, so that it can be compared with
# the one reported from inside the unshared shell later on.
printf 'The outer mount namespace is: %s\n' "$(readlink /proc/self/ns/mnt)"
# Scratch directory in which the new root filesystem is assembled. There is no
# cleanup trap for it; it is removed again by a later step of this script.
ROOTFS="$(mktemp -d /tmp/hackery.XXXXXX)"
# Unshare the mount namespace with "shared" propagation. We need shared
# propagation to have any chance of exerting influence on the parent
# namespace.
#
# strace records the essential system calls (the -e trace= list below) of the
# whole process tree (-f) into hackery.strace in the current directory, which
# helps to understand what is happening.
#
# NOTE: the inner script is a double-quoted string, so every $ROOTFS in it
# (comments included) is expanded by this outer shell before sh -c runs. Only
# the escaped command substitution and escaped double quotes are left for the
# inner shell to evaluate. Keep backticks, unescaped double quotes and stray
# dollar signs out of the inner script and its comments.
#
# This process is replaced by strace (exec), so nothing after this command runs
# in the outer shell.
ROOTFS="$ROOTFS" \
  exec strace -qq -f \
  -o hackery.strace \
  -e signal= \
  -e trace=unshare,setns,mount,umount2,umount,pivot_root,rmdir,mkdir \
  unshare -m --propagation shared sh -c "
set -eux
# Bind mount the temporary directory for rootfs over itself so that it is a
# mount point. This is done so that it can become unbindable in the next line.
mount --bind \"$ROOTFS\" \"$ROOTFS\"
# Make the temporary directory for rootfs unbindable. This is required as
# otherwise we'd have an infinite cycle since $ROOTFS/$ROOTFS would be / again
# and the cycle would have no limit.
mount --make-unbindable \"$ROOTFS\"
# Bind mount / over $ROOTFS, this lets us have a distinctly mapped root
# directory that starts out as the genuine root directory of the host. In the
# real version of snap-confine this would be the core snap instead of the root
# filesystem.
mount --rbind / \"$ROOTFS\"
# Make the temporary rootfs recursively private so that all the mount
# operations there cannot leak to the parent namespace (which we are sharing
# initially after unshare --propagation shared). This is the essential part of
# confinement that lets processes roam free but still without any chance to
# affect the outer namespace.
# LOOPHOLE: can this be undone with --make-rshared after that or will that be
# both private and shared (private for the master, shared for any additional
# namespaces)?
mount --make-rprivate \"$ROOTFS\"
# Recursively bind mount /media into the temporary rootfs's version of /media.
# This essentially lets us see anything that is mounted there *and*
# mount/unmount anything that we want in a way that is visible outside.
# TODO: check what is the initial sharing of this in the kernel docs (there's a
# nice table in shared-subtrees.txt). This affects the next call.
mount --rbind /media \"$ROOTFS/media\"
# NOTE: disabled because it is obsolete, the default sharing is sufficient
# Make the /media directory in the temporary rootfs recursively shared. This
# lets us change anything there and have the events propagate all the way up.
# mount --make-rshared $ROOTFS/media
# Pivot root so that the temporary root directory becomes the real root
# directory and the old root directory is moved to
# $ROOTFS/var/lib/snapd/hostfs.
#
# NOTE: this pivot works because the old root is in a private peer group!
# This is quite essential.
pivot_root \"$ROOTFS\" \"$ROOTFS/var/lib/snapd/hostfs\"
# pivot_root does not move our working directory unless it was the old root
# itself, so step into the new root explicitly. Otherwise the working directory
# would keep pointing into the old root even after it is detached below.
cd /
# Get rid of the self-bind mount and the temporary directory from the old root
# filesystem. Note that we don't need any child processes for this :)
#
# The unmount must use the old rootfs location as /tmp is private now, so
# using the plain path wouldn't do what we want (it would still be bind mounted
# on the outside).
umount \"/var/lib/snapd/hostfs/$ROOTFS\"
# The rmdir can just use the vanilla location because it is now a regular
# directory. If this was snap-confine then this should happen before we put a
# private tmpfs over /tmp.
rmdir \"$ROOTFS\"
# Make the old root (hostfs) rprivate so that the MNT_DETACH-based unmount
# below does not hose the parent. Without it we seem to detach and propagate
# this to the main namespace which ends with unhappy systemd (systemd isn't
# happy without a root filesystem).
mount --make-rprivate /var/lib/snapd/hostfs
# Detach the old rootfs from this mount namespace. Don't be fooled, it is still
# mounted, we just don't see it.
#
# NOTE: we might wish to preserve the old rootfs (and not unmount it) but it
# has to be instead converted to a slave mount or many bad things can happen
# (all the mount events there would propagate to the real hostfs which is a lot
# of power!).
umount -l /var/lib/snapd/hostfs
echo \"The inner mount namespace is: \$(readlink /proc/self/ns/mnt)\"
# Let's play :-)
exec bash
"
|