Net namespaces may be broken on 3.14 |
|
Issue description
OS: 3.14 kernel, ToT pointing to commit "7e7d95d - CHROMIUM: sysrq: replay the correct keys when releasing SysRq combo".
What steps will reproduce the problem?
(1) Load kernel on samus-cheets with head pointing to this commit.
(2) Boot samus, type 'ip netns add blue' (to create a new namespace called blue) in a CrOS root shell.
(3) Type 'file /proc/self/ns/net'
What is the expected output?
Should be able to create new namespaces and exec commands within them.
What do you see instead?
Commands that start with 'ip netns' fail with 'request send failed: Invalid argument'. strace output below.
localhost / # file /proc/self/ns/net
/proc/self/ns/net: broken symbolic link to net:[4026531956]
localhost / # strace ip netns add blue
execve("/bin/ip", ["ip", "netns", "add", "blue"], [/* 19 vars */]) = 0
brk(0) = 0x7f74356e7000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f74337bf000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=40023, ...}) = 0
mmap(NULL, 40023, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f74337b5000
close(3) = 0
open("/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300\16\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=14440, ...}) = 0
mmap(NULL, 2109584, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f743339b000
mprotect(0x7f743339e000, 2093056, PROT_NONE) = 0
mmap(0x7f743359d000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f743359d000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\0\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1791720, ...}) = 0
mmap(NULL, 3900568, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7432fe2000
mprotect(0x7f7433191000, 2093056, PROT_NONE) = 0
mmap(0x7f7433390000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1ae000) = 0x7f7433390000
mmap(0x7f7433396000, 17560, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f7433396000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f74337b4000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f74337b3000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f74337b2000
arch_prctl(ARCH_SET_FS, 0x7f74337b3700) = 0
mprotect(0x7f7433390000, 16384, PROT_READ) = 0
mprotect(0x7f743359d000, 4096, PROT_READ) = 0
mprotect(0x7f7433818000, 8192, PROT_READ) = 0
mprotect(0x7f74337c0000, 4096, PROT_READ) = 0
munmap(0x7f74337b5000, 40023) = 0
socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0
bind(3, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, pid=30429, groups=00000000}, [12]) = 0
open("/proc/self/ns/net", O_RDONLY) = 4
sendto(3, "\34\0\0\0Z\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\3\0\4\0\0\0", 28, 0, NULL, 0) = -1 EINVAL (Invalid argument)
dup(2) = 5
fcntl(5, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
brk(0) = 0x7f74356e7000
brk(0x7f7435708000) = 0x7f7435708000
fstat(5, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f74337be000
lseek(5, 0, SEEK_CUR) = -1 ESPIPE (Illegal seek)
write(5, "request send failed: Invalid arg"..., 38request send failed: Invalid argument
) = 38
close(5) = 0
munmap(0x7f74337be000, 4096) = 0
exit_group(1) = ?
+++ exited with 1 +++
,
Aug 2 2016
Kevin, I think it is already in 'failure' mode by then. The socket syscall is returning 3, which is ESRCH? And even after that, the open of /proc/self/ns/net will fail because it's a broken symlink (on my Linux system as well as on CrOS - irrespective of whether you are in the actual system or in a container/chroot).
Confirmed that I see the same thing on a dev channel build:
localhost / # cat /etc/lsb-release
CHROMEOS_RELEASE_APPID={F67500C1-C6D8-5287-E4EC-F9BBB4AEE5C5}
CHROMEOS_BOARD_APPID={F67500C1-C6D8-5287-E4EC-F9BBB4AEE5C5}
CHROMEOS_CANARY_APPID={90F229CE-83E2-4FAF-8479-E368A34938B1}
DEVICETYPE=CHROMEBOOK
CHROMEOS_ARC_VERSION=3102164
CHROMEOS_RELEASE_BOARD=samus
CHROMEOS_DEVSERVER=
GOOGLE_RELEASE=8530.35.0
CHROMEOS_RELEASE_BUILD_NUMBER=8530
CHROMEOS_RELEASE_BRANCH_NUMBER=35
CHROMEOS_RELEASE_CHROME_MILESTONE=53
CHROMEOS_RELEASE_PATCH_NUMBER=0
CHROMEOS_RELEASE_TRACK=testimage-channel
CHROMEOS_RELEASE_DESCRIPTION=8530.35.0 (Official Build) dev-channel samus test
CHROMEOS_RELEASE_BUILD_TYPE=Official Build
CHROMEOS_RELEASE_NAME=Chrome OS
CHROMEOS_RELEASE_VERSION=8530.35.0
CHROMEOS_AUSERVER=https://tools.google.com/service/update2
localhost / # uname -a
Linux localhost 3.14.0 #25 SMP PREEMPT Tue Aug 2 10:32:04 PDT 2016 x86_64 Intel(R) Core(TM) i3-5005U CPU @ 2.00GHz GenuineIntel GNU/Linux
localhost / # mount -o remount,rw /
localhost / # ip netns add blue
request send failed: Invalid argument
This is a bit perplexing because I was able to run these commands on ToT earlier today.
,
Aug 2 2016
> The socket syscall is returning 3 which is ESRCH?
It's returning a valid file descriptor. You'll typically see -1 <decoded_errno>, similar to lseek above, if a syscall failed.
> And even after that, the open to /proc/self/ns/net will fail because its a broken symlink
Yeah, those /proc symlinks can be pretty weird. But I think it's normal in this case. /proc/PID/fd can also contain intentionally broken symlinks for e.g. sockets and pipes, since there's no good way to represent them as filesystem paths.
,
Aug 2 2016
The real issue is here:
sendto(3, "\34\0\0\0Z\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\3\0\4\0\0\0", 28, 0, NULL, 0)
The config command sent over the netlink socket failed for some reason.
,
Aug 2 2016
So I realized that this has not been tested before on 3.14. When I did it in the past to verify net-namespace specific CLs, it was for 3.18 and 4.4 (since we have 3.14 running the Android container).
Here's a good case trace from a 3.18 kernel:
execve("/bin/ip", ["ip", "netns", "add", "purple"], [/* 22 vars */]) = 0
brk(0) = 0x7f83fa70c000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f83fa618000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/usr/local/lib/tls/x86_64/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib/tls/x86_64", 0x7ffd3c562380) = -1 ENOENT (No such file or directory)
open("/usr/local/lib/tls/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib/tls", 0x7ffd3c562380) = -1 ENOENT (No such file or directory)
open("/usr/local/lib/x86_64/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib/x86_64", 0x7ffd3c562380) = -1 ENOENT (No such file or directory)
open("/usr/local/lib/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
open("/usr/local/lib64/tls/x86_64/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib64/tls/x86_64", 0x7ffd3c562380) = -1 ENOENT (No such file or directory)
open("/usr/local/lib64/tls/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib64/tls", 0x7ffd3c562380) = -1 ENOENT (No such file or directory)
open("/usr/local/lib64/x86_64/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib64/x86_64", 0x7ffd3c562380) = -1 ENOENT (No such file or directory)
open("/usr/local/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib64", {st_mode=S_IFDIR|0755, st_size=12288, ...}) = 0
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=39285, ...}) = 0
mmap(NULL, 39285, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f83fa60e000
close(3) = 0
open("/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300\16\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=14440, ...}) = 0
mmap(NULL, 2109584, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f83fa1f4000
mprotect(0x7f83fa1f7000, 2093056, PROT_NONE) = 0
mmap(0x7f83fa3f6000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f83fa3f6000
close(3) = 0
open("/usr/local/lib/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
open("/usr/local/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\0\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1791720, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f83fa60d000
mmap(NULL, 3900568, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f83f9e3b000
mprotect(0x7f83f9fea000, 2093056, PROT_NONE) = 0
mmap(0x7f83fa1e9000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1ae000) = 0x7f83fa1e9000
mmap(0x7f83fa1ef000, 17560, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f83fa1ef000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f83fa60c000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f83fa60b000
arch_prctl(ARCH_SET_FS, 0x7f83fa60c700) = 0
mprotect(0x7f83fa1e9000, 16384, PROT_READ) = 0
mprotect(0x7f83fa3f6000, 4096, PROT_READ) = 0
mprotect(0x7f83fa671000, 8192, PROT_READ) = 0
mprotect(0x7f83fa619000, 4096, PROT_READ) = 0
munmap(0x7f83fa60e000, 39285) = 0
socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0
bind(3, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, pid=8654, groups=00000000}, [12]) = 0
open("/proc/self/ns/net", O_RDONLY) = 4
sendto(3, "\34\0\0\0Z\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\3\0\4\0\0\0", 28, 0, NULL, 0) = 28
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\34\0\0\0Z\0\0\0\0\0\0\0\316!\0\0\0\0\0\0\10\0\1\0\377\377\377\377", 16384}], msg_controllen=0, msg_flags=0}, 0) = 28
close(4) = 0
socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 4
setsockopt(4, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(4, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0
bind(4, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(4, {sa_family=AF_NETLINK, pid=-4158, groups=00000000}, [12]) = 0
openat(AT_FDCWD, "/run/netns", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 5
fcntl(5, F_GETFD) = 0x1 (flags FD_CLOEXEC)
brk(0) = 0x7f83fa70c000
brk(0x7f83fa735000) = 0x7f83fa735000
getdents(5, /* 3 entries */, 32768) = 80
open("/run/netns/yellow", O_RDONLY) = 6
sendmsg(4, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\34\0\0\0Z\0\1\0\346\372\240W\0\0\0\0\0\0\0\0\10\0\3\0\6\0\0\0", 28}], msg_controllen=0, msg_flags=0}, 0) = 28
recvmsg(4, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\34\0\0\0Z\0\0\0\346\372\240W\302\357\377\377\0\0\0\0\10\0\1\0\377\377\377\377", 32768}], msg_controllen=0, msg_flags=0}, 0) = 28
close(6) = 0
getdents(5, /* 0 entries */, 32768) = 0
brk(0x7f83fa72d000) = 0x7f83fa72d000
close(5) = 0
mkdir("/run/netns", 0755) = -1 EEXIST (File exists)
mount("", "/run/netns", "none", MS_REC|MS_SHARED, NULL) = 0
open("/run/netns/purple", O_RDONLY|O_CREAT|O_EXCL, 0) = 5
close(5) = 0
unshare(CLONE_NEWNET) = 0
mount("/proc/self/ns/net", "/run/netns/purple", 0x7f83fa66cd25, MS_BIND, NULL) = 0
exit_group(0) = ?
+++ exited with 0 +++
,
Aug 2 2016
This is a bug in ip netns support:
4c7d9a58 (Nicolas Dichtel 2015-04-13 10:34:26 +0200 81) if (rtnl_send(&rth, &req.n, req.n.nlmsg_len) < 0) {
4c7d9a58 (Nicolas Dichtel 2015-04-13 10:34:26 +0200 82) perror("request send failed");
4c7d9a58 (Nicolas Dichtel 2015-04-13 10:34:26 +0200 83) exit(1);
4c7d9a58 (Nicolas Dichtel 2015-04-13 10:34:26 +0200 84) }
They want to test whether the kernel supports the RTM_GETNSID command, but instead of returning "no support" they kill the process.
|
|
►
Sign in to add a comment |
|
Comment 1 by cernekee@chromium.org
, Aug 2 2016