forked from mirrors/linux
		
	netfilter: nft_socket: make cgroupsv2 matching work with namespaces
When running in a container environment, /sys/fs/cgroup/ might not be
the real root node of the sk-attached cgroup.
Example:
In container:
% stat /sys//fs/cgroup/
Device: 0,21    Inode: 2214  ..
% stat /sys/fs/cgroup/foo
Device: 0,21    Inode: 2264  ..
The expectation would be for:
  nft add rule .. socket cgroupv2 level 1 "foo" counter
to match traffic from a process that got added to "foo" via
"echo $pid > /sys/fs/cgroup/foo/cgroup.procs".
However, 'level 3' is needed to make this work.
Seen from initial namespace, the complete hierarchy is:
% stat /sys/fs/cgroup/system.slice/docker-.../foo
  Device: 0,21    Inode: 2264 ..
i.e. hierarchy is
0    1               2              3
/ -> system.slice -> docker-1... -> foo
... but the container doesn't know that its "/" is the "docker-1.."
cgroup.  Current code will retrieve the 'system.slice' cgroup node
and store its kn->id in the destination register, so compare with
2264 ("foo" cgroup id) will not match.
Fetch "/" cgroup from ->init() and add its level to the level we try to
extract.  cgroup root-level is 0 for the init-namespace or the level
of the ancestor that is exposed as the cgroup root inside the container.
In the above case, cgrp->level of "/" resolved in the container is 2
(docker-1...scope/) and request for 'level 1' will get adjusted
to fetch the actual level (3).
v2: use CONFIG_SOCK_CGROUP_DATA, eval function depends on it.
    (kernel test robot)
Cc: cgroups@vger.kernel.org
Fixes: e0bb96db96 ("netfilter: nft_socket: add support for cgroupsv2")
Reported-by: Nadia Pinaeva <n.m.pinaeva@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
			
			
This commit is contained in:
		
							parent
							
								
									8b26ff7af8
								
							
						
					
					
						commit
						7f3287db65
					
				
					 1 changed files with 38 additions and 3 deletions
				
			
		| 
						 | 
					@ -9,7 +9,8 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct nft_socket {
 | 
					struct nft_socket {
 | 
				
			||||||
	enum nft_socket_keys		key:8;
 | 
						enum nft_socket_keys		key:8;
 | 
				
			||||||
	u8				level;
 | 
						u8				level;		/* cgroupv2 level to extract */
 | 
				
			||||||
 | 
						u8				level_user;	/* cgroupv2 level provided by userspace */
 | 
				
			||||||
	u8				len;
 | 
						u8				len;
 | 
				
			||||||
	union {
 | 
						union {
 | 
				
			||||||
		u8			dreg;
 | 
							u8			dreg;
 | 
				
			||||||
| 
						 | 
					@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
 | 
				
			||||||
	memcpy(dest, &cgid, sizeof(u64));
 | 
						memcpy(dest, &cgid, sizeof(u64));
 | 
				
			||||||
	return true;
 | 
						return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* process context only, uses current->nsproxy. */
 | 
				
			||||||
 | 
					static noinline int nft_socket_cgroup_subtree_level(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct cgroup *cgrp = cgroup_get_from_path("/");
 | 
				
			||||||
 | 
						int level;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!cgrp)
 | 
				
			||||||
 | 
							return -ENOENT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						level = cgrp->level;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cgroup_put(cgrp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (WARN_ON_ONCE(level > 255))
 | 
				
			||||||
 | 
							return -ERANGE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (WARN_ON_ONCE(level < 0))
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return level;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
 | 
					static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
 | 
				
			||||||
| 
						 | 
					@ -174,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 | 
				
			||||||
	case NFT_SOCKET_MARK:
 | 
						case NFT_SOCKET_MARK:
 | 
				
			||||||
		len = sizeof(u32);
 | 
							len = sizeof(u32);
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
#ifdef CONFIG_CGROUPS
 | 
					#ifdef CONFIG_SOCK_CGROUP_DATA
 | 
				
			||||||
	case NFT_SOCKET_CGROUPV2: {
 | 
						case NFT_SOCKET_CGROUPV2: {
 | 
				
			||||||
		unsigned int level;
 | 
							unsigned int level;
 | 
				
			||||||
 | 
							int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (!tb[NFTA_SOCKET_LEVEL])
 | 
							if (!tb[NFTA_SOCKET_LEVEL])
 | 
				
			||||||
			return -EINVAL;
 | 
								return -EINVAL;
 | 
				
			||||||
| 
						 | 
					@ -185,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 | 
				
			||||||
		if (level > 255)
 | 
							if (level > 255)
 | 
				
			||||||
			return -EOPNOTSUPP;
 | 
								return -EOPNOTSUPP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							err = nft_socket_cgroup_subtree_level();
 | 
				
			||||||
 | 
							if (err < 0)
 | 
				
			||||||
 | 
								return err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							priv->level_user = level;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							level += err;
 | 
				
			||||||
 | 
							/* Implies a giant cgroup tree */
 | 
				
			||||||
 | 
							if (WARN_ON_ONCE(level > 255))
 | 
				
			||||||
 | 
								return -EOPNOTSUPP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		priv->level = level;
 | 
							priv->level = level;
 | 
				
			||||||
		len = sizeof(u64);
 | 
							len = sizeof(u64);
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
| 
						 | 
					@ -209,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb,
 | 
				
			||||||
	if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
 | 
						if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
 | 
				
			||||||
		return -1;
 | 
							return -1;
 | 
				
			||||||
	if (priv->key == NFT_SOCKET_CGROUPV2 &&
 | 
						if (priv->key == NFT_SOCKET_CGROUPV2 &&
 | 
				
			||||||
	    nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
 | 
						    nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user)))
 | 
				
			||||||
		return -1;
 | 
							return -1;
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue