forked from sonic-net/sonic-buildimage
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add driver-l3mdev-cgroup patch Signed-off-by: Guohan Lu <gulv@microsoft.com> * update abiname to 8-1 Signed-off-by: Guohan Lu <gulv@microsoft.com>
- Loading branch information
Showing
6 changed files
with
386 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
enable CONFIG_CGROUP_L3MDEV=y | ||
|
||
From: Guohan Lu <gulv@microsoft.com> | ||
|
||
|
||
--- | ||
debian/build/build_amd64_none_amd64/.config | 1 + | ||
1 file changed, 1 insertion(+) | ||
|
||
diff --git a/debian/build/build_amd64_none_amd64/.config b/debian/build/build_amd64_none_amd64/.config | ||
index b7bcf15..a8d64a7 100644 | ||
--- a/debian/build/build_amd64_none_amd64/.config | ||
+++ b/debian/build/build_amd64_none_amd64/.config | ||
@@ -1493,6 +1492,7 @@ CONFIG_MPLS_IPTUNNEL=m | ||
# CONFIG_HSR is not set | ||
# CONFIG_NET_SWITCHDEV is not set | ||
CONFIG_NET_L3_MASTER_DEV=y | ||
+CONFIG_CGROUP_L3MDEV=y | ||
# CONFIG_NET_NCSI is not set | ||
CONFIG_RPS=y | ||
CONFIG_RFS_ACCEL=y |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,338 @@ | ||
Add cgroup to assoicate tasks with L3 networking domains. AF_INET{6} | ||
sockets opened by tasks associated with an l3mdev cgroup are bound to | ||
the associated master device when the socket is created. This allows a | ||
user to run a command (and its children) within an L3 networking context. | ||
|
||
The master-device for an l3mdev cgroup must be an L3 master device | ||
(e.g., VRF), and it must be set before attaching tasks to the cgroup. Once | ||
set the master-device can not change. Nested l3mdev cgroups are not | ||
supported. The root (aka default) l3mdev cgroup can not be bound to a | ||
master device. | ||
|
||
Example: | ||
ip link add vrf-red type vrf table vrf-red | ||
ip link set dev vrf-red up | ||
ip link set dev eth1 master vrf-red | ||
|
||
cgcreate -g l3mdev:vrf-red | ||
cgset -r l3mdev.master-device=vrf-red vrf-red | ||
cgexec -g l3mdev:vrf-red bash | ||
|
||
At this point the current shell and its child processes are attached to | ||
the vrf-red L3 domain. Any AF_INET and AF_INET6 sockets opened by the | ||
tasks are bound to the vrf-red device. | ||
|
||
TO-DO: | ||
- how to auto-create the cgroup when a VRF device is created and auto-deleted | ||
when a VRF device is destroyed | ||
|
||
Signed-off-by: David Ahern <dsa-qUQiAmfTcIp+XZJcv9eMoEEOCMrvLtNR@public.gmane.org> | ||
--- | ||
include/linux/cgroup_subsys.h | 4 + | ||
include/net/l3mdev_cgroup.h | 27 ++++++ | ||
net/core/sock.c | 2 | ||
net/l3mdev/Kconfig | 12 +++ | ||
net/l3mdev/Makefile | 1 | ||
net/l3mdev/l3mdev_cgroup.c | 195 +++++++++++++++++++++++++++++++++++++++++ | ||
6 files changed, 241 insertions(+) | ||
create mode 100644 include/net/l3mdev_cgroup.h | ||
create mode 100644 net/l3mdev/l3mdev_cgroup.c | ||
|
||
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h | ||
index 0df0336..72c220a 100644 | ||
--- a/include/linux/cgroup_subsys.h | ||
+++ b/include/linux/cgroup_subsys.h | ||
@@ -48,6 +48,10 @@ SUBSYS(perf_event) | ||
SUBSYS(net_prio) | ||
#endif | ||
|
||
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) | ||
+SUBSYS(l3mdev) | ||
+#endif | ||
+ | ||
#if IS_ENABLED(CONFIG_CGROUP_HUGETLB) | ||
SUBSYS(hugetlb) | ||
#endif | ||
diff --git a/include/net/l3mdev_cgroup.h b/include/net/l3mdev_cgroup.h | ||
new file mode 100644 | ||
index 0000000..7d2bef1 | ||
--- /dev/null | ||
+++ b/include/net/l3mdev_cgroup.h | ||
@@ -0,0 +1,27 @@ | ||
+/* | ||
+ * l3mdev_cgroup.h Control Group for L3 Master Device | ||
+ * | ||
+ * Copyright (c) 2015 Cumulus Networks. All rights reserved. | ||
+ * Copyright (c) 2015 David Ahern <dsa-qUQiAmfTcIp+XZJcv9eMoEEOCMrvLtNR@public.gmane.org> | ||
+ * | ||
+ * This program is free software; you can redistribute it and/or modify | ||
+ * it under the terms of the GNU General Public License as published by | ||
+ * the Free Software Foundation; either version 2 of the License, or | ||
+ * (at your option) any later version. | ||
+ */ | ||
+ | ||
+#ifndef _L3MDEV_CGROUP_H | ||
+#define _L3MDEV_CGROUP_H | ||
+ | ||
+#if IS_ENABLED(CONFIG_CGROUP_L3MDEV) | ||
+ | ||
+void sock_update_l3mdev(struct sock *sk); | ||
+ | ||
+#else /* !CONFIG_CGROUP_L3MDEV */ | ||
+ | ||
+static inline void sock_update_l3mdev(struct sock *sk) | ||
+{ | ||
+} | ||
+ | ||
+#endif /* CONFIG_CGROUP_L3MDEV */ | ||
+#endif /* _L3MDEV_CGROUP_H */ | ||
diff --git a/net/core/sock.c b/net/core/sock.c | ||
index 1c4c434..ebb25ca 100644 | ||
--- a/net/core/sock.c | ||
+++ b/net/core/sock.c | ||
@@ -131,6 +131,7 @@ | ||
#include <linux/ipsec.h> | ||
#include <net/cls_cgroup.h> | ||
#include <net/netprio_cgroup.h> | ||
+#include <net/l3mdev_cgroup.h> | ||
#include <linux/sock_diag.h> | ||
|
||
#include <linux/filter.h> | ||
@@ -1402,6 +1403,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, | ||
cgroup_sk_alloc(&sk->sk_cgrp_data); | ||
sock_update_classid(&sk->sk_cgrp_data); | ||
sock_update_netprioidx(&sk->sk_cgrp_data); | ||
+ sock_update_l3mdev(sk); | ||
} | ||
|
||
return sk; | ||
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig | ||
index 5d47325..3142d81 100644 | ||
--- a/net/l3mdev/Kconfig | ||
+++ b/net/l3mdev/Kconfig | ||
@@ -8,3 +8,15 @@ config NET_L3_MASTER_DEV | ||
---help--- | ||
This module provides glue between core networking code and device | ||
drivers to support L3 master devices like VRF. | ||
+ | ||
+config CGROUP_L3MDEV | ||
+ bool "L3 Master Device cgroup" | ||
+ depends on CGROUPS | ||
+ depends on NET_L3_MASTER_DEV | ||
+ ---help--- | ||
+ Cgroup subsystem for assigning processes to an L3 domain. | ||
+ When a process is assigned to an l3mdev domain all AF_INET and | ||
+ AF_INET6 sockets opened by the process are bound to the L3 master | ||
+ device. | ||
+ | ||
+ | ||
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile | ||
index 84a53a6..ae74eba 100644 | ||
--- a/net/l3mdev/Makefile | ||
+++ b/net/l3mdev/Makefile | ||
@@ -3,3 +3,4 @@ | ||
# | ||
|
||
obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o | ||
+obj-$(CONFIG_CGROUP_L3MDEV) += l3mdev_cgroup.o | ||
diff --git a/net/l3mdev/l3mdev_cgroup.c b/net/l3mdev/l3mdev_cgroup.c | ||
new file mode 100644 | ||
index 0000000..b5fea03 | ||
--- /dev/null | ||
+++ b/net/l3mdev/l3mdev_cgroup.c | ||
@@ -0,0 +1,195 @@ | ||
+/* | ||
+ * net/l3mdev/l3mdev_cgroup.c Control Group for L3 Master Devices | ||
+ * | ||
+ * Copyright (c) 2015 Cumulus Networks. All rights reserved. | ||
+ * Copyright (c) 2015 David Ahern <dsa-qUQiAmfTcIp+XZJcv9eMoEEOCMrvLtNR@public.gmane.org> | ||
+ * | ||
+ * This program is free software; you can redistribute it and/or modify | ||
+ * it under the terms of the GNU General Public License as published by | ||
+ * the Free Software Foundation; either version 2 of the License, or | ||
+ * (at your option) any later version. | ||
+ */ | ||
+ | ||
+#include <linux/module.h> | ||
+#include <linux/slab.h> | ||
+#include <linux/types.h> | ||
+#include <linux/string.h> | ||
+#include <linux/cgroup.h> | ||
+#include <net/sock.h> | ||
+#include <net/l3mdev_cgroup.h> | ||
+ | ||
+struct l3mdev_cgroup { | ||
+ struct cgroup_subsys_state css; | ||
+ struct net *net; | ||
+ int dev_idx; | ||
+}; | ||
+ | ||
+static inline struct l3mdev_cgroup *css_l3mdev(struct cgroup_subsys_state *css) | ||
+{ | ||
+ return css ? container_of(css, struct l3mdev_cgroup, css) : NULL; | ||
+} | ||
+ | ||
+static void l3mdev_set_bound_dev(struct sock *sk) | ||
+{ | ||
+ struct task_struct *tsk = current; | ||
+ struct l3mdev_cgroup *l3mdev_cgrp; | ||
+ | ||
+ rcu_read_lock(); | ||
+ | ||
+ l3mdev_cgrp = css_l3mdev(task_css(tsk, l3mdev_cgrp_id)); | ||
+ if (l3mdev_cgrp && l3mdev_cgrp->dev_idx) | ||
+ sk->sk_bound_dev_if = l3mdev_cgrp->dev_idx; | ||
+ | ||
+ rcu_read_unlock(); | ||
+} | ||
+ | ||
+void sock_update_l3mdev(struct sock *sk) | ||
+{ | ||
+ switch (sk->sk_family) { | ||
+ case AF_INET: | ||
+ case AF_INET6: | ||
+ l3mdev_set_bound_dev(sk); | ||
+ break; | ||
+ } | ||
+} | ||
+ | ||
+static bool is_root_cgroup(struct cgroup_subsys_state *css) | ||
+{ | ||
+ return !css || !css->parent; | ||
+} | ||
+ | ||
+static struct cgroup_subsys_state * | ||
+l3mdev_css_alloc(struct cgroup_subsys_state *parent_css) | ||
+{ | ||
+ struct l3mdev_cgroup *l3mdev_cgrp; | ||
+ | ||
+ /* nested l3mdev domains are not supportd */ | ||
+ if (!is_root_cgroup(parent_css)) | ||
+ return ERR_PTR(-EINVAL); | ||
+ | ||
+ l3mdev_cgrp = kzalloc(sizeof(*l3mdev_cgrp), GFP_KERNEL); | ||
+ if (!l3mdev_cgrp) | ||
+ return ERR_PTR(-ENOMEM); | ||
+ | ||
+ return &l3mdev_cgrp->css; | ||
+} | ||
+ | ||
+static int l3mdev_css_online(struct cgroup_subsys_state *css) | ||
+{ | ||
+ return 0; | ||
+} | ||
+ | ||
+static void l3mdev_css_free(struct cgroup_subsys_state *css) | ||
+{ | ||
+ kfree(css_l3mdev(css)); | ||
+} | ||
+ | ||
+static int l3mdev_read(struct seq_file *sf, void *v) | ||
+{ | ||
+ struct cgroup_subsys_state *css = seq_css(sf); | ||
+ struct l3mdev_cgroup *l3mdev_cgrp = css_l3mdev(css); | ||
+ | ||
+ if (!l3mdev_cgrp) | ||
+ return -EINVAL; | ||
+ | ||
+ if (l3mdev_cgrp->net) { | ||
+ struct net_device *dev; | ||
+ | ||
+ dev = dev_get_by_index(l3mdev_cgrp->net, l3mdev_cgrp->dev_idx); | ||
+ | ||
+ seq_printf(sf, "net[%u]: device index %d ==> %s\n", | ||
+ l3mdev_cgrp->net->ns.inum, l3mdev_cgrp->dev_idx, | ||
+ dev ? dev->name : "<none>"); | ||
+ | ||
+ if (dev) | ||
+ dev_put(dev); | ||
+ } | ||
+ return 0; | ||
+} | ||
+ | ||
+static ssize_t l3mdev_write(struct kernfs_open_file *of, | ||
+ char *buf, size_t nbytes, loff_t off) | ||
+{ | ||
+ struct cgroup_subsys_state *css = of_css(of); | ||
+ struct l3mdev_cgroup *l3mdev_cgrp = css_l3mdev(css); | ||
+ struct net *net = current->nsproxy->net_ns; | ||
+ struct net_device *dev; | ||
+ char name[IFNAMSIZ]; | ||
+ int rc = -EINVAL; | ||
+ | ||
+ /* once master device is set can not undo. Must delete | ||
+ * cgroup and reset | ||
+ */ | ||
+ if (l3mdev_cgrp->dev_idx) | ||
+ goto out; | ||
+ | ||
+ /* root cgroup does not bind to an L3 domain */ | ||
+ if (is_root_cgroup(css)) | ||
+ goto out; | ||
+ | ||
+ if (sscanf(buf, "%" __stringify(IFNAMSIZ) "s", name) != 1) | ||
+ goto out; | ||
+ | ||
+ dev = dev_get_by_name(net, name); | ||
+ if (!dev) { | ||
+ rc = -ENODEV; | ||
+ goto out; | ||
+ } | ||
+ | ||
+ if (netif_is_l3_master(dev)) { | ||
+ l3mdev_cgrp->net = net; | ||
+ l3mdev_cgrp->dev_idx = dev->ifindex; | ||
+ rc = 0; | ||
+ } | ||
+ | ||
+ dev_put(dev); | ||
+out: | ||
+ return rc ? : nbytes; | ||
+} | ||
+ | ||
+/* make master device is set for non-root cgroups before tasks can be added */ | ||
+static int l3mdev_can_attach(struct cgroup_taskset *tset) | ||
+{ | ||
+ struct cgroup_subsys_state *dst_css; | ||
+ struct task_struct *tsk; | ||
+ int rc = 0; | ||
+ | ||
+ cgroup_taskset_for_each(tsk, dst_css, tset) { | ||
+ struct l3mdev_cgroup *l3mdev_cgrp; | ||
+ | ||
+ l3mdev_cgrp = css_l3mdev(dst_css); | ||
+ if (!is_root_cgroup(dst_css) && !l3mdev_cgrp->dev_idx) { | ||
+ rc = -ENODEV; | ||
+ break; | ||
+ } | ||
+ } | ||
+ | ||
+ return rc; | ||
+} | ||
+ | ||
+static struct cftype ss_files[] = { | ||
+ { | ||
+ .name = "master-device", | ||
+ .seq_show = l3mdev_read, | ||
+ .write = l3mdev_write, | ||
+ }, | ||
+ { } /* terminate */ | ||
+}; | ||
+ | ||
+struct cgroup_subsys l3mdev_cgrp_subsys = { | ||
+ .css_alloc = l3mdev_css_alloc, | ||
+ .css_online = l3mdev_css_online, | ||
+ .css_free = l3mdev_css_free, | ||
+ .can_attach = l3mdev_can_attach, | ||
+ .legacy_cftypes = ss_files, | ||
+}; | ||
+ | ||
+static int __init init_cgroup_l3mdev(void) | ||
+{ | ||
+ return 0; | ||
+} | ||
+ | ||
+subsys_initcall(init_cgroup_l3mdev); | ||
+MODULE_AUTHOR("David Ahern"); | ||
+MODULE_DESCRIPTION("Control Group for L3 Networking Domains"); | ||
+MODULE_LICENSE("GPL"); |
Oops, something went wrong.