From 6abc8ca19df0078de17dc38340db3002ed489ce7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Aug 2015 15:20:55 -0400 Subject: [PATCH 1/5] cgroup: define controller file conventions Traditionally, each cgroup controller implemented whatever interface it wanted leading to interfaces which are widely inconsistent. Examining the requirements of the controllers readily yield that there are only a few control schemes shared among all. Two major controllers already had to implement new interface for the unified hierarchy due to significant structural changes. Let's take the chance to establish common conventions throughout all controllers. This patch defines CGROUP_WEIGHT_MIN/DFL/MAX to be used on all weight based control knobs and documents the conventions that controllers should follow on the unified hierarchy. Except for io.weight knob, all existing unified hierarchy knobs are already compliant. A follow-up patch will update io.weight. v2: Added descriptions of min, low and high knobs. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner Cc: Li Zefan Cc: Peter Zijlstra --- Documentation/cgroups/unified-hierarchy.txt | 80 ++++++++++++++++++--- include/linux/cgroup.h | 9 +++ 2 files changed, 81 insertions(+), 8 deletions(-) diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index 86847a7647ab..1ee9caf29e57 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -23,10 +23,13 @@ CONTENTS 5. Other Changes 5-1. [Un]populated Notification 5-2. Other Core Changes - 5-3. Per-Controller Changes - 5-3-1. blkio - 5-3-2. cpuset - 5-3-3. memory + 5-3. Controller File Conventions + 5-3-1. Format + 5-3-2. Control Knobs + 5-4. Per-Controller Changes + 5-4-1. blkio + 5-4-2. cpuset + 5-4-3. memory 6. Planned Changes 6-1. CAP for resource control @@ -372,14 +375,75 @@ supported and the interface files "release_agent" and - The "cgroup.clone_children" file is removed. -5-3. Per-Controller Changes +5-3. Controller File Conventions -5-3-1. blkio +5-3-1. Format + +In general, all controller files should be in one of the following +formats whenever possible. + +- Values only files + + VAL0 VAL1...\n + +- Flat keyed files + + KEY0 VAL0\n + KEY1 VAL1\n + ... + +- Nested keyed files + + KEY0 SUB_KEY0=VAL00 SUB_KEY1=VAL01... + KEY1 SUB_KEY0=VAL10 SUB_KEY1=VAL11... + ... + +For a writeable file, the format for writing should generally match +reading; however, controllers may allow omitting later fields or +implement restricted shortcuts for most common use cases. + +For both flat and nested keyed files, only the values for a single key +can be written at a time. For nested keyed files, the sub key pairs +may be specified in any order and not all pairs have to be specified. + + +5-3-2. Control Knobs + +- Settings for a single feature should generally be implemented in a + single file. + +- In general, the root cgroup should be exempt from resource control + and thus shouldn't have resource control knobs. + +- If a controller implements ratio based resource distribution, the + control knob should be named "weight" and have the range [1, 10000] + and 100 should be the default value. The values are chosen to allow + enough and symmetric bias in both directions while keeping it + intuitive (the default is 100%). + +- If a controller implements an absolute resource guarantee and/or + limit, the control knobs should be named "min" and "max" + respectively. If a controller implements best effort resource + gurantee and/or limit, the control knobs should be named "low" and + "high" respectively. + + In the above four control files, the special token "max" should be + used to represent upward infinity for both reading and writing. + +- If a setting has configurable default value and specific overrides, + the default settings should be keyed with "default" and appear as + the first entry in the file. Specific entries can use "default" as + its value to indicate inheritance of the default value. + + +5-4. Per-Controller Changes + +5-4-1. blkio - blk-throttle becomes properly hierarchical. -5-3-2. cpuset +5-4-2. cpuset - Tasks are kept in empty cpusets after hotplug and take on the masks of the nearest non-empty ancestor, instead of being moved to it. @@ -388,7 +452,7 @@ supported and the interface files "release_agent" and masks of the nearest non-empty ancestor. -5-3-3. memory +5-4-3. memory - use_hierarchy is on by default and the cgroup file for the flag is not created. diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a593e299162e..c6bf9d30c270 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -22,6 +22,15 @@ #ifdef CONFIG_CGROUPS +/* + * All weight knobs on the default hierarhcy should use the following min, + * default and max values. The default value is the logarithmic center of + * MIN and MAX and allows 100x to be expressed in both directions. + */ +#define CGROUP_WEIGHT_MIN 1 +#define CGROUP_WEIGHT_DFL 100 +#define CGROUP_WEIGHT_MAX 10000 + /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { struct cgroup_subsys *ss; From d0ec4230a077521a984ac604acbd8c15cc12a664 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 5 Aug 2015 16:03:19 -0400 Subject: [PATCH 2/5] cgroup: export cgrp_dfl_root While cgroup subsystems can't be modules, blkcg supports dynamically loadable policies which interact with cgroup core. Export cgrp_dfl_root so that cgroup_on_dfl() can be used in those modules. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner --- kernel/cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f89d9292eee6..3f6641632f73 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -145,6 +145,7 @@ static const char *cgroup_subsys_name[] = { * part of that cgroup. */ struct cgroup_root cgrp_dfl_root; +EXPORT_SYMBOL_GPL(cgrp_dfl_root); /* * The default hierarchy always exists but is hidden until mounted for the From 731a981e1059dd68c97212ccc9c0e1f169c1a77b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 11 Aug 2015 13:35:42 -0400 Subject: [PATCH 3/5] cgroup: make cftype->private a unsigned long It's pretty unusual to have an int as a private data field and it makes it impossible to carray a pointer value through it. Let's make it an unsigned long. AFAICS, this shouldn't break anything. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93755a629299..8f5770a7d85a 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -318,7 +318,7 @@ struct cftype { * end of cftype array. */ char name[MAX_CFTYPE_NAME]; - int private; + unsigned long private; /* * If not 0, file mode is set to this value, otherwise it will * be figured out automatically From d98817d4961b9ef75062d1e129829d283b3dac57 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Aug 2015 13:58:16 -0700 Subject: [PATCH 4/5] cgroup: don't print subsystems for the default hierarchy It doesn't make sense to print subsystems on mount option or /proc/PID/cgroup for the default hierarchy. * cgroup.controllers file at the root of the default hierarchy lists the currently attached controllers. * The default hierarchy is catch-all for unmounted subsystems. * The default hierarchy doesn't accept any mount options. Suppress subsystem printing on mount options and /proc/PID/cgroup for the default hierarchy. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner Cc: cgroups@vger.kernel.org --- kernel/cgroup.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3f6641632f73..d9f854defa78 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1333,9 +1333,10 @@ static int cgroup_show_options(struct seq_file *seq, struct cgroup_subsys *ss; int ssid; - for_each_subsys(ss, ssid) - if (root->subsys_mask & (1 << ssid)) - seq_printf(seq, ",%s", ss->name); + if (root != &cgrp_dfl_root) + for_each_subsys(ss, ssid) + if (root->subsys_mask & (1 << ssid)) + seq_printf(seq, ",%s", ss->name); if (root->flags & CGRP_ROOT_NOPREFIX) seq_puts(seq, ",noprefix"); if (root->flags & CGRP_ROOT_XATTR) @@ -5137,9 +5138,11 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, continue; seq_printf(m, "%d:", root->hierarchy_id); - for_each_subsys(ss, ssid) - if (root->subsys_mask & (1 << ssid)) - seq_printf(m, "%s%s", count++ ? "," : "", ss->name); + if (root != &cgrp_dfl_root) + for_each_subsys(ss, ssid) + if (root->subsys_mask & (1 << ssid)) + seq_printf(m, "%s%s", count++ ? "," : "", + ss->name); if (strlen(root->name)) seq_printf(m, "%sname=%s", count ? "," : "", root->name); From 3e1d2eed39d804e48282931835c7203fa47fe1d9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Aug 2015 13:58:16 -0700 Subject: [PATCH 5/5] cgroup: introduce cgroup_subsys->legacy_name This allows cgroup subsystems to use a different name on the unified hierarchy. cgroup_subsys->name is used on the unified hierarchy, ->legacy_name elsewhere. If ->legacy_name is not explicitly set, it's automatically set to ->name and the userland visible behavior remains unchanged. v2: Make parse_cgroupfs_options() only consider ->legacy_name as mount options are used only on legacy hierarchies. Suggested by Li Zefan. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner Cc: cgroups@vger.kernel.org --- include/linux/cgroup-defs.h | 3 +++ kernel/cgroup.c | 29 ++++++++++++++++++----------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8f5770a7d85a..7d0bb53e4553 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -434,6 +434,9 @@ struct cgroup_subsys { int id; const char *name; + /* optional, initialized automatically during boot if not set */ + const char *legacy_name; + /* link to parent, protected by cgroup_lock() */ struct cgroup_root *root; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d9f854defa78..9c8eb17d593a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1028,10 +1028,13 @@ static const struct file_operations proc_cgroupstats_operations; static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, char *buf) { + struct cgroup_subsys *ss = cft->ss; + if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) && !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) snprintf(buf, CGROUP_FILE_NAME_MAX, "%s.%s", - cft->ss->name, cft->name); + cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name, + cft->name); else strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX); return buf; @@ -1336,7 +1339,7 @@ static int cgroup_show_options(struct seq_file *seq, if (root != &cgrp_dfl_root) for_each_subsys(ss, ssid) if (root->subsys_mask & (1 << ssid)) - seq_printf(seq, ",%s", ss->name); + seq_printf(seq, ",%s", ss->legacy_name); if (root->flags & CGRP_ROOT_NOPREFIX) seq_puts(seq, ",noprefix"); if (root->flags & CGRP_ROOT_XATTR) @@ -1449,7 +1452,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) } for_each_subsys(ss, i) { - if (strcmp(token, ss->name)) + if (strcmp(token, ss->legacy_name)) continue; if (ss->disabled) continue; @@ -4995,6 +4998,8 @@ int __init cgroup_init_early(void) ss->id = i; ss->name = cgroup_subsys_name[i]; + if (!ss->legacy_name) + ss->legacy_name = cgroup_subsys_name[i]; if (ss->early_init) cgroup_init_subsys(ss, true); @@ -5142,7 +5147,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, for_each_subsys(ss, ssid) if (root->subsys_mask & (1 << ssid)) seq_printf(m, "%s%s", count++ ? "," : "", - ss->name); + ss->legacy_name); if (strlen(root->name)) seq_printf(m, "%sname=%s", count ? "," : "", root->name); @@ -5182,7 +5187,7 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) for_each_subsys(ss, i) seq_printf(m, "%s\t%d\t%d\t%d\n", - ss->name, ss->root->hierarchy_id, + ss->legacy_name, ss->root->hierarchy_id, atomic_read(&ss->root->nr_cgrps), !ss->disabled); mutex_unlock(&cgroup_mutex); @@ -5404,12 +5409,14 @@ static int __init cgroup_disable(char *str) continue; for_each_subsys(ss, i) { - if (!strcmp(token, ss->name)) { - ss->disabled = 1; - printk(KERN_INFO "Disabling %s control group" - " subsystem\n", ss->name); - break; - } + if (strcmp(token, ss->name) && + strcmp(token, ss->legacy_name)) + continue; + + ss->disabled = 1; + printk(KERN_INFO "Disabling %s control group subsystem\n", + ss->name); + break; } } return 1;