From 2e7d31704c7fe9141b0b27ec1ece254d9993fe2d Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 4 Dec 2019 16:49:43 -0800 Subject: [PATCH 01/86] mm/kasan/common.c: fix compile error I hit the following compile error in arch/x86/ mm/kasan/common.c: In function kasan_populate_vmalloc: mm/kasan/common.c:797:2: error: implicit declaration of function flush_cache_vmap; did you mean flush_rcu_work? [-Werror=implicit-function-declaration] flush_cache_vmap(shadow_start, shadow_end); ^~~~~~~~~~~~~~~~ flush_rcu_work cc1: some warnings being treated as errors Link: http://lkml.kernel.org/r/1575363013-43761-1-git-send-email-zhongjiang@huawei.com Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory") Signed-off-by: zhong jiang Reviewed-by: Andrew Morton Reviewed-by: Daniel Axtens Cc: Mark Rutland Cc: Mark Rutland Cc: Vasily Gorbik Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index df3371d5c572..2fa710bb6358 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -36,6 +36,7 @@ #include #include +#include #include #include "kasan.h" From a264df74df38855096393447f1b8f386069a94b9 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 4 Dec 2019 16:49:46 -0800 Subject: [PATCH 02/86] mm: memcg/slab: wait for !root kmem_cache refcnt killing on root kmem_cache destruction Christian reported a warning like the following obtained during running some KVM-related tests on s390: WARNING: CPU: 8 PID: 208 at lib/percpu-refcount.c:108 percpu_ref_exit+0x50/0x58 Modules linked in: kvm(-) xt_CHECKSUM xt_MASQUERADE bonding xt_tcpudp ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ip6table_na> CPU: 8 PID: 208 Comm: kworker/8:1 Not tainted 5.2.0+ #66 Hardware name: IBM 2964 NC9 712 (LPAR) Workqueue: events sysfs_slab_remove_workfn Krnl PSW : 0704e00180000000 0000001529746850 (percpu_ref_exit+0x50/0x58) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 00000000ffff8808 0000001529746740 000003f4e30e8e18 0036008100000000 0000001f00000000 0035008100000000 0000001fb3573ab8 0000000000000000 0000001fbdb6de00 0000000000000000 0000001529f01328 0000001fb3573b00 0000001fbb27e000 0000001fbdb69300 000003e009263d00 000003e009263cd0 Krnl Code: 0000001529746842: f0a0000407fe srp 4(11,%r0),2046,0 0000001529746848: 47000700 bc 0,1792 #000000152974684c: a7f40001 brc 15,152974684e >0000001529746850: a7f4fff2 brc 15,1529746834 0000001529746854: 0707 bcr 0,%r7 0000001529746856: 0707 bcr 0,%r7 0000001529746858: eb8ff0580024 stmg %r8,%r15,88(%r15) 000000152974685e: a738ffff lhi %r3,-1 Call Trace: ([<000003e009263d00>] 0x3e009263d00) [<00000015293252ea>] slab_kmem_cache_release+0x3a/0x70 [<0000001529b04882>] kobject_put+0xaa/0xe8 [<000000152918cf28>] process_one_work+0x1e8/0x428 [<000000152918d1b0>] worker_thread+0x48/0x460 [<00000015291942c6>] kthread+0x126/0x160 [<0000001529b22344>] ret_from_fork+0x28/0x30 [<0000001529b2234c>] kernel_thread_starter+0x0/0x10 Last Breaking-Event-Address: [<000000152974684c>] percpu_ref_exit+0x4c/0x58 ---[ end trace b035e7da5788eb09 ]--- The problem occurs because kmem_cache_destroy() is called immediately after deleting of a memcg, so it races with the memcg kmem_cache deactivation. flush_memcg_workqueue() at the beginning of kmem_cache_destroy() is supposed to guarantee that all deactivation processes are finished, but failed to do so. It waits for an rcu grace period, after which all children kmem_caches should be deactivated. During the deactivation percpu_ref_kill() is called for non root kmem_cache refcounters, but it requires yet another rcu grace period to finish the transition to the atomic (dead) state. So in a rare case when not all children kmem_caches are destroyed at the moment when the root kmem_cache is about to be gone, we need to wait another rcu grace period before destroying the root kmem_cache. This issue can be triggered only with dynamically created kmem_caches which are used with memcg accounting. In this case per-memcg child kmem_caches are created. They are deactivated from the cgroup removing path. If the destruction of the root kmem_cache is racing with the removal of the cgroup (both are quite complicated multi-stage processes), the described issue can occur. The only known way to trigger it in the real life, is to unload some kernel module which creates a dedicated kmem_cache, used from different memory cgroups with GFP_ACCOUNT flag. If the unloading happens immediately after calling rmdir on the corresponding cgroup, there is some chance to trigger the issue. Link: http://lkml.kernel.org/r/20191129025011.3076017-1-guro@fb.com Fixes: f0a3a24b532d ("mm: memcg/slab: rework non-root kmem_cache lifecycle management") Signed-off-by: Roman Gushchin Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab_common.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/slab_common.c b/mm/slab_common.c index 8afa188f6e20..f0ab6d4ceb4c 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -904,6 +904,18 @@ static void flush_memcg_workqueue(struct kmem_cache *s) * previous workitems on workqueue are processed. */ flush_workqueue(memcg_kmem_cache_wq); + + /* + * If we're racing with children kmem_cache deactivation, it might + * take another rcu grace period to complete their destruction. + * At this moment the corresponding percpu_ref_kill() call should be + * done, but it might take another rcu grace period to complete + * switching to the atomic mode. + * Please, note that we check without grabbing the slab_mutex. It's safe + * because at this moment the children list can't grow. + */ + if (!list_empty(&s->memcg_params.children)) + rcu_barrier(); } #else static inline int shutdown_memcg_caches(struct kmem_cache *s) From 9d7ea9a297e6445d567056f15b469dde13ca4134 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 4 Dec 2019 16:49:50 -0800 Subject: [PATCH 03/86] mm/vmstat: add helpers to get vmstat item names for each enum type Statistics in vmstat is combined from counters with different structure, but names for them are merged into one array. This patch adds trivial helpers to get name for each item: const char *zone_stat_name(enum zone_stat_item item); const char *numa_stat_name(enum numa_stat_item item); const char *node_stat_name(enum node_stat_item item); const char *writeback_stat_name(enum writeback_stat_item item); const char *vm_event_name(enum vm_event_item item); Names for enum writeback_stat_item are folded in the middle of vmstat_text so this patch moves declaration into header to calculate offset of following items. Also this patch reuses piece of node stat names for lru list names: const char *lru_list_name(enum lru_list lru); This returns common lru list names: "inactive_anon", "active_anon", "inactive_file", "active_file", "unevictable". [khlebnikov@yandex-team.ru: do not use size of vmstat_text as count of /proc/vmstat items] Link: http://lkml.kernel.org/r/157152151769.4139.15423465513138349343.stgit@buzz Link: https://lore.kernel.org/linux-mm/cd1c42ae-281f-c8a8-70ac-1d01d417b2e1@infradead.org/T/#u Link: http://lkml.kernel.org/r/157113012325.453.562783073839432766.stgit@buzz Signed-off-by: Konstantin Khlebnikov Reviewed-by: Andrew Morton Cc: Randy Dunlap Cc: Michal Hocko Cc: Vladimir Davydov Cc: Johannes Weiner Cc: YueHaibing Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 9 +++----- include/linux/vmstat.h | 50 +++++++++++++++++++++++++++++++++++++++++ mm/vmstat.c | 51 +++++++++++++++++------------------------- 3 files changed, 73 insertions(+), 37 deletions(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index 296546ffed6c..98a31bafc8a2 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -496,20 +496,17 @@ static ssize_t node_read_vmstat(struct device *dev, int n = 0; for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) - n += sprintf(buf+n, "%s %lu\n", vmstat_text[i], + n += sprintf(buf+n, "%s %lu\n", zone_stat_name(i), sum_zone_node_page_state(nid, i)); #ifdef CONFIG_NUMA for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) - n += sprintf(buf+n, "%s %lu\n", - vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], + n += sprintf(buf+n, "%s %lu\n", numa_stat_name(i), sum_zone_numa_state(nid, i)); #endif for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) - n += sprintf(buf+n, "%s %lu\n", - vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_STAT_ITEMS], + n += sprintf(buf+n, "%s %lu\n", node_stat_name(i), node_page_state(pgdat, i)); return n; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index bdeda4b079fe..b995d8b680c2 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -31,6 +31,12 @@ struct reclaim_stat { unsigned nr_unmap_fail; }; +enum writeback_stat_item { + NR_DIRTY_THRESHOLD, + NR_DIRTY_BG_THRESHOLD, + NR_VM_WRITEBACK_STAT_ITEMS, +}; + #ifdef CONFIG_VM_EVENT_COUNTERS /* * Light weight per cpu counter implementation. @@ -381,4 +387,48 @@ static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, extern const char * const vmstat_text[]; +static inline const char *zone_stat_name(enum zone_stat_item item) +{ + return vmstat_text[item]; +} + +#ifdef CONFIG_NUMA +static inline const char *numa_stat_name(enum numa_stat_item item) +{ + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + + item]; +} +#endif /* CONFIG_NUMA */ + +static inline const char *node_stat_name(enum node_stat_item item) +{ + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS + + item]; +} + +static inline const char *lru_list_name(enum lru_list lru) +{ + return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" +} + +static inline const char *writeback_stat_name(enum writeback_stat_item item) +{ + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS + + NR_VM_NODE_STAT_ITEMS + + item]; +} + +#ifdef CONFIG_VM_EVENT_COUNTERS +static inline const char *vm_event_name(enum vm_event_item item) +{ + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS + + NR_VM_NODE_STAT_ITEMS + + NR_VM_WRITEBACK_STAT_ITEMS + + item]; +} +#endif /* CONFIG_VM_EVENT_COUNTERS */ + #endif /* _LINUX_VMSTAT_H */ diff --git a/mm/vmstat.c b/mm/vmstat.c index a8222041bd44..fa627329428b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1134,7 +1134,7 @@ const char * const vmstat_text[] = { "numa_other", #endif - /* Node-based counters */ + /* enum node_stat_item counters */ "nr_inactive_anon", "nr_active_anon", "nr_inactive_file", @@ -1564,10 +1564,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, if (is_zone_first_populated(pgdat, zone)) { seq_printf(m, "\n per-node stats"); for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { - seq_printf(m, "\n %-12s %lu", - vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_STAT_ITEMS], - node_page_state(pgdat, i)); + seq_printf(m, "\n %-12s %lu", node_stat_name(i), + node_page_state(pgdat, i)); } } seq_printf(m, @@ -1600,14 +1598,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, } for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) - seq_printf(m, "\n %-12s %lu", vmstat_text[i], - zone_page_state(zone, i)); + seq_printf(m, "\n %-12s %lu", zone_stat_name(i), + zone_page_state(zone, i)); #ifdef CONFIG_NUMA for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) - seq_printf(m, "\n %-12s %lu", - vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], - zone_numa_state_snapshot(zone, i)); + seq_printf(m, "\n %-12s %lu", numa_stat_name(i), + zone_numa_state_snapshot(zone, i)); #endif seq_printf(m, "\n pagesets"); @@ -1658,31 +1655,23 @@ static const struct seq_operations zoneinfo_op = { .show = zoneinfo_show, }; -enum writeback_stat_item { - NR_DIRTY_THRESHOLD, - NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, -}; +#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ + NR_VM_NUMA_STAT_ITEMS + \ + NR_VM_NODE_STAT_ITEMS + \ + NR_VM_WRITEBACK_STAT_ITEMS + \ + (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ + NR_VM_EVENT_ITEMS : 0)) static void *vmstat_start(struct seq_file *m, loff_t *pos) { unsigned long *v; - int i, stat_items_size; + int i; - if (*pos >= ARRAY_SIZE(vmstat_text)) + if (*pos >= NR_VMSTAT_ITEMS) return NULL; - stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) + - NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) + - NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) + - NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long); -#ifdef CONFIG_VM_EVENT_COUNTERS - stat_items_size += sizeof(struct vm_event_state); -#endif - - BUILD_BUG_ON(stat_items_size != - ARRAY_SIZE(vmstat_text) * sizeof(unsigned long)); - v = kmalloc(stat_items_size, GFP_KERNEL); + BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS); + v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL); m->private = v; if (!v) return ERR_PTR(-ENOMEM); @@ -1715,7 +1704,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) { (*pos)++; - if (*pos >= ARRAY_SIZE(vmstat_text)) + if (*pos >= NR_VMSTAT_ITEMS) return NULL; return (unsigned long *)m->private + *pos; } @@ -1781,7 +1770,7 @@ int vmstat_refresh(struct ctl_table *table, int write, val = atomic_long_read(&vm_zone_stat[i]); if (val < 0) { pr_warn("%s: %s %ld\n", - __func__, vmstat_text[i], val); + __func__, zone_stat_name(i), val); err = -EINVAL; } } @@ -1790,7 +1779,7 @@ int vmstat_refresh(struct ctl_table *table, int write, val = atomic_long_read(&vm_numa_stat[i]); if (val < 0) { pr_warn("%s: %s %ld\n", - __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val); + __func__, numa_stat_name(i), val); err = -EINVAL; } } From ebc5d83d04438116c24dcc556b0ab6c8ef64b77e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 4 Dec 2019 16:49:53 -0800 Subject: [PATCH 04/86] mm/memcontrol: use vmstat names for printing statistics Use common names from vmstat array when possible. This gives not much difference in code size for now, but should help in keeping interfaces consistent. add/remove: 0/2 grow/shrink: 2/0 up/down: 70/-72 (-2) Function old new delta memory_stat_format 984 1050 +66 memcg_stat_show 957 961 +4 memcg1_event_names 32 - -32 mem_cgroup_lru_names 40 - -40 Total: Before=14485337, After=14485335, chg -0.00% Link: http://lkml.kernel.org/r/157113012508.453.80391533767219371.stgit@buzz Signed-off-by: Konstantin Khlebnikov Acked-by: Andrew Morton Cc: Michal Hocko Cc: Vladimir Davydov Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 4 ++-- mm/memcontrol.c | 52 ++++++++++++++++++------------------------ mm/vmstat.c | 9 ++++---- 3 files changed, 29 insertions(+), 36 deletions(-) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index b995d8b680c2..292485f3d24d 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -420,7 +420,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item) item]; } -#ifdef CONFIG_VM_EVENT_COUNTERS +#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + @@ -429,6 +429,6 @@ static inline const char *vm_event_name(enum vm_event_item item) NR_VM_WRITEBACK_STAT_ITEMS + item]; } -#endif /* CONFIG_VM_EVENT_COUNTERS */ +#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ #endif /* _LINUX_VMSTAT_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bc01423277c5..c5b5f74cfd4d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -98,14 +98,6 @@ static bool do_memsw_account(void) return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account; } -static const char *const mem_cgroup_lru_names[] = { - "inactive_anon", - "active_anon", - "inactive_file", - "active_file", - "unevictable", -}; - #define THRESHOLDS_EVENTS_TARGET 128 #define SOFTLIMIT_EVENTS_TARGET 1024 @@ -1421,7 +1413,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg) PAGE_SIZE); for (i = 0; i < NR_LRU_LISTS; i++) - seq_buf_printf(&s, "%s %llu\n", mem_cgroup_lru_names[i], + seq_buf_printf(&s, "%s %llu\n", lru_list_name(i), (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE); @@ -1434,8 +1426,10 @@ static char *memory_stat_format(struct mem_cgroup *memcg) /* Accumulated memory events */ - seq_buf_printf(&s, "pgfault %lu\n", memcg_events(memcg, PGFAULT)); - seq_buf_printf(&s, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGFAULT), + memcg_events(memcg, PGFAULT)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT), + memcg_events(memcg, PGMAJFAULT)); seq_buf_printf(&s, "workingset_refault %lu\n", memcg_page_state(memcg, WORKINGSET_REFAULT)); @@ -1444,22 +1438,27 @@ static char *memory_stat_format(struct mem_cgroup *memcg) seq_buf_printf(&s, "workingset_nodereclaim %lu\n", memcg_page_state(memcg, WORKINGSET_NODERECLAIM)); - seq_buf_printf(&s, "pgrefill %lu\n", memcg_events(memcg, PGREFILL)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGREFILL), + memcg_events(memcg, PGREFILL)); seq_buf_printf(&s, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) + memcg_events(memcg, PGSCAN_DIRECT)); seq_buf_printf(&s, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) + memcg_events(memcg, PGSTEAL_DIRECT)); - seq_buf_printf(&s, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE)); - seq_buf_printf(&s, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE)); - seq_buf_printf(&s, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE)); - seq_buf_printf(&s, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGACTIVATE), + memcg_events(memcg, PGACTIVATE)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGDEACTIVATE), + memcg_events(memcg, PGDEACTIVATE)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREE), + memcg_events(memcg, PGLAZYFREE)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREED), + memcg_events(memcg, PGLAZYFREED)); #ifdef CONFIG_TRANSPARENT_HUGEPAGE - seq_buf_printf(&s, "thp_fault_alloc %lu\n", + seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC), memcg_events(memcg, THP_FAULT_ALLOC)); - seq_buf_printf(&s, "thp_collapse_alloc %lu\n", + seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC), memcg_events(memcg, THP_COLLAPSE_ALLOC)); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -3742,13 +3741,6 @@ static const unsigned int memcg1_events[] = { PGMAJFAULT, }; -static const char *const memcg1_event_names[] = { - "pgpgin", - "pgpgout", - "pgfault", - "pgmajfault", -}; - static int memcg_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_seq(m); @@ -3757,7 +3749,6 @@ static int memcg_stat_show(struct seq_file *m, void *v) unsigned int i; BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats)); - BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) @@ -3768,11 +3759,11 @@ static int memcg_stat_show(struct seq_file *m, void *v) } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "%s %lu\n", memcg1_event_names[i], + seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]), memcg_events_local(memcg, memcg1_events[i])); for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i], + seq_printf(m, "%s %lu\n", lru_list_name(i), memcg_page_state_local(memcg, NR_LRU_BASE + i) * PAGE_SIZE); @@ -3797,11 +3788,12 @@ static int memcg_stat_show(struct seq_file *m, void *v) } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "total_%s %llu\n", memcg1_event_names[i], + seq_printf(m, "total_%s %llu\n", + vm_event_name(memcg1_events[i]), (u64)memcg_events(memcg, memcg1_events[i])); for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], + seq_printf(m, "total_%s %llu\n", lru_list_name(i), (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE); diff --git a/mm/vmstat.c b/mm/vmstat.c index fa627329428b..78d53378db99 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1084,7 +1084,8 @@ int fragmentation_index(struct zone *zone, unsigned int order) } #endif -#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA) +#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \ + defined(CONFIG_NUMA) || defined(CONFIG_MEMCG) #ifdef CONFIG_ZONE_DMA #define TEXT_FOR_DMA(xx) xx "_dma", #else @@ -1172,7 +1173,7 @@ const char * const vmstat_text[] = { "nr_dirty_threshold", "nr_dirty_background_threshold", -#ifdef CONFIG_VM_EVENT_COUNTERS +#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) /* enum vm_event_item counters */ "pgpgin", "pgpgout", @@ -1291,9 +1292,9 @@ const char * const vmstat_text[] = { "swap_ra", "swap_ra_hit", #endif -#endif /* CONFIG_VM_EVENTS_COUNTERS */ +#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ }; -#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ +#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ defined(CONFIG_PROC_FS) From 3cde287bb4769fe9dfc9c532ddc88d90e81bc4c5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 4 Dec 2019 16:49:56 -0800 Subject: [PATCH 05/86] mm/memory.c: replace is_zero_pfn with is_huge_zero_pmd for thp For hugely mapped thp, we use is_huge_zero_pmd() to check if it's zero page or not. We do fill ptes with my_zero_pfn() when we split zero thp pmd, but this is not what we have in vm_normal_page_pmd() -- pmd_trans_huge_lock() makes sure of it. This is a trivial fix for /proc/pid/numa_maps, and AFAIK nobody complains about it. Gerald Schaefer asked: : Maybe the description could also mention the symptom of this bug? : I would assume that it affects anon/dirty accounting in gather_pte_stats(), : for huge mappings, if zero page mappings are not correctly recognized. I came across this while I was looking at the code, so I'm not aware of any symptom. Link: http://lkml.kernel.org/r/20191108192629.201556-1-yuzhao@google.com Signed-off-by: Yu Zhao Acked-by: Andrew Morton Cc: Matthew Wilcox Cc: Ralph Campbell Cc: Will Deacon Cc: Peter Zijlstra Cc: "Aneesh Kumar K . V" Cc: Dave Airlie Cc: Thomas Hellstrom Cc: Souptick Joarder Cc: Gerald Schaefer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 513c3ecc76ee..e455160e0f75 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -672,7 +672,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, if (pmd_devmap(pmd)) return NULL; - if (is_zero_pfn(pfn)) + if (is_huge_zero_pmd(pmd)) return NULL; if (unlikely(pfn > highest_memmap_pfn)) return NULL; From e06689bf57017ac022ccf0f2a5071f760821ce0f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 4 Dec 2019 16:49:59 -0800 Subject: [PATCH 06/86] proc: change ->nlink under proc_subdir_lock Currently gluing PDE into global /proc tree is done under lock, but changing ->nlink is not. Additionally struct proc_dir_entry::nlink is not atomic so updates can be lost. Link: http://lkml.kernel.org/r/20190925202436.GA17388@avx2 Signed-off-by: Alexey Dobriyan Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 64e9ee1b129e..d4f353187d67 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -138,8 +138,12 @@ static int proc_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct proc_dir_entry *de = PDE(inode); - if (de && de->nlink) - set_nlink(inode, de->nlink); + if (de) { + nlink_t nlink = READ_ONCE(de->nlink); + if (nlink > 0) { + set_nlink(inode, nlink); + } + } generic_fillattr(inode, stat); return 0; @@ -362,6 +366,7 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, write_unlock(&proc_subdir_lock); goto out_free_inum; } + dir->nlink++; write_unlock(&proc_subdir_lock); return dp; @@ -472,10 +477,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, ent->data = data; ent->proc_fops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; - parent->nlink++; ent = proc_register(parent, ent); - if (!ent) - parent->nlink--; } return ent; } @@ -505,10 +507,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent->data = NULL; ent->proc_fops = NULL; ent->proc_iops = NULL; - parent->nlink++; ent = proc_register(parent, ent); - if (!ent) - parent->nlink--; } return ent; } @@ -666,8 +665,12 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) len = strlen(fn); de = pde_subdir_find(parent, fn, len); - if (de) + if (de) { rb_erase(&de->subdir_node, &parent->subdir); + if (S_ISDIR(de->mode)) { + parent->nlink--; + } + } write_unlock(&proc_subdir_lock); if (!de) { WARN(1, "name '%s'\n", name); @@ -676,9 +679,6 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) proc_entry_rundown(de); - if (S_ISDIR(de->mode)) - parent->nlink--; - de->nlink = 0; WARN(pde_subdir_first(de), "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", __func__, de->parent->name, de->name, pde_subdir_first(de)->name); @@ -714,13 +714,12 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) de = next; continue; } - write_unlock(&proc_subdir_lock); - - proc_entry_rundown(de); next = de->parent; if (S_ISDIR(de->mode)) next->nlink--; - de->nlink = 0; + write_unlock(&proc_subdir_lock); + + proc_entry_rundown(de); if (de == root) break; pde_put(de); From 5f6354eaa517ce5c1e7b7feb224a0324601f796e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 4 Dec 2019 16:50:02 -0800 Subject: [PATCH 07/86] fs/proc/generic.c: delete useless "len" variable Pointer to next '/' encodes length of path element and next start position. Subtraction and increment are redundant. Link: http://lkml.kernel.org/r/20191004234521.GA30246@avx2 Signed-off-by: Alexey Dobriyan Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index d4f353187d67..074e9585c699 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -163,7 +163,6 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, { const char *cp = name, *next; struct proc_dir_entry *de; - unsigned int len; de = *ret; if (!de) @@ -174,13 +173,12 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, if (!next) break; - len = next - cp; - de = pde_subdir_find(de, cp, len); + de = pde_subdir_find(de, cp, next - cp); if (!de) { WARN(1, "name '%s'\n", name); return -ENOENT; } - cp += len + 1; + cp = next + 1; } *residual = cp; *ret = de; From 70a731c0e3c603e84607b770c15e53da4c3a6ecd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 4 Dec 2019 16:50:05 -0800 Subject: [PATCH 08/86] fs/proc/internal.h: shuffle "struct pde_opener" List iteration takes more code than anything else which means embedded list_head should be the first element of the structure. Space savings: add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-18 (-18) Function old new delta close_pdeo 228 227 -1 proc_reg_release 86 82 -4 proc_entry_rundown 143 139 -4 proc_reg_open 298 289 -9 Link: http://lkml.kernel.org/r/20191004234753.GB30246@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index cd0c8d5ce9a1..0f3b557c9b77 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -197,8 +197,8 @@ extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, lof * inode.c */ struct pde_opener { - struct file *file; struct list_head lh; + struct file *file; bool closing; struct completion *c; } __randomize_layout; From 9573e8f70a82bcbac95b1ea222ac9d5e50266f9f Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 4 Dec 2019 16:50:08 -0800 Subject: [PATCH 09/86] include/linux/proc_fs.h: fix confusing macro arg name state_size and ops are in the wrong position. Link: http://lkml.kernel.org/r/20190910021747.11216-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Andrew Morton Acked-by: Aleksa Sarai Reviewed-by: Christian Brauner Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index a705aa2d03f9..0640be56dcbd 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -58,8 +58,8 @@ extern int remove_proc_subtree(const char *, struct proc_dir_entry *); struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct seq_operations *ops, unsigned int state_size, void *data); -#define proc_create_net(name, mode, parent, state_size, ops) \ - proc_create_net_data(name, mode, parent, state_size, ops, NULL) +#define proc_create_net(name, mode, parent, ops, state_size) \ + proc_create_net_data(name, mode, parent, ops, state_size, NULL) struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, struct proc_dir_entry *parent, int (*show)(struct seq_file *, void *), void *data); From 3d82191c22e20836812afdaef22c5965ae2c55b8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 4 Dec 2019 16:50:11 -0800 Subject: [PATCH 10/86] fs/proc/Kconfig: fix indentation Adjust indentation from spaces to tab (+optional two spaces) as in coding style with command like: $ sed -e 's/^ / /' -i */Kconfig [adobriyan@gmail.com: add two spaces where necessary] Link: http://lkml.kernel.org/r/20191124133936.GA5655@avx2 Signed-off-by: Krzysztof Kozlowski Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index cb5629bd5fff..733881a6387b 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -42,8 +42,8 @@ config PROC_VMCORE bool "/proc/vmcore support" depends on PROC_FS && CRASH_DUMP default y - help - Exports the dump image of crashed kernel in ELF format. + help + Exports the dump image of crashed kernel in ELF format. config PROC_VMCORE_DEVICE_DUMP bool "Device Hardware/Firmware Log Collection" @@ -72,7 +72,7 @@ config PROC_SYSCTL a recompile of the kernel or reboot of the system. The primary interface is through /proc/sys. If you say Y here a tree of modifiable sysctl entries will be generated beneath the - /proc/sys directory. They are explained in the files + /proc/sys directory. They are explained in the files in . Note that enabling this option will enlarge the kernel by at least 8 KB. @@ -88,7 +88,7 @@ config PROC_PAGE_MONITOR Various /proc files exist to monitor process memory utilization: /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, /proc/kpagecount, and /proc/kpageflags. Disabling these - interfaces will reduce the size of the kernel by approximately 4kb. + interfaces will reduce the size of the kernel by approximately 4kb. config PROC_CHILDREN bool "Include /proc//task//children file" From d5ffb71b633cd5c4b8cce633c9d6448dced4eb74 Mon Sep 17 00:00:00 2001 From: Alessio Balsini Date: Wed, 4 Dec 2019 16:50:14 -0800 Subject: [PATCH 11/86] include/linux/sysctl.h: inline braces for ctl_table and ctl_table_header Fix coding style of "struct ctl_table" and "struct ctl_table_header" to have inline braces. Besides the wide use of this proposed cose style, this change helps to find at a glance the struct definition when navigating the code. Link: http://lkml.kernel.org/r/20190903154906.188651-1-balsini@android.com Signed-off-by: Alessio Balsini Acked-by: Luis Chamberlain Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6df477329b76..02fa84493f23 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -120,8 +120,7 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name) /* A sysctl table is an array of struct ctl_table: */ -struct ctl_table -{ +struct ctl_table { const char *procname; /* Text ID for /proc/sys, or zero */ void *data; int maxlen; @@ -140,8 +139,7 @@ struct ctl_node { /* struct ctl_table_header is used to maintain dynamic lists of struct ctl_table trees. */ -struct ctl_table_header -{ +struct ctl_table_header { union { struct { struct ctl_table *ctl_table; From a512ae54cee18296384f45201b3118345ce0dc80 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 4 Dec 2019 16:50:17 -0800 Subject: [PATCH 12/86] .gitattributes: use 'dts' diff driver for dts files Git is gaining support to display the closest node to the diff in the hunk header via the 'dts' diff driver. Use that driver for all dts and dtsi files so we can gain some more context on where the diff is. Taking a recent commit in the kernel dts files you can see the difference. With this patch and an updated git : diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi : index 62e07e1197cc..4c38426a6969 100644 : --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi : +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi : @@ -289,5 +289,29 @@ vdd_hdmi: regulator@1 { : gpio = <&gpio TEGRA194_MAIN_GPIO(A, 3) GPIO_ACTIVE_HIGH>; : enable-active-high; : }; : + : + vdd_3v3_pcie: regulator@2 { : + compatible = "regulator-fixed"; vs. without this patch : diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi : index 62e07e1197cc..4c38426a6969 100644 : --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi : +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi : @@ -289,5 +289,29 @@ : gpio = <&gpio TEGRA194_MAIN_GPIO(A, 3) GPIO_ACTIVE_HIGH>; : enable-active-high; : }; : + : + vdd_3v3_pcie: regulator@2 { : + compatible = "regulator-fixed"; You can see that we don't know what the context node is because it isn't shown after the '@@'. dts is not released yet but it is staged to be in the next release[1]. One can probably build git from source and try it out. [1] https://git.kernel.org/pub/scm/git/git.git/commit/?id=d49c2c3466d2c8cb0b3d0a43e6b406b07078fdb1 Link: http://lkml.kernel.org/r/20191004212311.141538-1-swboyd@chromium.org Signed-off-by: Stephen Boyd Cc: Rob Herring Cc: Randy Dunlap Acked-by: Frank Rowand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .gitattributes | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitattributes b/.gitattributes index 89c411b5ce6b..4b32eaa9571e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,4 @@ *.c diff=cpp *.h diff=cpp +*.dtsi diff=dts +*.dts diff=dts From 8788994376d84d627450fd0d67deb6a66ddf07d7 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Wed, 4 Dec 2019 16:50:20 -0800 Subject: [PATCH 13/86] linux/build_bug.h: change type to int Having BUILD_BUG_ON_ZERO produce a value of type size_t leads to awkward casts in cases where the result needs to be signed, or of smaller type than size_t. To avoid this, cast the value to int instead and rely on implicit type conversions when a larger or unsigned type is needed. Link: http://lkml.kernel.org/r/20190811184938.1796-3-rikard.falkeborn@gmail.com Signed-off-by: Rikard Falkeborn Suggested-by: Masahiro Yamada Reviewed-by: Kees Cook Reviewed-by: Masahiro Yamada Cc: Joe Perches Cc: Johannes Berg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 0fe5426f2bdc..e3a0be2c90ad 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -9,11 +9,11 @@ #else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a - * result (of value 0 and type size_t), so the expression can be used + * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). */ -#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); })) +#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) #endif /* __CHECKER__ */ /* Force a compilation error if a constant expression is not a power of 2 */ From 1a18374fc370da5a335b061fe94ebf677b07bd1d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Dec 2019 16:50:23 -0800 Subject: [PATCH 14/86] linux/scc.h: make uapi linux/scc.h self-contained Userspace cannot compile CC usr/include/linux/scc.h.s In file included from :32:0: usr/include/linux/scc.h:20:20: error: `SIOCDEVPRIVATE' undeclared here (not in a function) SIOCSCCRESERVED = SIOCDEVPRIVATE, ^~~~~~~~~~~~~~ Include to make it self-contained, and add it to the compile-test coverage. Link: http://lkml.kernel.org/r/20191108055809.26969-1-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/scc.h | 1 + usr/include/Makefile | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/scc.h b/include/uapi/linux/scc.h index c5bc7f747755..947edb17ce9d 100644 --- a/include/uapi/linux/scc.h +++ b/include/uapi/linux/scc.h @@ -4,6 +4,7 @@ #ifndef _UAPI_SCC_H #define _UAPI_SCC_H +#include /* selection of hardware types */ diff --git a/usr/include/Makefile b/usr/include/Makefile index 24543a30b9f0..5acd9bb6d714 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -40,7 +40,6 @@ header-test- += linux/omapfb.h header-test- += linux/patchkey.h header-test- += linux/phonet.h header-test- += linux/reiserfs_xattr.h -header-test- += linux/scc.h header-test- += linux/sctp.h header-test- += linux/signal.h header-test- += linux/sysctl.h From 24b54fee106d8f9ea41f71e366e03f6a3f083a15 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 4 Dec 2019 16:50:26 -0800 Subject: [PATCH 15/86] arch/Kconfig: fix indentation Adjust indentation from spaces to tab (+optional two spaces) as in coding style with command like: $ sed -e 's/^ / /' -i */Kconfig Link: http://lkml.kernel.org/r/1574306573-10886-1-git-send-email-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Jiri Kosina Cc: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 7b861fe3f900..48b5e103bdb0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -72,11 +72,11 @@ config KPROBES If in doubt, say "N". config JUMP_LABEL - bool "Optimize very unlikely/likely branches" - depends on HAVE_ARCH_JUMP_LABEL - depends on CC_HAS_ASM_GOTO - help - This option enables a transparent branch optimization that + bool "Optimize very unlikely/likely branches" + depends on HAVE_ARCH_JUMP_LABEL + depends on CC_HAS_ASM_GOTO + help + This option enables a transparent branch optimization that makes certain almost-always-true or almost-always-false branch conditions even cheaper to execute within the kernel. @@ -84,7 +84,7 @@ config JUMP_LABEL scheduler functionality, networking code and KVM have such branches and include support for this optimization technique. - If it is detected that the compiler has support for "asm goto", + If it is detected that the compiler has support for "asm goto", the kernel will compile such branches with just a nop instruction. When the condition flag is toggled to true, the nop will be converted to a jump instruction to execute the @@ -151,8 +151,8 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS information on the topic of unaligned memory accesses. config ARCH_USE_BUILTIN_BSWAP - bool - help + bool + help Modern versions of GCC (since 4.4) have builtin functions for handling byte-swapping. Using these, instead of the old inline assembler that the architecture code provides in the @@ -221,10 +221,10 @@ config HAVE_DMA_CONTIGUOUS bool config GENERIC_SMP_IDLE_THREAD - bool + bool config GENERIC_IDLE_POLL_SETUP - bool + bool config ARCH_HAS_FORTIFY_SOURCE bool @@ -257,7 +257,7 @@ config ARCH_HAS_UNCACHED_SEGMENT # Select if arch init_task must go in the __init_task_data section config ARCH_TASK_STRUCT_ON_STACK - bool + bool # Select if arch has its private alloc_task_struct() function config ARCH_TASK_STRUCT_ALLOCATOR From 2f5bd343694ed53b3abc4a616ce975505271afe7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 4 Dec 2019 16:50:29 -0800 Subject: [PATCH 16/86] scripts/get_maintainer.pl: add signatures from Fixes: lines in commit message A Fixes: lines in a commit message generally indicate that a previous commit was inadequate for whatever reason. The signers of the previous inadequate commit should also be cc'd on this new commit so update get_maintainer to find the old commit and add the original signers. Link: http://lkml.kernel.org/r/33605b9fc0e0f711236951ae84185a6218acff4f.camel@perches.com Signed-off-by: Joe Perches Suggested-by: Dan Carpenter Reviewed-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 5ef59214c555..34085d146fa2 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -26,6 +26,7 @@ my $email = 1; my $email_usename = 1; my $email_maintainer = 1; my $email_reviewer = 1; +my $email_fixes = 1; my $email_list = 1; my $email_moderated_list = 1; my $email_subscriber_list = 0; @@ -249,6 +250,7 @@ if (!GetOptions( 'r!' => \$email_reviewer, 'n!' => \$email_usename, 'l!' => \$email_list, + 'fixes!' => \$email_fixes, 'moderated!' => \$email_moderated_list, 's!' => \$email_subscriber_list, 'multiline!' => \$output_multiline, @@ -503,6 +505,7 @@ sub read_mailmap { ## use the filenames on the command line or find the filenames in the patchfiles my @files = (); +my @fixes = (); # If a patch description includes Fixes: lines my @range = (); my @keyword_tvi = (); my @file_emails = (); @@ -568,6 +571,8 @@ foreach my $file (@ARGV) { my $filename2 = $2; push(@files, $filename1); push(@files, $filename2); + } elsif (m/^Fixes:\s+([0-9a-fA-F]{6,40})/) { + push(@fixes, $1) if ($email_fixes); } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) { my $filename = $1; $filename =~ s@^[^/]*/@@; @@ -598,6 +603,7 @@ foreach my $file (@ARGV) { } @file_emails = uniq(@file_emails); +@fixes = uniq(@fixes); my %email_hash_name; my %email_hash_address; @@ -612,7 +618,6 @@ my %deduplicate_name_hash = (); my %deduplicate_address_hash = (); my @maintainers = get_maintainers(); - if (@maintainers) { @maintainers = merge_email(@maintainers); output(@maintainers); @@ -927,6 +932,10 @@ sub get_maintainers { } } + foreach my $fix (@fixes) { + vcs_add_commit_signers($fix, "blamed_fixes"); + } + foreach my $email (@email_to, @list_to) { $email->[0] = deduplicate_email($email->[0]); } @@ -1031,6 +1040,7 @@ MAINTAINER field selection options: --roles => show roles (status:subsystem, git-signer, list, etc...) --rolestats => show roles and statistics (commits/total_commits, %) --file-emails => add email addresses found in -f file (default: 0 (off)) + --fixes => for patches, add signatures of commits with 'Fixes: ' (default: 1 (on)) --scm => print SCM tree(s) if any --status => print status if any --subsystem => print subsystem name if any @@ -1730,6 +1740,32 @@ sub vcs_is_hg { return $vcs_used == 2; } +sub vcs_add_commit_signers { + return if (!vcs_exists()); + + my ($commit, $desc) = @_; + my $commit_count = 0; + my $commit_authors_ref; + my $commit_signers_ref; + my $stats_ref; + my @commit_authors = (); + my @commit_signers = (); + my $cmd; + + $cmd = $VCS_cmds{"find_commit_signers_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; #substitute variables in $cmd + + ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, ""); + @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref; + @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref; + + foreach my $signer (@commit_signers) { + $signer = deduplicate_email($signer); + } + + vcs_assign($desc, 1, @commit_signers); +} + sub interactive_get_maintainers { my ($list_ref) = @_; my @list = @$list_ref; From 885e68e8b7b1328aa1e28b27e13fbfb5f020d269 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:50:32 -0800 Subject: [PATCH 17/86] kernel.h: update comment about simple_strto() functions There were discussions in the past about use cases for simple_strto() functions and, in some rare cases, they have a benefit over kstrto() ones. Update a comment to reduce confusion about special use cases. Link: http://lkml.kernel.org/r/20190801192904.41087-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Suggested-by: Miguel Ojeda Cc: Geert Uytterhoeven Cc: Mans Rullgard Cc: Petr Mladek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 09f759228e3f..3adcb39fa6f5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -348,8 +348,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Used as a replacement for the simple_strtoull. Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { @@ -377,8 +376,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Used as a replacement for the simple_strtoull. Return code must be checked. */ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { @@ -454,7 +452,18 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t return kstrtoint_from_user(s, count, base, res); } -/* Obsolete, do not use. Use kstrto instead */ +/* + * Use kstrto instead. + * + * NOTE: simple_strto does not check for the range overflow and, + * depending on the input, may give interesting results. + * + * Use these functions if and only if you cannot use kstrto, because + * the conversion ends on the first non-digit character, which may be far + * beyond the supported range. It might be useful to parse the strings like + * 10x50 or 12:21 without altering original string or temporary buffer in use. + * Keep in mind above caveat. + */ extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern long simple_strtol(const char *,char **,unsigned int); From d717e7da45b382c09cee4f1e03ce30124e672e07 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:50:36 -0800 Subject: [PATCH 18/86] auxdisplay: charlcd: deduplicate simple_strtoul() Like in commit 8b2303de399f ("serial: core: Fix handling of options after MMIO address") we may use simple_strtoul() which in comparison to kstrtoul() can do conversion in-place without additional and unnecessary code to be written. Link: http://lkml.kernel.org/r/20190801192904.41087-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Petr Mladek Cc: Geert Uytterhoeven Cc: Mans Rullgard Cc: Miguel Ojeda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/auxdisplay/charlcd.c | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index bef6b85778b6..874c259a8829 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -287,31 +287,6 @@ static int charlcd_init_display(struct charlcd *lcd) return 0; } -/* - * Parses an unsigned integer from a string, until a non-digit character - * is found. The empty string is not accepted. No overflow checks are done. - * - * Returns whether the parsing was successful. Only in that case - * the output parameters are written to. - * - * TODO: If the kernel adds an inplace version of kstrtoul(), this function - * could be easily replaced by that. - */ -static bool parse_n(const char *s, unsigned long *res, const char **next_s) -{ - if (!isdigit(*s)) - return false; - - *res = 0; - while (isdigit(*s)) { - *res = *res * 10 + (*s - '0'); - ++s; - } - - *next_s = s; - return true; -} - /* * Parses a movement command of the form "(.*);", where the group can be * any number of subcommands of the form "(x|y)[0-9]+". @@ -336,6 +311,7 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y) { unsigned long new_x = *x; unsigned long new_y = *y; + char *p; for (;;) { if (!*s) @@ -345,11 +321,15 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y) break; if (*s == 'x') { - if (!parse_n(s + 1, &new_x, &s)) + new_x = simple_strtoul(s + 1, &p, 10); + if (p == s + 1) return false; + s = p; } else if (*s == 'y') { - if (!parse_n(s + 1, &new_y, &s)) + new_y = simple_strtoul(s + 1, &p, 10); + if (p == s + 1) return false; + s = p; } else { return false; } From 1a50cb80f219c44adb6265f5071b81fc3c1deced Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Wed, 4 Dec 2019 16:50:39 -0800 Subject: [PATCH 19/86] kernel/notifier.c: intercept duplicate registrations to avoid infinite loops Registering the same notifier to a hook repeatedly can cause the hook list to form a ring or lose other members of the list. case1: An infinite loop in notifier_chain_register() can cause soft lockup atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_chain_register(&test_notifier_list, &test2); case2: An infinite loop in notifier_chain_register() can cause soft lockup atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_call_chain(&test_notifier_list, 0, NULL); case3: lose other hook test2 atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_chain_register(&test_notifier_list, &test2); atomic_notifier_chain_register(&test_notifier_list, &test1); case4: Unregister returns 0, but the hook is still in the linked list, and it is not really registered. If you call notifier_call_chain after ko is unloaded, it will trigger oops. If the system is configured with softlockup_panic and the same hook is repeatedly registered on the panic_notifier_list, it will cause a loop panic. Add a check in notifier_chain_register(), intercepting duplicate registrations to avoid infinite loops Link: http://lkml.kernel.org/r/1568861888-34045-2-git-send-email-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Reviewed-by: Vasily Averin Reviewed-by: Andrew Morton Cc: Alexey Dobriyan Cc: Anna Schumaker Cc: Arjan van de Ven Cc: J. Bruce Fields Cc: Chuck Lever Cc: David S. Miller Cc: Jeff Layton Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Nadia Derbey Cc: "Paul E. McKenney" Cc: Sam Protsenko Cc: Alan Stern Cc: Thomas Gleixner Cc: Trond Myklebust Cc: Viresh Kumar Cc: Xiaoming Ni Cc: YueHaibing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/notifier.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/notifier.c b/kernel/notifier.c index d9f5081d578d..30bedb8be6dd 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -23,7 +23,10 @@ static int notifier_chain_register(struct notifier_block **nl, struct notifier_block *n) { while ((*nl) != NULL) { - WARN_ONCE(((*nl) == n), "double register detected"); + if (unlikely((*nl) == n)) { + WARN(1, "double register detected"); + return 0; + } if (n->priority > (*nl)->priority) break; nl = &((*nl)->next); From 5adaabb65a267d890b29193af2dbc38a3b85bbf2 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Wed, 4 Dec 2019 16:50:43 -0800 Subject: [PATCH 20/86] kernel/notifier.c: remove notifier_chain_cond_register() The only difference between notifier_chain_cond_register() and notifier_chain_register() is the lack of warning hints for duplicate registrations. Use notifier_chain_register() instead of notifier_chain_cond_register() to avoid duplicate code Link: http://lkml.kernel.org/r/1568861888-34045-3-git-send-email-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Reviewed-by: Andrew Morton Cc: Alan Stern Cc: Alexey Dobriyan Cc: Andy Lutomirski Cc: Anna Schumaker Cc: Arjan van de Ven Cc: Chuck Lever Cc: David S. Miller Cc: Ingo Molnar Cc: J. Bruce Fields Cc: Jeff Layton Cc: Nadia Derbey Cc: "Paul E. McKenney" Cc: Sam Protsenko Cc: Thomas Gleixner Cc: Trond Myklebust Cc: Vasily Averin Cc: Viresh Kumar Cc: YueHaibing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/notifier.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/kernel/notifier.c b/kernel/notifier.c index 30bedb8be6dd..e3d221f092fe 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -36,21 +36,6 @@ static int notifier_chain_register(struct notifier_block **nl, return 0; } -static int notifier_chain_cond_register(struct notifier_block **nl, - struct notifier_block *n) -{ - while ((*nl) != NULL) { - if ((*nl) == n) - return 0; - if (n->priority > (*nl)->priority) - break; - nl = &((*nl)->next); - } - n->next = *nl; - rcu_assign_pointer(*nl, n); - return 0; -} - static int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) { @@ -252,7 +237,7 @@ int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, int ret; down_write(&nh->rwsem); - ret = notifier_chain_cond_register(&nh->head, n); + ret = notifier_chain_register(&nh->head, n); up_write(&nh->rwsem); return ret; } From 260a2679e5cbfb3d8a4cf6cd1cb6f57e89c7e543 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Wed, 4 Dec 2019 16:50:47 -0800 Subject: [PATCH 21/86] kernel/notifier.c: remove blocking_notifier_chain_cond_register() blocking_notifier_chain_cond_register() does not consider system_booting state, which is the only difference between this function and blocking_notifier_cain_register(). This can be a bug and is a piece of duplicate code. Delete blocking_notifier_chain_cond_register() Link: http://lkml.kernel.org/r/1568861888-34045-4-git-send-email-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Reviewed-by: Andrew Morton Cc: Alan Stern Cc: Alexey Dobriyan Cc: Andy Lutomirski Cc: Anna Schumaker Cc: Arjan van de Ven Cc: Chuck Lever Cc: David S. Miller Cc: Ingo Molnar Cc: J. Bruce Fields Cc: Jeff Layton Cc: Nadia Derbey Cc: "Paul E. McKenney" Cc: Sam Protsenko Cc: Thomas Gleixner Cc: Trond Myklebust Cc: Vasily Averin Cc: Viresh Kumar Cc: YueHaibing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/notifier.h | 4 ---- kernel/notifier.c | 23 ----------------------- net/sunrpc/rpc_pipe.c | 2 +- 3 files changed, 1 insertion(+), 28 deletions(-) diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 0096a05395e3..018947611483 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -150,10 +150,6 @@ extern int raw_notifier_chain_register(struct raw_notifier_head *nh, extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *nb); -extern int blocking_notifier_chain_cond_register( - struct blocking_notifier_head *nh, - struct notifier_block *nb); - extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, diff --git a/kernel/notifier.c b/kernel/notifier.c index e3d221f092fe..63d7501ac638 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -220,29 +220,6 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, } EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); -/** - * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain - * @nh: Pointer to head of the blocking notifier chain - * @n: New entry in notifier chain - * - * Adds a notifier to a blocking notifier chain, only if not already - * present in the chain. - * Must be called in process context. - * - * Currently always returns zero. - */ -int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, - struct notifier_block *n) -{ - int ret; - - down_write(&nh->rwsem); - ret = notifier_chain_register(&nh->head, n); - up_write(&nh->rwsem); - return ret; -} -EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register); - /** * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b71a39ded930..39e14d5edaf1 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -51,7 +51,7 @@ static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list); int rpc_pipefs_notifier_register(struct notifier_block *nb) { - return blocking_notifier_chain_cond_register(&rpc_pipefs_notifier_list, nb); + return blocking_notifier_chain_register(&rpc_pipefs_notifier_list, nb); } EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register); From ef70eff9dea66f38f8c2c2dcc7fe4b7a2bbb4921 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 4 Dec 2019 16:50:50 -0800 Subject: [PATCH 22/86] kernel/profile.c: use cpumask_available to check for NULL cpumask When building with clang + -Wtautological-pointer-compare, these instances pop up: kernel/profile.c:339:6: warning: comparison of array 'prof_cpu_mask' not equal to a null pointer is always true [-Wtautological-pointer-compare] if (prof_cpu_mask != NULL) ^~~~~~~~~~~~~ ~~~~ kernel/profile.c:376:6: warning: comparison of array 'prof_cpu_mask' not equal to a null pointer is always true [-Wtautological-pointer-compare] if (prof_cpu_mask != NULL) ^~~~~~~~~~~~~ ~~~~ kernel/profile.c:406:26: warning: comparison of array 'prof_cpu_mask' not equal to a null pointer is always true [-Wtautological-pointer-compare] if (!user_mode(regs) && prof_cpu_mask != NULL && ^~~~~~~~~~~~~ ~~~~ 3 warnings generated. This can be addressed with the cpumask_available helper, introduced in commit f7e30f01a9e2 ("cpumask: Add helper cpumask_available()") to fix warnings like this while keeping the code the same. Link: https://github.com/ClangBuiltLinux/linux/issues/747 Link: http://lkml.kernel.org/r/20191022191957.9554-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Reviewed-by: Andrew Morton Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/profile.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/profile.c b/kernel/profile.c index af7c94bf5fa1..4b144b02ca5d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -336,7 +336,7 @@ static int profile_dead_cpu(unsigned int cpu) struct page *page; int i; - if (prof_cpu_mask != NULL) + if (cpumask_available(prof_cpu_mask)) cpumask_clear_cpu(cpu, prof_cpu_mask); for (i = 0; i < 2; i++) { @@ -373,7 +373,7 @@ static int profile_prepare_cpu(unsigned int cpu) static int profile_online_cpu(unsigned int cpu) { - if (prof_cpu_mask != NULL) + if (cpumask_available(prof_cpu_mask)) cpumask_set_cpu(cpu, prof_cpu_mask); return 0; @@ -403,7 +403,7 @@ void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); - if (!user_mode(regs) && prof_cpu_mask != NULL && + if (!user_mode(regs) && cpumask_available(prof_cpu_mask) && cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) profile_hit(type, (void *)profile_pc(regs)); } From 5e1aada08cd19ea652b2d32a250501d09b02ff2e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 4 Dec 2019 16:50:53 -0800 Subject: [PATCH 23/86] kernel/sys.c: avoid copying possible padding bytes in copy_to_user Initialization is not guaranteed to zero padding bytes so use an explicit memset instead to avoid leaking any kernel content in any possible padding bytes. Link: http://lkml.kernel.org/r/dfa331c00881d61c8ee51577a082d8bebd61805c.camel@perches.com Signed-off-by: Joe Perches Cc: Dan Carpenter Cc: Julia Lawall Cc: Thomas Gleixner Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sys.c b/kernel/sys.c index d3aef31e24dc..a9331f101883 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1279,11 +1279,13 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) { - struct oldold_utsname tmp = {}; + struct oldold_utsname tmp; if (!name) return -EFAULT; + memset(&tmp, 0, sizeof(tmp)); + down_read(&uts_sem); memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN); memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN); From 169c474fb22d8a5e909e172f177b957546d0519d Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:50:57 -0800 Subject: [PATCH 24/86] bitops: introduce the for_each_set_clump8 macro Pach series "Introduce the for_each_set_clump8 macro", v18. While adding GPIO get_multiple/set_multiple callback support for various drivers, I noticed a pattern of looping manifesting that would be useful standardized as a macro. This patchset introduces the for_each_set_clump8 macro and utilizes it in several GPIO drivers. The for_each_set_clump macro8 facilitates a for-loop syntax that iterates over a memory region entire groups of set bits at a time. For example, suppose you would like to iterate over a 32-bit integer 8 bits at a time, skipping over 8-bit groups with no set bit, where XXXXXXXX represents the current 8-bit group: Example: 10111110 00000000 11111111 00110011 First loop: 10111110 00000000 11111111 XXXXXXXX Second loop: 10111110 00000000 XXXXXXXX 00110011 Third loop: XXXXXXXX 00000000 11111111 00110011 Each iteration of the loop returns the next 8-bit group that has at least one set bit. The for_each_set_clump8 macro has four parameters: * start: set to the bit offset of the current clump * clump: set to the current clump value * bits: bitmap to search within * size: bitmap size in number of bits In this version of the patchset, the for_each_set_clump macro has been reimplemented and simplified based on the suggestions provided by Rasmus Villemoes and Andy Shevchenko in the version 4 submission. In particular, the function of the for_each_set_clump macro has been restricted to handle only 8-bit clumps; the drivers that use the for_each_set_clump macro only handle 8-bit ports so a generic for_each_set_clump implementation is not necessary. Thus, a solution for large clumps (i.e. those larger than the width of a bitmap word) can be postponed until a driver appears that actually requires such a generic for_each_set_clump implementation. For what it's worth, a semi-generic for_each_set_clump (i.e. for clumps smaller than the width of a bitmap word) can be implemented by simply replacing the hardcoded '8' and '0xFF' instances with respective variables. I have not yet had a need for such an implementation, and since it falls short of a true generic for_each_set_clump function, I have decided to forgo such an implementation for now. In addition, the bitmap_get_value8 and bitmap_set_value8 functions are introduced to get and set 8-bit values respectively. Their use is based on the behavior suggested in the patchset version 4 review. This patch (of 14): This macro iterates for each 8-bit group of bits (clump) with set bits, within a bitmap memory region. For each iteration, "start" is set to the bit offset of the found clump, while the respective clump value is stored to the location pointed by "clump". Additionally, the bitmap_get_value8 and bitmap_set_value8 functions are introduced to respectively get and set an 8-bit value in a bitmap memory region. [gustavo@embeddedor.com: fix potential sign-extension overflow] Link: http://lkml.kernel.org/r/20191015184657.GA26541@embeddedor [akpm@linux-foundation.org: s/ULL/UL/, per Joe] [vilhelm.gray@gmail.com: add for_each_set_clump8 documentation] Link: http://lkml.kernel.org/r/20191016161825.301082-1-vilhelm.gray@gmail.com Link: http://lkml.kernel.org/r/893c3b4f03266c9496137cc98ac2b1bd27f92c73.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Signed-off-by: Gustavo A. R. Silva Suggested-by: Andy Shevchenko Suggested-by: Rasmus Villemoes Suggested-by: Lukas Wunner Tested-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Masahiro Yamada Cc: Geert Uytterhoeven Cc: Phil Reid Cc: Geert Uytterhoeven Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bitops/find.h | 17 +++++++++++++++ include/linux/bitmap.h | 35 +++++++++++++++++++++++++++++++ include/linux/bitops.h | 12 +++++++++++ lib/find_bit.c | 14 +++++++++++++ 4 files changed, 78 insertions(+) diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 8a1ee10014de..9fdf21302fdf 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -80,4 +80,21 @@ extern unsigned long find_first_zero_bit(const unsigned long *addr, #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ +/** + * find_next_clump8 - find next 8-bit clump with set bits in a memory region + * @clump: location to store copy of found clump + * @addr: address to base the search on + * @size: bitmap size in number of bits + * @offset: bit offset at which to start searching + * + * Returns the bit offset for the next set clump; the found clump value is + * copied to the location pointed by @clump. If no bits are set, returns @size. + */ +extern unsigned long find_next_clump8(unsigned long *clump, + const unsigned long *addr, + unsigned long size, unsigned long offset); + +#define find_first_clump8(clump, bits, size) \ + find_next_clump8((clump), (bits), (size), 0) + #endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 29fc933df3bf..9f046609e809 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -66,6 +66,8 @@ * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst + * bitmap_get_value8(map, start) Get 8bit value from map at start + * bitmap_set_value8(map, value, start) Set 8bit value to map at start * * Note, bitmap_zero() and bitmap_fill() operate over the region of * unsigned longs, that is, bits behind bitmap till the unsigned long @@ -489,6 +491,39 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) dst[1] = mask >> 32; } +/** + * bitmap_get_value8 - get an 8-bit value within a memory region + * @map: address to the bitmap memory region + * @start: bit offset of the 8-bit value; must be a multiple of 8 + * + * Returns the 8-bit value located at the @start bit offset within the @src + * memory region. + */ +static inline unsigned long bitmap_get_value8(const unsigned long *map, + unsigned long start) +{ + const size_t index = BIT_WORD(start); + const unsigned long offset = start % BITS_PER_LONG; + + return (map[index] >> offset) & 0xFF; +} + +/** + * bitmap_set_value8 - set an 8-bit value within a memory region + * @map: address to the bitmap memory region + * @value: the 8-bit value; values wider than 8 bits may clobber bitmap + * @start: bit offset of the 8-bit value; must be a multiple of 8 + */ +static inline void bitmap_set_value8(unsigned long *map, unsigned long value, + unsigned long start) +{ + const size_t index = BIT_WORD(start); + const unsigned long offset = start % BITS_PER_LONG; + + map[index] &= ~(0xFFUL << offset); + map[index] |= value << offset; +} + #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index c94a9ff9f082..e479067c202c 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -47,6 +47,18 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) +/** + * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits + * @start: bit offset to start search and to store the current iteration offset + * @clump: location to store copy of current 8-bit clump + * @bits: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_set_clump8(start, clump, bits, size) \ + for ((start) = find_first_clump8(&(clump), (bits), (size)); \ + (start) < (size); \ + (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) + static inline int get_bitmask_order(unsigned int count) { int order; diff --git a/lib/find_bit.c b/lib/find_bit.c index 5c51eb45178a..e35a76b291e6 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -214,3 +214,17 @@ EXPORT_SYMBOL(find_next_bit_le); #endif #endif /* __BIG_ENDIAN */ + +unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + offset = find_next_bit(addr, size, offset); + if (offset == size) + return size; + + offset = round_down(offset, 8); + *clump = bitmap_get_value8(addr, offset); + + return offset; +} +EXPORT_SYMBOL(find_next_clump8); From e4aa168de88da92f05dd5f13ceb63711011a9b04 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:01 -0800 Subject: [PATCH 25/86] lib/test_bitmap.c: add for_each_set_clump8 test cases The introduction of the for_each_set_clump8 macro warrants test cases to verify the implementation. This patch adds test case checks for whether an out-of-bounds clump index is returned, a zero clump is returned, or the returned clump value differs from the expected clump value. Link: http://lkml.kernel.org/r/febc0fb8151e3e3fdd61c34da9193d1c4d7e6c12.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Tested-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 51a98f7ee79e..dc167c13eb39 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -92,6 +92,36 @@ __check_eq_u32_array(const char *srcfile, unsigned int line, return true; } +static bool __init __check_eq_clump8(const char *srcfile, unsigned int line, + const unsigned int offset, + const unsigned int size, + const unsigned char *const clump_exp, + const unsigned long *const clump) +{ + unsigned long exp; + + if (offset >= size) { + pr_warn("[%s:%u] bit offset for clump out-of-bounds: expected less than %u, got %u\n", + srcfile, line, size, offset); + return false; + } + + exp = clump_exp[offset / 8]; + if (!exp) { + pr_warn("[%s:%u] bit offset for zero clump: expected nonzero clump, got bit offset %u with clump value 0", + srcfile, line, offset); + return false; + } + + if (*clump != exp) { + pr_warn("[%s:%u] expected clump value of 0x%lX, got clump value of 0x%lX", + srcfile, line, exp, *clump); + return false; + } + + return true; +} + #define __expect_eq(suffix, ...) \ ({ \ int result = 0; \ @@ -108,6 +138,7 @@ __check_eq_u32_array(const char *srcfile, unsigned int line, #define expect_eq_bitmap(...) __expect_eq(bitmap, ##__VA_ARGS__) #define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__) #define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__) +#define expect_eq_clump8(...) __expect_eq(clump8, ##__VA_ARGS__) static void __init test_zero_clear(void) { @@ -404,6 +435,39 @@ static void noinline __init test_mem_optimisations(void) } } +static const unsigned char clump_exp[] __initconst = { + 0x01, /* 1 bit set */ + 0x02, /* non-edge 1 bit set */ + 0x00, /* zero bits set */ + 0x38, /* 3 bits set across 4-bit boundary */ + 0x38, /* Repeated clump */ + 0x0F, /* 4 bits set */ + 0xFF, /* all bits set */ + 0x05, /* non-adjacent 2 bits set */ +}; + +static void __init test_for_each_set_clump8(void) +{ +#define CLUMP_EXP_NUMBITS 64 + DECLARE_BITMAP(bits, CLUMP_EXP_NUMBITS); + unsigned int start; + unsigned long clump; + + /* set bitmap to test case */ + bitmap_zero(bits, CLUMP_EXP_NUMBITS); + bitmap_set(bits, 0, 1); /* 0x01 */ + bitmap_set(bits, 9, 1); /* 0x02 */ + bitmap_set(bits, 27, 3); /* 0x28 */ + bitmap_set(bits, 35, 3); /* 0x28 */ + bitmap_set(bits, 40, 4); /* 0x0F */ + bitmap_set(bits, 48, 8); /* 0xFF */ + bitmap_set(bits, 56, 1); /* 0x05 - part 1 */ + bitmap_set(bits, 58, 1); /* 0x05 - part 2 */ + + for_each_set_clump8(start, clump, bits, CLUMP_EXP_NUMBITS) + expect_eq_clump8(start, CLUMP_EXP_NUMBITS, clump_exp, &clump); +} + static void __init selftest(void) { test_zero_clear(); @@ -413,6 +477,7 @@ static void __init selftest(void) test_bitmap_parselist(); test_bitmap_parselist_user(); test_mem_optimisations(); + test_for_each_set_clump8(); } KSTM_MODULE_LOADERS(test_bitmap); From f70dad5d4c0fd4e184483b70939e935e060d9208 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:04 -0800 Subject: [PATCH 26/86] gpio: 104-dio-48e: utilize for_each_set_clump8 macro Replace verbose implementation in get_multiple/set_multiple callbacks with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/08b9c9a3e75ef1ab0d172223d10a1661f2b43fe2.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-104-dio-48e.c | 73 ++++++++++----------------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c index 400c09b905f8..1f7d9bbec0fc 100644 --- a/drivers/gpio/gpio-104-dio-48e.c +++ b/drivers/gpio/gpio-104-dio-48e.c @@ -178,46 +178,25 @@ static int dio48e_gpio_get(struct gpio_chip *chip, unsigned offset) return !!(port_state & mask); } +static const size_t ports[] = { 0, 1, 2, 4, 5, 6 }; + static int dio48e_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip); - size_t i; - static const size_t ports[] = { 0, 1, 2, 4, 5, 6 }; - const unsigned int gpio_reg_size = 8; - unsigned int bits_offset; - size_t word_index; - unsigned int word_offset; - unsigned long word_mask; - const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); + unsigned long offset; + unsigned long gpio_mask; + unsigned int port_addr; unsigned long port_state; /* clear bits array to a clean slate */ bitmap_zero(bits, chip->ngpio); - /* get bits are evaluated a gpio port register at a time */ - for (i = 0; i < ARRAY_SIZE(ports); i++) { - /* gpio offset in bits array */ - bits_offset = i * gpio_reg_size; + for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { + port_addr = dio48egpio->base + ports[offset / 8]; + port_state = inb(port_addr) & gpio_mask; - /* word index for bits array */ - word_index = BIT_WORD(bits_offset); - - /* gpio offset within current word of bits array */ - word_offset = bits_offset % BITS_PER_LONG; - - /* mask of get bits for current gpio within current word */ - word_mask = mask[word_index] & (port_mask << word_offset); - if (!word_mask) { - /* no get bits in this port so skip to next one */ - continue; - } - - /* read bits from current gpio port */ - port_state = inb(dio48egpio->base + ports[i]); - - /* store acquired bits at respective bits array offset */ - bits[word_index] |= (port_state << word_offset) & word_mask; + bitmap_set_value8(bits, port_state, offset); } return 0; @@ -247,37 +226,27 @@ static void dio48e_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip); - unsigned int i; - const unsigned int gpio_reg_size = 8; - unsigned int port; - unsigned int out_port; - unsigned int bitmask; + unsigned long offset; + unsigned long gpio_mask; + size_t index; + unsigned int port_addr; + unsigned long bitmask; unsigned long flags; - /* set bits are evaluated a gpio register size at a time */ - for (i = 0; i < chip->ngpio; i += gpio_reg_size) { - /* no more set bits in this mask word; skip to the next word */ - if (!mask[BIT_WORD(i)]) { - i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size; - continue; - } + for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { + index = offset / 8; + port_addr = dio48egpio->base + ports[index]; - port = i / gpio_reg_size; - out_port = (port > 2) ? port + 1 : port; - bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)]; + bitmask = bitmap_get_value8(bits, offset) & gpio_mask; raw_spin_lock_irqsave(&dio48egpio->lock, flags); /* update output state data and set device gpio register */ - dio48egpio->out_state[port] &= ~mask[BIT_WORD(i)]; - dio48egpio->out_state[port] |= bitmask; - outb(dio48egpio->out_state[port], dio48egpio->base + out_port); + dio48egpio->out_state[index] &= ~gpio_mask; + dio48egpio->out_state[index] |= bitmask; + outb(dio48egpio->out_state[index], port_addr); raw_spin_unlock_irqrestore(&dio48egpio->lock, flags); - - /* prepare for next gpio register set */ - mask[BIT_WORD(i)] >>= gpio_reg_size; - bits[BIT_WORD(i)] >>= gpio_reg_size; } } From 9bfcce0db3cff35901fbf5d332f7e1f1fe097cfb Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:08 -0800 Subject: [PATCH 27/86] gpio: 104-idi-48: utilize for_each_set_clump8 macro Replace verbose implementation in get_multiple/set_multiple callbacks with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/b0631b6d489f85008480399df283ccd33ecfe310.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-104-idi-48.c | 36 +++++++--------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c index c50329ab493a..d350ac0de06b 100644 --- a/drivers/gpio/gpio-104-idi-48.c +++ b/drivers/gpio/gpio-104-idi-48.c @@ -85,42 +85,20 @@ static int idi_48_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idi_48_gpio *const idi48gpio = gpiochip_get_data(chip); - size_t i; + unsigned long offset; + unsigned long gpio_mask; static const size_t ports[] = { 0, 1, 2, 4, 5, 6 }; - const unsigned int gpio_reg_size = 8; - unsigned int bits_offset; - size_t word_index; - unsigned int word_offset; - unsigned long word_mask; - const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); + unsigned int port_addr; unsigned long port_state; /* clear bits array to a clean slate */ bitmap_zero(bits, chip->ngpio); - /* get bits are evaluated a gpio port register at a time */ - for (i = 0; i < ARRAY_SIZE(ports); i++) { - /* gpio offset in bits array */ - bits_offset = i * gpio_reg_size; + for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { + port_addr = idi48gpio->base + ports[offset / 8]; + port_state = inb(port_addr) & gpio_mask; - /* word index for bits array */ - word_index = BIT_WORD(bits_offset); - - /* gpio offset within current word of bits array */ - word_offset = bits_offset % BITS_PER_LONG; - - /* mask of get bits for current gpio within current word */ - word_mask = mask[word_index] & (port_mask << word_offset); - if (!word_mask) { - /* no get bits in this port so skip to next one */ - continue; - } - - /* read bits from current gpio port */ - port_state = inb(idi48gpio->base + ports[i]); - - /* store acquired bits at respective bits array offset */ - bits[word_index] |= (port_state << word_offset) & word_mask; + bitmap_set_value8(bits, port_state, offset); } return 0; From b0f49e9b9e2cf0b4c09998bd806dd6ede6c4ad61 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:11 -0800 Subject: [PATCH 28/86] gpio: gpio-mm: utilize for_each_set_clump8 macro Replace verbose implementation in get_multiple/set_multiple callbacks with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/0de53d7021b2d6db10294473cd8a1b6102bcec94.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-gpio-mm.c | 73 +++++++++++-------------------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/drivers/gpio/gpio-gpio-mm.c b/drivers/gpio/gpio-gpio-mm.c index c22d6f94129c..b89b8c5ff1f5 100644 --- a/drivers/gpio/gpio-gpio-mm.c +++ b/drivers/gpio/gpio-gpio-mm.c @@ -167,46 +167,25 @@ static int gpiomm_gpio_get(struct gpio_chip *chip, unsigned int offset) return !!(port_state & mask); } +static const size_t ports[] = { 0, 1, 2, 4, 5, 6 }; + static int gpiomm_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip); - size_t i; - static const size_t ports[] = { 0, 1, 2, 4, 5, 6 }; - const unsigned int gpio_reg_size = 8; - unsigned int bits_offset; - size_t word_index; - unsigned int word_offset; - unsigned long word_mask; - const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); + unsigned long offset; + unsigned long gpio_mask; + unsigned int port_addr; unsigned long port_state; /* clear bits array to a clean slate */ bitmap_zero(bits, chip->ngpio); - /* get bits are evaluated a gpio port register at a time */ - for (i = 0; i < ARRAY_SIZE(ports); i++) { - /* gpio offset in bits array */ - bits_offset = i * gpio_reg_size; + for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { + port_addr = gpiommgpio->base + ports[offset / 8]; + port_state = inb(port_addr) & gpio_mask; - /* word index for bits array */ - word_index = BIT_WORD(bits_offset); - - /* gpio offset within current word of bits array */ - word_offset = bits_offset % BITS_PER_LONG; - - /* mask of get bits for current gpio within current word */ - word_mask = mask[word_index] & (port_mask << word_offset); - if (!word_mask) { - /* no get bits in this port so skip to next one */ - continue; - } - - /* read bits from current gpio port */ - port_state = inb(gpiommgpio->base + ports[i]); - - /* store acquired bits at respective bits array offset */ - bits[word_index] |= (port_state << word_offset) & word_mask; + bitmap_set_value8(bits, port_state, offset); } return 0; @@ -237,37 +216,27 @@ static void gpiomm_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip); - unsigned int i; - const unsigned int gpio_reg_size = 8; - unsigned int port; - unsigned int out_port; - unsigned int bitmask; + unsigned long offset; + unsigned long gpio_mask; + size_t index; + unsigned int port_addr; + unsigned long bitmask; unsigned long flags; - /* set bits are evaluated a gpio register size at a time */ - for (i = 0; i < chip->ngpio; i += gpio_reg_size) { - /* no more set bits in this mask word; skip to the next word */ - if (!mask[BIT_WORD(i)]) { - i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size; - continue; - } + for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { + index = offset / 8; + port_addr = gpiommgpio->base + ports[index]; - port = i / gpio_reg_size; - out_port = (port > 2) ? port + 1 : port; - bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)]; + bitmask = bitmap_get_value8(bits, offset) & gpio_mask; spin_lock_irqsave(&gpiommgpio->lock, flags); /* update output state data and set device gpio register */ - gpiommgpio->out_state[port] &= ~mask[BIT_WORD(i)]; - gpiommgpio->out_state[port] |= bitmask; - outb(gpiommgpio->out_state[port], gpiommgpio->base + out_port); + gpiommgpio->out_state[index] &= ~gpio_mask; + gpiommgpio->out_state[index] |= bitmask; + outb(gpiommgpio->out_state[index], port_addr); spin_unlock_irqrestore(&gpiommgpio->lock, flags); - - /* prepare for next gpio register set */ - mask[BIT_WORD(i)] >>= gpio_reg_size; - bits[BIT_WORD(i)] >>= gpio_reg_size; } } From acebb82fe9cd4d3dc09e7e28ffa5a5ecc76ae7f8 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:15 -0800 Subject: [PATCH 29/86] gpio: ws16c48: utilize for_each_set_clump8 macro Replace verbose implementation in get_multiple/set_multiple callbacks with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/7a0d2c964e7f2d289b16c63ff6b06fc1f4c50d4d.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-ws16c48.c | 73 ++++++++++--------------------------- 1 file changed, 20 insertions(+), 53 deletions(-) diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c index fe456bea81f6..cb510df2b014 100644 --- a/drivers/gpio/gpio-ws16c48.c +++ b/drivers/gpio/gpio-ws16c48.c @@ -129,42 +129,19 @@ static int ws16c48_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip); - const unsigned int gpio_reg_size = 8; - size_t i; - const size_t num_ports = chip->ngpio / gpio_reg_size; - unsigned int bits_offset; - size_t word_index; - unsigned int word_offset; - unsigned long word_mask; - const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); + unsigned long offset; + unsigned long gpio_mask; + unsigned int port_addr; unsigned long port_state; /* clear bits array to a clean slate */ bitmap_zero(bits, chip->ngpio); - /* get bits are evaluated a gpio port register at a time */ - for (i = 0; i < num_ports; i++) { - /* gpio offset in bits array */ - bits_offset = i * gpio_reg_size; + for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) { + port_addr = ws16c48gpio->base + offset / 8; + port_state = inb(port_addr) & gpio_mask; - /* word index for bits array */ - word_index = BIT_WORD(bits_offset); - - /* gpio offset within current word of bits array */ - word_offset = bits_offset % BITS_PER_LONG; - - /* mask of get bits for current gpio within current word */ - word_mask = mask[word_index] & (port_mask << word_offset); - if (!word_mask) { - /* no get bits in this port so skip to next one */ - continue; - } - - /* read bits from current gpio port */ - port_state = inb(ws16c48gpio->base + i); - - /* store acquired bits at respective bits array offset */ - bits[word_index] |= (port_state << word_offset) & word_mask; + bitmap_set_value8(bits, port_state, offset); } return 0; @@ -198,39 +175,29 @@ static void ws16c48_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip); - unsigned int i; - const unsigned int gpio_reg_size = 8; - unsigned int port; - unsigned int iomask; - unsigned int bitmask; + unsigned long offset; + unsigned long gpio_mask; + size_t index; + unsigned int port_addr; + unsigned long bitmask; unsigned long flags; - /* set bits are evaluated a gpio register size at a time */ - for (i = 0; i < chip->ngpio; i += gpio_reg_size) { - /* no more set bits in this mask word; skip to the next word */ - if (!mask[BIT_WORD(i)]) { - i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size; - continue; - } - - port = i / gpio_reg_size; + for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) { + index = offset / 8; + port_addr = ws16c48gpio->base + index; /* mask out GPIO configured for input */ - iomask = mask[BIT_WORD(i)] & ~ws16c48gpio->io_state[port]; - bitmask = iomask & bits[BIT_WORD(i)]; + gpio_mask &= ~ws16c48gpio->io_state[index]; + bitmask = bitmap_get_value8(bits, offset) & gpio_mask; raw_spin_lock_irqsave(&ws16c48gpio->lock, flags); /* update output state data and set device gpio register */ - ws16c48gpio->out_state[port] &= ~iomask; - ws16c48gpio->out_state[port] |= bitmask; - outb(ws16c48gpio->out_state[port], ws16c48gpio->base + port); + ws16c48gpio->out_state[index] &= ~gpio_mask; + ws16c48gpio->out_state[index] |= bitmask; + outb(ws16c48gpio->out_state[index], port_addr); raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags); - - /* prepare for next gpio register set */ - mask[BIT_WORD(i)] >>= gpio_reg_size; - bits[BIT_WORD(i)] >>= gpio_reg_size; } } From 2dc7c3c16daac7317ef7458bad6b528aa3330951 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:18 -0800 Subject: [PATCH 30/86] gpio: pci-idio-16: utilize for_each_set_clump8 macro Replace verbose implementation in get_multiple/set_multiple callbacks with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/b30f131b4634caf5a70f12e01496f71631a17305.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-pci-idio-16.c | 79 ++++++++++++--------------------- 1 file changed, 29 insertions(+), 50 deletions(-) diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c index df51dd08bdfe..638d6656ce73 100644 --- a/drivers/gpio/gpio-pci-idio-16.c +++ b/drivers/gpio/gpio-pci-idio-16.c @@ -100,45 +100,23 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - size_t i; - const unsigned int gpio_reg_size = 8; - unsigned int bits_offset; - size_t word_index; - unsigned int word_offset; - unsigned long word_mask; - const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); - unsigned long port_state; + unsigned long offset; + unsigned long gpio_mask; void __iomem *ports[] = { &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15, &idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15, }; + void __iomem *port_addr; + unsigned long port_state; /* clear bits array to a clean slate */ bitmap_zero(bits, chip->ngpio); - /* get bits are evaluated a gpio port register at a time */ - for (i = 0; i < ARRAY_SIZE(ports); i++) { - /* gpio offset in bits array */ - bits_offset = i * gpio_reg_size; + for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { + port_addr = ports[offset / 8]; + port_state = ioread8(port_addr) & gpio_mask; - /* word index for bits array */ - word_index = BIT_WORD(bits_offset); - - /* gpio offset within current word of bits array */ - word_offset = bits_offset % BITS_PER_LONG; - - /* mask of get bits for current gpio within current word */ - word_mask = mask[word_index] & (port_mask << word_offset); - if (!word_mask) { - /* no get bits in this port so skip to next one */ - continue; - } - - /* read bits from current gpio port */ - port_state = ioread8(ports[i]); - - /* store acquired bits at respective bits array offset */ - bits[word_index] |= (port_state << word_offset) & word_mask; + bitmap_set_value8(bits, port_state, offset); } return 0; @@ -178,30 +156,31 @@ static void idio_16_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); + unsigned long offset; + unsigned long gpio_mask; + void __iomem *ports[] = { + &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15, + }; + size_t index; + void __iomem *port_addr; + unsigned long bitmask; unsigned long flags; - unsigned int out_state; + unsigned long out_state; - raw_spin_lock_irqsave(&idio16gpio->lock, flags); + for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { + index = offset / 8; + port_addr = ports[index]; - /* process output lines 0-7 */ - if (*mask & 0xFF) { - out_state = ioread8(&idio16gpio->reg->out0_7) & ~*mask; - out_state |= *mask & *bits; - iowrite8(out_state, &idio16gpio->reg->out0_7); + bitmask = bitmap_get_value8(bits, offset) & gpio_mask; + + raw_spin_lock_irqsave(&idio16gpio->lock, flags); + + out_state = ioread8(port_addr) & ~gpio_mask; + out_state |= bitmask; + iowrite8(out_state, port_addr); + + raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); } - - /* shift to next output line word */ - *mask >>= 8; - - /* process output lines 8-15 */ - if (*mask & 0xFF) { - *bits >>= 8; - out_state = ioread8(&idio16gpio->reg->out8_15) & ~*mask; - out_state |= *mask & *bits; - iowrite8(out_state, &idio16gpio->reg->out8_15); - } - - raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); } static void idio_16_irq_ack(struct irq_data *data) From c586aa8f480598f17e1e7d99d3ca7a7a0a6ffbd3 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:22 -0800 Subject: [PATCH 31/86] gpio: pcie-idio-24: utilize for_each_set_clump8 macro Replace verbose implementation in get_multiple/set_multiple callbacks with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/d5d22fa9809dcf8330f4381dbe7e7ca37990e79f.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-pcie-idio-24.c | 107 +++++++++++-------------------- 1 file changed, 39 insertions(+), 68 deletions(-) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index 44c1e4fc489f..1d475794a50f 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -201,52 +201,34 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip); - size_t i; - const unsigned int gpio_reg_size = 8; - unsigned int bits_offset; - size_t word_index; - unsigned int word_offset; - unsigned long word_mask; - const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); - unsigned long port_state; + unsigned long offset; + unsigned long gpio_mask; void __iomem *ports[] = { &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15, &idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7, &idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23, }; + size_t index; + unsigned long port_state; const unsigned long out_mode_mask = BIT(1); /* clear bits array to a clean slate */ bitmap_zero(bits, chip->ngpio); - /* get bits are evaluated a gpio port register at a time */ - for (i = 0; i < ARRAY_SIZE(ports) + 1; i++) { - /* gpio offset in bits array */ - bits_offset = i * gpio_reg_size; - - /* word index for bits array */ - word_index = BIT_WORD(bits_offset); - - /* gpio offset within current word of bits array */ - word_offset = bits_offset % BITS_PER_LONG; - - /* mask of get bits for current gpio within current word */ - word_mask = mask[word_index] & (port_mask << word_offset); - if (!word_mask) { - /* no get bits in this port so skip to next one */ - continue; - } + for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { + index = offset / 8; /* read bits from current gpio port (port 6 is TTL GPIO) */ - if (i < 6) - port_state = ioread8(ports[i]); + if (index < 6) + port_state = ioread8(ports[index]); else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask) port_state = ioread8(&idio24gpio->reg->ttl_out0_7); else port_state = ioread8(&idio24gpio->reg->ttl_in0_7); - /* store acquired bits at respective bits array offset */ - bits[word_index] |= (port_state << word_offset) & word_mask; + port_state &= gpio_mask; + + bitmap_set_value8(bits, port_state, offset); } return 0; @@ -297,59 +279,48 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip); - size_t i; - unsigned long bits_offset; + unsigned long offset; unsigned long gpio_mask; - const unsigned int gpio_reg_size = 8; - const unsigned long port_mask = GENMASK(gpio_reg_size, 0); - unsigned long flags; - unsigned int out_state; void __iomem *ports[] = { &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15, &idio24gpio->reg->out16_23 }; + size_t index; + unsigned long bitmask; + unsigned long flags; + unsigned long out_state; const unsigned long out_mode_mask = BIT(1); - const unsigned int ttl_offset = 48; - const size_t ttl_i = BIT_WORD(ttl_offset); - const unsigned int word_offset = ttl_offset % BITS_PER_LONG; - const unsigned long ttl_mask = (mask[ttl_i] >> word_offset) & port_mask; - const unsigned long ttl_bits = (bits[ttl_i] >> word_offset) & ttl_mask; - /* set bits are processed a gpio port register at a time */ - for (i = 0; i < ARRAY_SIZE(ports); i++) { - /* gpio offset in bits array */ - bits_offset = i * gpio_reg_size; + for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { + index = offset / 8; - /* check if any set bits for current port */ - gpio_mask = (*mask >> bits_offset) & port_mask; - if (!gpio_mask) { - /* no set bits for this port so move on to next port */ - continue; - } + bitmask = bitmap_get_value8(bits, offset) & gpio_mask; raw_spin_lock_irqsave(&idio24gpio->lock, flags); - /* process output lines */ - out_state = ioread8(ports[i]) & ~gpio_mask; - out_state |= (*bits >> bits_offset) & gpio_mask; - iowrite8(out_state, ports[i]); + /* read bits from current gpio port (port 6 is TTL GPIO) */ + if (index < 6) { + out_state = ioread8(ports[index]); + } else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask) { + out_state = ioread8(&idio24gpio->reg->ttl_out0_7); + } else { + /* skip TTL GPIO if set for input */ + raw_spin_unlock_irqrestore(&idio24gpio->lock, flags); + continue; + } + + /* set requested bit states */ + out_state &= ~gpio_mask; + out_state |= bitmask; + + /* write bits for current gpio port (port 6 is TTL GPIO) */ + if (index < 6) + iowrite8(out_state, ports[index]); + else + iowrite8(out_state, &idio24gpio->reg->ttl_out0_7); raw_spin_unlock_irqrestore(&idio24gpio->lock, flags); } - - /* check if setting TTL lines and if they are in output mode */ - if (!ttl_mask || !(ioread8(&idio24gpio->reg->ctl) & out_mode_mask)) - return; - - /* handle TTL output */ - raw_spin_lock_irqsave(&idio24gpio->lock, flags); - - /* process output lines */ - out_state = ioread8(&idio24gpio->reg->ttl_out0_7) & ~ttl_mask; - out_state |= ttl_bits; - iowrite8(out_state, &idio24gpio->reg->ttl_out0_7); - - raw_spin_unlock_irqrestore(&idio24gpio->lock, flags); } static void idio_24_irq_ack(struct irq_data *data) From 17b6038942e304c522d0688c2b29b1c630f1cfa6 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:25 -0800 Subject: [PATCH 32/86] gpio: uniphier: utilize for_each_set_clump8 macro Replace verbose implementation in set_multiple callback with for_each_set_clump8 macro to simplify code and improve clarity. An improvement in this case is that banks that are not masked will now be skipped. Link: http://lkml.kernel.org/r/5b24887e97f3093e4832d7c50a1093f537e91ab4.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Cc: Andy Shevchenko Cc: Masahiro Yamada Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Linus Walleij Cc: Lukas Wunner Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-uniphier.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c index bd203e8fa58e..7ec97499b7f7 100644 --- a/drivers/gpio/gpio-uniphier.c +++ b/drivers/gpio/gpio-uniphier.c @@ -15,9 +15,6 @@ #include #include -#define UNIPHIER_GPIO_BANK_MASK \ - GENMASK((UNIPHIER_GPIO_LINES_PER_BANK) - 1, 0) - #define UNIPHIER_GPIO_IRQ_MAX_NUM 24 #define UNIPHIER_GPIO_PORT_DATA 0x0 /* data */ @@ -150,15 +147,11 @@ static void uniphier_gpio_set(struct gpio_chip *chip, static void uniphier_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { - unsigned int bank, shift, bank_mask, bank_bits; - int i; + unsigned long i, bank, bank_mask, bank_bits; - for (i = 0; i < chip->ngpio; i += UNIPHIER_GPIO_LINES_PER_BANK) { + for_each_set_clump8(i, bank_mask, mask, chip->ngpio) { bank = i / UNIPHIER_GPIO_LINES_PER_BANK; - shift = i % BITS_PER_LONG; - bank_mask = (mask[BIT_WORD(i)] >> shift) & - UNIPHIER_GPIO_BANK_MASK; - bank_bits = bits[BIT_WORD(i)] >> shift; + bank_bits = bitmap_get_value8(bits, i); uniphier_gpio_bank_write(chip, bank, UNIPHIER_GPIO_PORT_DATA, bank_mask, bank_bits); From b2ca9ebfa68f47950e46b12f2d640f84ee5e1c98 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:29 -0800 Subject: [PATCH 33/86] gpio: 74x164: utilize the for_each_set_clump8 macro Replace verbose implementation in set_multiple callback with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/7ea2df7182a50a1136ca36edc46dffcb2446fd27.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Suggested-by: Andy Shevchenko Tested-by: Andy Shevchenko Cc: Geert Uytterhoeven Cc: Phil Reid Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Linus Walleij Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-74x164.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index e81307f9754e..05637d585152 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -6,6 +6,7 @@ * Copyright (C) 2010 Miguel Gaio */ +#include #include #include #include @@ -72,20 +73,18 @@ static void gen_74x164_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { struct gen_74x164_chip *chip = gpiochip_get_data(gc); - unsigned int i, idx, shift; - u8 bank, bankmask; + unsigned long offset; + unsigned long bankmask; + size_t bank; + unsigned long bitmask; mutex_lock(&chip->lock); - for (i = 0, bank = chip->registers - 1; i < chip->registers; - i++, bank--) { - idx = i / sizeof(*mask); - shift = i % sizeof(*mask) * BITS_PER_BYTE; - bankmask = mask[idx] >> shift; - if (!bankmask) - continue; + for_each_set_clump8(offset, bankmask, mask, chip->registers * 8) { + bank = chip->registers - 1 - offset / 8; + bitmask = bitmap_get_value8(bits, offset) & bankmask; chip->buffer[bank] &= ~bankmask; - chip->buffer[bank] |= bankmask & (bits[idx] >> shift); + chip->buffer[bank] |= bitmask; } __gen_74x164_write_config(chip); mutex_unlock(&chip->lock); From 9f00ebf518b80e4d58ab32966772b68511d015f2 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:32 -0800 Subject: [PATCH 34/86] thermal: intel: intel_soc_dts_iosf: Utilize for_each_set_clump8 macro Utilize for_each_set_clump8 macro, and the bitmap_set_value8 and bitmap_get_value8 functions, where appropriate. In addition, remove the now unnecessary temp_mask and temp_shift members of the intel_soc_dts_sensor_entry structure. Link: http://lkml.kernel.org/r/2d3c74e9a00a52954f31d19e04623a7f4bc85520.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Suggested-by: Andy Shevchenko Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Linus Walleij Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/thermal/intel/intel_soc_dts_iosf.c | 31 +++++++++++++--------- drivers/thermal/intel/intel_soc_dts_iosf.h | 2 -- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c index 5716b62e0f73..f75271b669c6 100644 --- a/drivers/thermal/intel/intel_soc_dts_iosf.c +++ b/drivers/thermal/intel/intel_soc_dts_iosf.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -103,6 +104,7 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts, int status; u32 temp_out; u32 out; + unsigned long update_ptps; u32 store_ptps; u32 store_ptmc; u32 store_te_out; @@ -120,8 +122,10 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts, if (status) return status; - out = (store_ptps & ~(0xFF << (thres_index * 8))); - out |= (temp_out & 0xFF) << (thres_index * 8); + update_ptps = store_ptps; + bitmap_set_value8(&update_ptps, temp_out & 0xFF, thres_index * 8); + out = update_ptps; + status = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE, SOC_DTS_OFFSET_PTPS, out); if (status) @@ -223,6 +227,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd, u32 out; struct intel_soc_dts_sensor_entry *dts; struct intel_soc_dts_sensors *sensors; + unsigned long raw; dts = tzd->devdata; sensors = dts->sensors; @@ -231,8 +236,8 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd, if (status) return status; - out = (out & dts->temp_mask) >> dts->temp_shift; - out -= SOC_DTS_TJMAX_ENCODING; + raw = out; + out = bitmap_get_value8(&raw, dts->id * 8) - SOC_DTS_TJMAX_ENCODING; *temp = sensors->tj_max - out * 1000; return 0; @@ -280,11 +285,14 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts, int read_only_trip_cnt) { char name[10]; + unsigned long trip; int trip_count = 0; int trip_mask = 0; + int writable_trip_cnt = 0; + unsigned long ptps; u32 store_ptps; + unsigned long i; int ret; - int i; /* Store status to restor on exit */ ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, @@ -293,11 +301,10 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts, goto err_ret; dts->id = id; - dts->temp_mask = 0x00FF << (id * 8); - dts->temp_shift = id * 8; if (notification_support) { trip_count = min(SOC_MAX_DTS_TRIPS, trip_cnt); - trip_mask = BIT(trip_count - read_only_trip_cnt) - 1; + writable_trip_cnt = trip_count - read_only_trip_cnt; + trip_mask = GENMASK(writable_trip_cnt - 1, 0); } /* Check if the writable trip we provide is not used by BIOS */ @@ -306,11 +313,9 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts, if (ret) trip_mask = 0; else { - for (i = 0; i < trip_count; ++i) { - if (trip_mask & BIT(i)) - if (store_ptps & (0xff << (i * 8))) - trip_mask &= ~BIT(i); - } + ptps = store_ptps; + for_each_set_clump8(i, trip, &ptps, writable_trip_cnt * 8) + trip_mask &= ~BIT(i / 8); } dts->trip_mask = trip_mask; dts->trip_count = trip_count; diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.h b/drivers/thermal/intel/intel_soc_dts_iosf.h index adfb09af33fc..c54945748200 100644 --- a/drivers/thermal/intel/intel_soc_dts_iosf.h +++ b/drivers/thermal/intel/intel_soc_dts_iosf.h @@ -24,8 +24,6 @@ struct intel_soc_dts_sensors; struct intel_soc_dts_sensor_entry { int id; - u32 temp_mask; - u32 temp_shift; u32 store_status; u32 trip_mask; u32 trip_count; From 608bd5fda60d9e2083c8bb92f17adfae73afc37a Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:36 -0800 Subject: [PATCH 35/86] gpio: pisosr: utilize the for_each_set_clump8 macro Replace verbose implementation in get_multiple callback with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/8a39ee772247d4b7d752b32dbacc06c1cdcb60b5.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Cc: Andy Shevchenko Cc: Morten Hein Tiljeset Cc: Sean Nyekjaer Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Linus Walleij Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Phil Reid Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-pisosr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c index 1331b2a94679..6698feabaced 100644 --- a/drivers/gpio/gpio-pisosr.c +++ b/drivers/gpio/gpio-pisosr.c @@ -96,16 +96,16 @@ static int pisosr_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct pisosr_gpio *gpio = gpiochip_get_data(chip); - unsigned int nbytes = DIV_ROUND_UP(chip->ngpio, 8); - unsigned int i, j; + unsigned long offset; + unsigned long gpio_mask; + unsigned long buffer_state; pisosr_gpio_refresh(gpio); bitmap_zero(bits, chip->ngpio); - for (i = 0; i < nbytes; i++) { - j = i / sizeof(unsigned long); - bits[j] |= ((unsigned long) gpio->buffer[i]) - << (8 * (i % sizeof(unsigned long))); + for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) { + buffer_state = gpio->buffer[offset / 8] & gpio_mask; + bitmap_set_value8(bits, buffer_state, offset); } return 0; From d077c78b45168bc869b5453bb1250325c5ed10ff Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:40 -0800 Subject: [PATCH 36/86] gpio: max3191x: utilize the for_each_set_clump8 macro Replace verbose implementation in get_multiple callback with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/c2b1ed62caf6fce6e5681809a50c05ce6acdf2a6.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Cc: Andy Shevchenko Cc: Mathias Duckeck Cc: Lukas Wunner Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Linus Walleij Cc: Masahiro Yamada Cc: Morten Hein Tiljeset Cc: Phil Reid Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-max3191x.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c index 0696d5a21431..310d1a248cae 100644 --- a/drivers/gpio/gpio-max3191x.c +++ b/drivers/gpio/gpio-max3191x.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include @@ -232,16 +233,20 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask, unsigned long *bits) { struct max3191x_chip *max3191x = gpiochip_get_data(gpio); - int ret, bit = 0, wordlen = max3191x_wordlen(max3191x); + const unsigned int wordlen = max3191x_wordlen(max3191x); + int ret; + unsigned long bit; + unsigned long gpio_mask; + unsigned long in; mutex_lock(&max3191x->lock); ret = max3191x_readout_locked(max3191x); if (ret) goto out_unlock; - while ((bit = find_next_bit(mask, gpio->ngpio, bit)) != gpio->ngpio) { + bitmap_zero(bits, gpio->ngpio); + for_each_set_clump8(bit, gpio_mask, mask, gpio->ngpio) { unsigned int chipnum = bit / MAX3191X_NGPIO; - unsigned long in, shift, index; if (max3191x_chip_is_faulting(max3191x, chipnum)) { ret = -EIO; @@ -249,12 +254,8 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask, } in = ((u8 *)max3191x->xfer.rx_buf)[chipnum * wordlen]; - shift = round_down(bit % BITS_PER_LONG, MAX3191X_NGPIO); - index = bit / BITS_PER_LONG; - bits[index] &= ~(mask[index] & (0xff << shift)); - bits[index] |= mask[index] & (in << shift); /* copy bits */ - - bit = (chipnum + 1) * MAX3191X_NGPIO; /* go to next chip */ + in &= gpio_mask; + bitmap_set_value8(bits, in, bit); } out_unlock: From ae81217edc18135d13e5b6c17609ee2c07af4188 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:51:43 -0800 Subject: [PATCH 37/86] gpio: pca953x: utilize the for_each_set_clump8 macro Replace verbose implementation in set_multiple callback with for_each_set_clump8 macro to simplify code and improve clarity. Link: http://lkml.kernel.org/r/3543ffc3668ad4ed4c00e8ebaf14a5559fd6ddf2.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Tested-by: Andy Shevchenko Cc: Phil Reid Cc: Arnd Bergmann Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Geert Uytterhoeven Cc: Linus Walleij Cc: Lukas Wunner Cc: Masahiro Yamada Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Rasmus Villemoes Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-pca953x.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 82122c3c688a..232e3f987511 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -459,7 +460,8 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { struct pca953x_chip *chip = gpiochip_get_data(gc); - unsigned int bank_mask, bank_val; + unsigned long offset; + unsigned long bank_mask; int bank; u8 reg_val[MAX_BANK]; int ret; @@ -469,15 +471,10 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, if (ret) goto exit; - for (bank = 0; bank < NBANK(chip); bank++) { - bank_mask = mask[bank / sizeof(*mask)] >> - ((bank % sizeof(*mask)) * 8); - if (bank_mask) { - bank_val = bits[bank / sizeof(*bits)] >> - ((bank % sizeof(*bits)) * 8); - bank_val &= bank_mask; - reg_val[bank] = (reg_val[bank] & ~bank_mask) | bank_val; - } + for_each_set_clump8(offset, bank_mask, mask, gc->ngpio) { + bank = offset / 8; + reg_val[bank] &= ~bank_mask; + reg_val[bank] |= bitmap_get_value8(bits, offset) & bank_mask; } pca953x_write_regs(chip, chip->regs->output, reg_val); From 11d43e62f693c66c8c76c2ea2349e0f3c5764964 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 4 Dec 2019 16:51:47 -0800 Subject: [PATCH 38/86] lib/rbtree: set successor's parent unconditionally Both in Case 2 and 3, we exchange n and s. This mean no matter whether child2 is NULL or not, successor's parent should be assigned to node's. This patch takes this step out to make it explicit and reduce the ambiguity. Besides, this step reduces some symbol size like rb_erase(). KERN_CONFIG upstream patched OPT_FOR_PERF 877 870 OPT_FOR_SIZE 635 621 Link: http://lkml.kernel.org/r/20191028021442.5450-1-richardw.yang@linux.intel.com Signed-off-by: Wei Yang Acked-by: Peter Zijlstra (Intel) Reviewed-by: Michel Lespinasse Reviewed-by: Davidlohr Bueso Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index fdd421b8d9ae..99c42e1a74b8 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -283,14 +283,13 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, __rb_change_child(node, successor, tmp, root); if (child2) { - successor->__rb_parent_color = pc; rb_set_parent_color(child2, parent, RB_BLACK); rebalance = NULL; } else { unsigned long pc2 = successor->__rb_parent_color; - successor->__rb_parent_color = pc; rebalance = __rb_is_black(pc2) ? parent : NULL; } + successor->__rb_parent_color = pc; tmp = successor; } From 8b7569a224a18953b9aee29c375e439b8a6eeb05 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 4 Dec 2019 16:51:50 -0800 Subject: [PATCH 39/86] lib/rbtree: get successor's color directly After move parent assignment out, we can check the color directly. Link: http://lkml.kernel.org/r/20191028021442.5450-2-richardw.yang@linux.intel.com Signed-off-by: Wei Yang Acked-by: Peter Zijlstra (Intel) Reviewed-by: Michel Lespinasse Reviewed-by: Davidlohr Bueso Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 99c42e1a74b8..724b0d036b57 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -286,8 +286,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, rb_set_parent_color(child2, parent, RB_BLACK); rebalance = NULL; } else { - unsigned long pc2 = successor->__rb_parent_color; - rebalance = __rb_is_black(pc2) ? parent : NULL; + rebalance = rb_is_black(successor) ? parent : NULL; } successor->__rb_parent_color = pc; tmp = successor; From dc5c5ad79f0cc2d8756d161dbdee7b370f35f5bb Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 4 Dec 2019 16:51:53 -0800 Subject: [PATCH 40/86] lib/test_meminit.c: add bulk alloc/free tests kmem_cache_alloc_bulk/kmem_cache_free_bulk are used to make multiple allocations of the same size to avoid the overhead of multiple kmalloc/kfree calls. Extend the kmem_cache tests to make some calls to these APIs. Link: http://lkml.kernel.org/r/20191107191447.23058-1-labbott@redhat.com Signed-off-by: Laura Abbott Reviewed-by: Kees Cook Tested-by: Alexander Potapenko Cc: Laura Abbott Cc: Christoph Lameter Cc: Nick Desaulniers Cc: Kostya Serebryany Cc: Dmitry Vyukov Cc: Sandeep Patil Cc: Jann Horn Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_meminit.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 9742e5cb853a..e4f706a404b3 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -183,6 +183,9 @@ static bool __init check_buf(void *buf, int size, bool want_ctor, return fail; } +#define BULK_SIZE 100 +static void *bulk_array[BULK_SIZE]; + /* * Test kmem_cache with given parameters: * want_ctor - use a constructor; @@ -203,9 +206,24 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor, want_rcu ? SLAB_TYPESAFE_BY_RCU : 0, want_ctor ? test_ctor : NULL); for (iter = 0; iter < 10; iter++) { + /* Do a test of bulk allocations */ + if (!want_rcu && !want_ctor) { + int ret; + + ret = kmem_cache_alloc_bulk(c, alloc_mask, BULK_SIZE, bulk_array); + if (!ret) { + fail = true; + } else { + int i; + for (i = 0; i < ret; i++) + fail |= check_buf(bulk_array[i], size, want_ctor, want_rcu, want_zero); + kmem_cache_free_bulk(c, ret, bulk_array); + } + } + buf = kmem_cache_alloc(c, alloc_mask); /* Check that buf is zeroed, if it must be. */ - fail = check_buf(buf, size, want_ctor, want_rcu, want_zero); + fail |= check_buf(buf, size, want_ctor, want_rcu, want_zero); fill_with_garbage_skip(buf, size, want_ctor ? CTOR_BYTES : 0); if (!want_rcu) { From 323dd2c3ed0641f49e89b4e420f9eef5d3d5a881 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Wed, 4 Dec 2019 16:51:57 -0800 Subject: [PATCH 41/86] lib/math/rational.c: fix possible incorrect result from rational fractions helper In some cases the previous algorithm would not return the closest approximation. This would happen when a semi-convergent was the closest, as the previous algorithm would only consider convergents. As an example, consider an initial value of 5/4, and trying to find the closest approximation with a maximum of 4 for numerator and denominator. The previous algorithm would return 1/1 as the closest approximation, while this version will return the correct answer of 4/3. To do this, the main loop performs effectively the same operations as it did before. It must now keep track of the last three approximations, n2/d2 .. n0/d0, while before it only needed the last two. If an exact answer is not found, the algorithm will now calculate the best semi-convergent term, t, which is a single expression with two divisions: min((max_numerator - n0) / n1, (max_denominator - d0) / d1) This will be used if it is better than previous convergent. The test for this is generally a simple comparison, 2*t > a. But in an edge case, where the convergent's final term is even and the best allowable semi-convergent has a final term of exactly half the convergent's final term, the more complex comparison (d0*dp > d1*d) is used. I also wrote some comments explaining the code. While one still needs to look up the math elsewhere, they should help a lot to follow how the code relates to that math. This routine is used in two places in the video4linux code, but in those cases it is only used to reduce a fraction to lowest terms, which the existing code will do correctly. This could be done more efficiently with a different library routine but it would still be the Euclidean alogrithm at its heart. So no change. The remain users are places where a fractional PLL divider is programmed. What would happen is something asked for a clock of X MHz but instead gets Y MHz, where Y is close to X but not exactly due to the hardware limitations. After this change they might, in some cases, get Y' MHz, where Y' is a little closer to X then Y was. Users like this are: Three UARTs, in 8250_mid, 8250_lpss, and imx. One GPU in vp4_hdmi. And three clock drivers, clk-cdce706, clk-si5351, and clk-fractional-divider. The last is a generic clock driver and so would have more users referenced via device tree entries. I think there's a bug in that one, it's limiting an N bit field that is offset-by-1 to the range 0 .. (1< Cc: Oskar Schirmer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/math/rational.c | 63 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/lib/math/rational.c b/lib/math/rational.c index ba7443677c90..31fb27db2deb 100644 --- a/lib/math/rational.c +++ b/lib/math/rational.c @@ -3,6 +3,7 @@ * rational fractions * * Copyright (C) 2009 emlix GmbH, Oskar Schirmer + * Copyright (C) 2019 Trent Piepho * * helper functions when coping with rational numbers */ @@ -10,6 +11,7 @@ #include #include #include +#include /* * calculate best rational approximation for a given fraction @@ -33,30 +35,65 @@ void rational_best_approximation( unsigned long max_numerator, unsigned long max_denominator, unsigned long *best_numerator, unsigned long *best_denominator) { - unsigned long n, d, n0, d0, n1, d1; + /* n/d is the starting rational, which is continually + * decreased each iteration using the Euclidean algorithm. + * + * dp is the value of d from the prior iteration. + * + * n2/d2, n1/d1, and n0/d0 are our successively more accurate + * approximations of the rational. They are, respectively, + * the current, previous, and two prior iterations of it. + * + * a is current term of the continued fraction. + */ + unsigned long n, d, n0, d0, n1, d1, n2, d2; n = given_numerator; d = given_denominator; n0 = d1 = 0; n1 = d0 = 1; + for (;;) { - unsigned long t, a; - if ((n1 > max_numerator) || (d1 > max_denominator)) { - n1 = n0; - d1 = d0; - break; - } + unsigned long dp, a; + if (d == 0) break; - t = d; + /* Find next term in continued fraction, 'a', via + * Euclidean algorithm. + */ + dp = d; a = n / d; d = n % d; - n = t; - t = n0 + a * n1; + n = dp; + + /* Calculate the current rational approximation (aka + * convergent), n2/d2, using the term just found and + * the two prior approximations. + */ + n2 = n0 + a * n1; + d2 = d0 + a * d1; + + /* If the current convergent exceeds the maxes, then + * return either the previous convergent or the + * largest semi-convergent, the final term of which is + * found below as 't'. + */ + if ((n2 > max_numerator) || (d2 > max_denominator)) { + unsigned long t = min((max_numerator - n0) / n1, + (max_denominator - d0) / d1); + + /* This tests if the semi-convergent is closer + * than the previous convergent. + */ + if (2u * t > a || (2u * t == a && d0 * dp > d1 * d)) { + n1 = n0 + t * n1; + d1 = d0 + t * d1; + } + break; + } n0 = n1; - n1 = t; - t = d0 + a * d1; + n1 = n2; d0 = d1; - d1 = t; + d1 = d2; } *best_numerator = n1; *best_denominator = d1; From fd7eb2513f8549cf2f6b0c323377816268424bed Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 4 Dec 2019 16:52:00 -0800 Subject: [PATCH 42/86] lib/genalloc.c: export symbol addr_in_gen_pool We use addr_in_gen_pool() in a driver module. So export it. Link: http://lkml.kernel.org/r/20181224070622.22197-2-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Reviewed-by: Andrew Morton Cc: Alexey Skidanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/genalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/genalloc.c b/lib/genalloc.c index 24d20ca7e91b..af9a57422186 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -567,6 +567,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, rcu_read_unlock(); return found; } +EXPORT_SYMBOL(addr_in_gen_pool); /** * gen_pool_avail - get available free space of the pool From 964975ac6677c97ae61ec9d6969dd5d03f18d1c3 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 4 Dec 2019 16:52:03 -0800 Subject: [PATCH 43/86] lib/genalloc.c: rename addr_in_gen_pool to gen_pool_has_addr Follow the kernel conventions, rename addr_in_gen_pool to gen_pool_has_addr. [sjhuang@iluvatar.ai: fix Documentation/ too] Link: http://lkml.kernel.org/r/20181229015914.5573-1-sjhuang@iluvatar.ai Link: http://lkml.kernel.org/r/20181228083950.20398-1-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Reviewed-by: Andrew Morton Cc: Russell King Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: Robin Murphy Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/genalloc.rst | 2 +- arch/arm/mm/dma-mapping.c | 2 +- drivers/misc/sram-exec.c | 2 +- include/linux/genalloc.h | 2 +- kernel/dma/remap.c | 2 +- lib/genalloc.c | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/core-api/genalloc.rst b/Documentation/core-api/genalloc.rst index 098a46f55798..a5af2cbf58a5 100644 --- a/Documentation/core-api/genalloc.rst +++ b/Documentation/core-api/genalloc.rst @@ -129,7 +129,7 @@ writing of special-purpose memory allocators in the future. :functions: gen_pool_for_each_chunk .. kernel-doc:: lib/genalloc.c - :functions: addr_in_gen_pool + :functions: gen_pool_has_addr .. kernel-doc:: lib/genalloc.c :functions: gen_pool_avail diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1df6eb42f22e..e822af0d9219 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -529,7 +529,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page) static bool __in_atomic_pool(void *start, size_t size) { - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); + return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); } static int __free_from_pool(void *start, size_t size) diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c index 426ad912b441..d054e2842a5f 100644 --- a/drivers/misc/sram-exec.c +++ b/drivers/misc/sram-exec.c @@ -96,7 +96,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, if (!part) return NULL; - if (!addr_in_gen_pool(pool, (unsigned long)dst, size)) + if (!gen_pool_has_addr(pool, (unsigned long)dst, size)) return NULL; base = (unsigned long)part->base; diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 4bd583bd6934..5b14a0f38124 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -206,7 +206,7 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, int nid, const char *name); extern struct gen_pool *gen_pool_get(struct device *dev, const char *name); -bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, +extern bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start, size_t size); #ifdef CONFIG_OF diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index d47bd40fc0f5..d14cbc83986a 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -178,7 +178,7 @@ bool dma_in_atomic_pool(void *start, size_t size) if (unlikely(!atomic_pool)) return false; - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); + return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); } void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) diff --git a/lib/genalloc.c b/lib/genalloc.c index af9a57422186..7f1244b5294a 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -540,7 +540,7 @@ void gen_pool_for_each_chunk(struct gen_pool *pool, EXPORT_SYMBOL(gen_pool_for_each_chunk); /** - * addr_in_gen_pool - checks if an address falls within the range of a pool + * gen_pool_has_addr - checks if an address falls within the range of a pool * @pool: the generic memory pool * @start: start address * @size: size of the region @@ -548,7 +548,7 @@ EXPORT_SYMBOL(gen_pool_for_each_chunk); * Check if the range of addresses falls within the specified pool. Returns * true if the entire range is contained in the pool and false otherwise. */ -bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, +bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start, size_t size) { bool found = false; @@ -567,7 +567,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, rcu_read_unlock(); return found; } -EXPORT_SYMBOL(addr_in_gen_pool); +EXPORT_SYMBOL(gen_pool_has_addr); /** * gen_pool_avail - get available free space of the pool From d439e6a5d78b93a85c0f93ea559548fc109ccc1a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 4 Dec 2019 16:52:06 -0800 Subject: [PATCH 44/86] checkpatch: improve ignoring CamelCase SI style variants like mA Ignore all upper-case variants before and after SI units like mA, mV and uV so uses like RANGE_mA do not emit a CAMELCASE message. Link: http://lkml.kernel.org/r/5ce6f9131327fd2e12d7a0e20a55f588448de090.camel@perches.com Signed-off-by: Joe Perches Cc: Jules Irenge Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 592911a2f06c..da330b3ce140 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5038,8 +5038,9 @@ sub process { $var =~ /[A-Z][a-z]|[a-z][A-Z]/ && #Ignore Page variants $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && -#Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show) - $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/ && +#Ignore SI style variants like nS, mV and dB +#(ie: max_uV, regulator_min_uA_show, RANGE_mA_VALUE) + $var !~ /^(?:[a-z0-9_]*|[A-Z0-9_]*)?_?[a-z][A-Z](?:_[a-z0-9_]+|_[A-Z0-9_]+)?$/ && #Ignore some three character SI units explicitly, like MiB and KHz $var !~ /^(?:[a-z_]*?)_?(?:[KMGT]iB|[KMGT]?Hz)(?:_[a-z_]+)?$/) { while ($var =~ m{($Ident)}g) { From cd28b119047b9928ddae914dd747f032ab929cc6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 4 Dec 2019 16:52:09 -0800 Subject: [PATCH 45/86] checkpatch: reduce is_maintained_obsolete lookup runtime The is_maintained_obsolete function can be called twice using the same filename. This function spawns a process using get_maintainer.pl. Store the status of each filename when spawned and use the stored result to eliminate the spawning of unnecessary duplicate child processes. Example: old: $ time ./scripts/checkpatch.pl hp100-Move-to-staging.patch > /dev/null real 0m1.767s user 0m1.634s sys 0m0.141s new: $ time ./scripts/checkpatch.pl hp100-Move-to-staging.patch > /dev/null real 0m1.184s user 0m1.085s sys 0m0.103s Link: http://lkml.kernel.org/r/b982566a2b9b4825badce36fdfc3032bd0005151.camel@perches.com Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index da330b3ce140..7cbe6e72e363 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -874,14 +874,18 @@ sub seed_camelcase_file { } } +our %maintained_status = (); + sub is_maintained_obsolete { my ($filename) = @_; return 0 if (!$tree || !(-e "$root/scripts/get_maintainer.pl")); - my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol --nogit --nogit-fallback -f $filename 2>&1`; + if (!exists($maintained_status{$filename})) { + $maintained_status{$filename} = `perl $root/scripts/get_maintainer.pl --status --nom --nol --nogit --nogit-fallback -f $filename 2>&1`; + } - return $status =~ /obsolete/i; + return $maintained_status{$filename} =~ /obsolete/i; } sub is_SPDX_License_valid { From f6520c520842c532059c380d7a5b30b6e0e72ced Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 4 Dec 2019 16:52:12 -0800 Subject: [PATCH 46/86] epoll: simplify ep_poll_safewake() for CONFIG_DEBUG_LOCK_ALLOC Currently, ep_poll_safewake() in the CONFIG_DEBUG_LOCK_ALLOC case uses ep_call_nested() in order to pass the correct subclass argument to spin_lock_irqsave_nested(). However, ep_call_nested() adds unnecessary checks for epoll depth and loops that are already verified when doing EPOLL_CTL_ADD. This mirrors a conversion that was done for !CONFIG_DEBUG_LOCK_ALLOC in: commit 37b5e5212a44 ("epoll: remove ep_call_nested() from ep_eventpoll_poll()") Link: http://lkml.kernel.org/r/1567628549-11501-1-git-send-email-jbaron@akamai.com Signed-off-by: Jason Baron Reviewed-by: Roman Penyaev Cc: Davidlohr Bueso Cc: Al Viro Cc: Eric Wong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c4159bcc05d9..98ef0e2f1b5c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -551,28 +551,23 @@ out_unlock: */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -static struct nested_calls poll_safewake_ncalls; - -static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) -{ - unsigned long flags; - wait_queue_head_t *wqueue = (wait_queue_head_t *)cookie; - - spin_lock_irqsave_nested(&wqueue->lock, flags, call_nests + 1); - wake_up_locked_poll(wqueue, EPOLLIN); - spin_unlock_irqrestore(&wqueue->lock, flags); - - return 0; -} +static DEFINE_PER_CPU(int, wakeup_nest); static void ep_poll_safewake(wait_queue_head_t *wq) { - int this_cpu = get_cpu(); + unsigned long flags; + int subclass; - ep_call_nested(&poll_safewake_ncalls, - ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); - - put_cpu(); + local_irq_save(flags); + preempt_disable(); + subclass = __this_cpu_read(wakeup_nest); + spin_lock_nested(&wq->lock, subclass + 1); + __this_cpu_inc(wakeup_nest); + wake_up_locked_poll(wq, POLLIN); + __this_cpu_dec(wakeup_nest); + spin_unlock(&wq->lock); + local_irq_restore(flags); + preempt_enable(); } #else @@ -2370,11 +2365,6 @@ static int __init eventpoll_init(void) */ ep_nested_calls_init(&poll_loop_ncalls); -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* Initialize the structure used to perform safe poll wait head wake ups */ - ep_nested_calls_init(&poll_safewake_ncalls); -#endif - /* * We can have many thousands of epitems, so prevent this from * using an extra cache line on 64-bit (and smaller) CPUs From 339ddb53d373baee6e7946aec17c739c4924d6d9 Mon Sep 17 00:00:00 2001 From: Heiher Date: Wed, 4 Dec 2019 16:52:15 -0800 Subject: [PATCH 47/86] fs/epoll: remove unnecessary wakeups of nested epoll Take the case where we have: t0 | (ew) e0 | (et) e1 | (lt) s0 t0: thread 0 e0: epoll fd 0 e1: epoll fd 1 s0: socket fd 0 ew: epoll_wait et: edge-trigger lt: level-trigger We remove unnecessary wakeups to prevent the nested epoll that working in edge- triggered mode to waking up continuously. Test code: #include #include #include int main(int argc, char *argv[]) { int sfd[2]; int efd[2]; struct epoll_event e; if (socketpair(AF_UNIX, SOCK_STREAM, 0, sfd) < 0) goto out; efd[0] = epoll_create(1); if (efd[0] < 0) goto out; efd[1] = epoll_create(1); if (efd[1] < 0) goto out; e.events = EPOLLIN; if (epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e) < 0) goto out; e.events = EPOLLIN | EPOLLET; if (epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e) < 0) goto out; if (write(sfd[1], "w", 1) != 1) goto out; if (epoll_wait(efd[0], &e, 1, 0) != 1) goto out; if (epoll_wait(efd[0], &e, 1, 0) != 0) goto out; close(efd[0]); close(efd[1]); close(sfd[0]); close(sfd[1]); return 0; out: return -1; } More tests: https://github.com/heiher/epoll-wakeup Link: http://lkml.kernel.org/r/20191009060516.3577-1-r@hev.cc Signed-off-by: hev Reviewed-by: Roman Penyaev Cc: Al Viro Cc: Davide Libenzi Cc: Davidlohr Bueso Cc: Dominik Brodowski Cc: Eric Wong Cc: Jason Baron Cc: Sridhar Samudrala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 98ef0e2f1b5c..67a395039268 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -666,7 +666,6 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, void *priv, int depth, bool ep_locked) { __poll_t res; - int pwake = 0; struct epitem *epi, *nepi; LIST_HEAD(txlist); @@ -733,26 +732,11 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, */ list_splice(&txlist, &ep->rdllist); __pm_relax(ep->ws); - - if (!list_empty(&ep->rdllist)) { - /* - * Wake up (if active) both the eventpoll wait list and - * the ->poll() wait list (delayed after we release the lock). - */ - if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); - if (waitqueue_active(&ep->poll_wait)) - pwake++; - } write_unlock_irq(&ep->lock); if (!ep_locked) mutex_unlock(&ep->mtx); - /* We have to call this outside the lock */ - if (pwake) - ep_poll_safewake(&ep->poll_wait); - return res; } From f2728fe80cefb40a8f486e64a4bef17ce2c64f5b Mon Sep 17 00:00:00 2001 From: Heiher Date: Wed, 4 Dec 2019 16:52:19 -0800 Subject: [PATCH 48/86] selftests: add epoll selftests This adds the promised selftest for epoll. It will verify the wakeups of epoll. Including leaf and nested mode, epoll_wait() and poll() and multi-threads. Link: http://lkml.kernel.org/r/20191009121518.4027-1-r@hev.cc Signed-off-by: hev Reviewed-by: Roman Penyaev Cc: Jason Baron Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/Makefile | 1 + .../selftests/filesystems/epoll/.gitignore | 1 + .../selftests/filesystems/epoll/Makefile | 7 + .../filesystems/epoll/epoll_wakeup_test.c | 3074 +++++++++++++++++ 4 files changed, 3083 insertions(+) create mode 100644 tools/testing/selftests/filesystems/epoll/.gitignore create mode 100644 tools/testing/selftests/filesystems/epoll/Makefile create mode 100644 tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index d67f968eac21..b001c602414b 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -13,6 +13,7 @@ TARGETS += efivarfs TARGETS += exec TARGETS += filesystems TARGETS += filesystems/binderfs +TARGETS += filesystems/epoll TARGETS += firmware TARGETS += ftrace TARGETS += futex diff --git a/tools/testing/selftests/filesystems/epoll/.gitignore b/tools/testing/selftests/filesystems/epoll/.gitignore new file mode 100644 index 000000000000..9ae8db44ec14 --- /dev/null +++ b/tools/testing/selftests/filesystems/epoll/.gitignore @@ -0,0 +1 @@ +epoll_wakeup_test diff --git a/tools/testing/selftests/filesystems/epoll/Makefile b/tools/testing/selftests/filesystems/epoll/Makefile new file mode 100644 index 000000000000..e62f3d4f68da --- /dev/null +++ b/tools/testing/selftests/filesystems/epoll/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +LDFLAGS += -lpthread +TEST_GEN_PROGS := epoll_wakeup_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c new file mode 100644 index 000000000000..37a04dab56f0 --- /dev/null +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -0,0 +1,3074 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include "../../kselftest_harness.h" + +struct epoll_mtcontext +{ + int efd[3]; + int sfd[4]; + int count; + + pthread_t main; + pthread_t waiter; +}; + +static void signal_handler(int signum) +{ +} + +static void kill_timeout(struct epoll_mtcontext *ctx) +{ + usleep(1000000); + pthread_kill(ctx->main, SIGUSR1); + pthread_kill(ctx->waiter, SIGUSR1); +} + +static void *waiter_entry1a(void *data) +{ + struct epoll_event e; + struct epoll_mtcontext *ctx = data; + + if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx->count, 1); + + return NULL; +} + +static void *waiter_entry1ap(void *data) +{ + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext *ctx = data; + + pfd.fd = ctx->efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx->count, 1); + } + + return NULL; +} + +static void *waiter_entry1o(void *data) +{ + struct epoll_event e; + struct epoll_mtcontext *ctx = data; + + if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx->count, 1); + + return NULL; +} + +static void *waiter_entry1op(void *data) +{ + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext *ctx = data; + + pfd.fd = ctx->efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0) + __sync_fetch_and_or(&ctx->count, 1); + } + + return NULL; +} + +static void *waiter_entry2a(void *data) +{ + struct epoll_event events[2]; + struct epoll_mtcontext *ctx = data; + + if (epoll_wait(ctx->efd[0], events, 2, -1) > 0) + __sync_fetch_and_add(&ctx->count, 1); + + return NULL; +} + +static void *waiter_entry2ap(void *data) +{ + struct pollfd pfd; + struct epoll_event events[2]; + struct epoll_mtcontext *ctx = data; + + pfd.fd = ctx->efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx->efd[0], events, 2, 0) > 0) + __sync_fetch_and_add(&ctx->count, 1); + } + + return NULL; +} + +static void *emitter_entry1(void *data) +{ + struct epoll_mtcontext *ctx = data; + + usleep(100000); + write(ctx->sfd[1], "w", 1); + + kill_timeout(ctx); + + return NULL; +} + +static void *emitter_entry2(void *data) +{ + struct epoll_mtcontext *ctx = data; + + usleep(100000); + write(ctx->sfd[1], "w", 1); + write(ctx->sfd[3], "w", 1); + + kill_timeout(ctx); + + return NULL; +} + +/* + * t0 + * | (ew) + * e0 + * | (lt) + * s0 + */ +TEST(epoll1) +{ + int efd; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * | (et) + * s0 + */ +TEST(epoll2) +{ + int efd; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 0); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * (lt) / \ (lt) + * s0 s2 + */ +TEST(epoll3) +{ + int efd; + int sfd[4]; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + + close(efd); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (ew) + * e0 + * (et) / \ (et) + * s0 s2 + */ +TEST(epoll4) +{ + int efd; + int sfd[4]; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0); + + close(efd); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (p) + * e0 + * | (lt) + * s0 + */ +TEST(epoll5) +{ + int efd; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + ASSERT_EQ(poll(&pfd, 1, 0), 1); + ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + ASSERT_EQ(poll(&pfd, 1, 0), 1); + ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (et) + * s0 + */ +TEST(epoll6) +{ + int efd; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + ASSERT_EQ(poll(&pfd, 1, 0), 1); + ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + ASSERT_EQ(poll(&pfd, 1, 0), 0); + ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 0); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * (lt) / \ (lt) + * s0 s2 + */ + +TEST(epoll7) +{ + int efd; + int sfd[4]; + struct pollfd pfd; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + + pfd.fd = efd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + + close(efd); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (p) + * e0 + * (et) / \ (et) + * s0 s2 + */ +TEST(epoll8) +{ + int efd; + int sfd[4]; + struct pollfd pfd; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + + pfd.fd = efd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0); + + close(efd); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (lt) + * s0 + */ +TEST(epoll9) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (et) + * s0 + */ +TEST(epoll10) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * (lt) / \ (lt) + * s0 s2 + */ +TEST(epoll11) +{ + pthread_t emitter; + struct epoll_event events[2]; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], events, 2, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * (et) / \ (et) + * s0 s2 + */ +TEST(epoll12) +{ + pthread_t emitter; + struct epoll_event events[2]; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], events, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (lt) + * s0 + */ +TEST(epoll13) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (et) + * s0 + */ +TEST(epoll14) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * (lt) / \ (lt) + * s0 s2 + */ +TEST(epoll15) +{ + pthread_t emitter; + struct epoll_event events[2]; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], events, 2, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * (et) / \ (et) + * s0 s2 + */ +TEST(epoll16) +{ + pthread_t emitter; + struct epoll_event events[2]; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], events, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 + * | (ew) + * e0 + * | (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll17) +{ + int efd[2]; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * | (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll18) +{ + int efd[2]; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * | (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll19) +{ + int efd[2]; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * | (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll20) +{ + int efd[2]; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll21) +{ + int efd[2]; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll22) +{ + int efd[2]; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll23) +{ + int efd[2]; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll24) +{ + int efd[2]; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll25) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll26) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll27) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll28) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll29) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll30) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll31) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll32) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (ew) + * | e0 + * \ / (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll33) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (ew) + * | e0 + * \ / (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll34) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (ew) + * | e0 + * \ / (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll35) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (ew) + * | e0 + * \ / (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll36) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (ew) + * | e0 + * \ / (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll37) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (ew) + * | e0 + * \ / (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll38) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_or(&ctx.count, 2); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (ew) + * | e0 + * \ / (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll39) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (ew) + * | e0 + * \ / (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll40) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_or(&ctx.count, 2); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (p) + * | e0 + * \ / (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll41) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (p) + * | e0 + * \ / (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll42) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (p) + * | e0 + * \ / (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll43) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (p) + * | e0 + * \ / (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll44) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (p) + * | e0 + * \ / (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll45) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (p) + * | e0 + * \ / (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll46) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (p) + * | e0 + * \ / (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll47) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (p) + * | e0 + * \ / (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll48) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll49) +{ + int efd[3]; + int sfd[4]; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + efd[2] = epoll_create(1); + ASSERT_GE(efd[2], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + + close(efd[0]); + close(efd[1]); + close(efd[2]); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (ew) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll50) +{ + int efd[3]; + int sfd[4]; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + efd[2] = epoll_create(1); + ASSERT_GE(efd[2], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0); + + close(efd[0]); + close(efd[1]); + close(efd[2]); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (p) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll51) +{ + int efd[3]; + int sfd[4]; + struct pollfd pfd; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + efd[2] = epoll_create(1); + ASSERT_GE(efd[2], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + + close(efd[0]); + close(efd[1]); + close(efd[2]); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (p) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll52) +{ + int efd[3]; + int sfd[4]; + struct pollfd pfd; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + efd[2] = epoll_create(1); + ASSERT_GE(efd[2], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0); + + close(efd[0]); + close(efd[1]); + close(efd[2]); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll53) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll54) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll55) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll56) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (p) \ / (p) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll57) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + pfd.fd = ctx.efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (p) \ / (p) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll58) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + pfd.fd = ctx.efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +TEST_HARNESS_MAIN From 81696d5d544e665674b4f7a09e1304986fbbcbc6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 4 Dec 2019 16:52:22 -0800 Subject: [PATCH 49/86] fs/binfmt_elf.c: delete unused "interp_map_addr" argument Link: http://lkml.kernel.org/r/20191005165049.GA26927@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 5372eabd276a..43eb8a78e84d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -544,7 +544,7 @@ static inline int make_prot(u32 p_flags) an ELF header */ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, - struct file *interpreter, unsigned long *interp_map_addr, + struct file *interpreter, unsigned long no_base, struct elf_phdr *interp_elf_phdata) { struct elf_phdr *eppnt; @@ -590,8 +590,6 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type, total_size); total_size = 0; - if (!*interp_map_addr) - *interp_map_addr = map_addr; error = map_addr; if (BAD_ADDR(map_addr)) goto out; @@ -1054,11 +1052,8 @@ out_free_interp: } if (interpreter) { - unsigned long interp_map_addr = 0; - elf_entry = load_elf_interp(&loc->interp_elf_ex, interpreter, - &interp_map_addr, load_bias, interp_elf_phdata); if (!IS_ERR((void *)elf_entry)) { /* From 658c033565118549a07f15927aa6939f481e4712 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 4 Dec 2019 16:52:25 -0800 Subject: [PATCH 50/86] fs/binfmt_elf.c: extract elf_read() function ELF reads done by the kernel have very complicated error detection code which better live in one place. Link: http://lkml.kernel.org/r/20191005165215.GB26927@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 43eb8a78e84d..ecd8d2698515 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -404,6 +404,17 @@ static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr) ELF_PAGESTART(cmds[first_idx].p_vaddr); } +static int elf_read(struct file *file, void *buf, size_t len, loff_t pos) +{ + ssize_t rv; + + rv = kernel_read(file, buf, len, &pos); + if (unlikely(rv != len)) { + return (rv < 0) ? rv : -EIO; + } + return 0; +} + /** * load_elf_phdrs() - load ELF program headers * @elf_ex: ELF header of the binary whose program headers should be loaded @@ -418,7 +429,6 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, { struct elf_phdr *elf_phdata = NULL; int retval, err = -1; - loff_t pos = elf_ex->e_phoff; unsigned int size; /* @@ -439,9 +449,9 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, goto out; /* Read in the program headers */ - retval = kernel_read(elf_file, elf_phdata, size, &pos); - if (retval != size) { - err = (retval < 0) ? retval : -EIO; + retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff); + if (retval < 0) { + err = retval; goto out; } @@ -720,7 +730,6 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_ppnt = elf_phdata; for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { char *elf_interpreter; - loff_t pos; if (elf_ppnt->p_type != PT_INTERP) continue; @@ -738,14 +747,10 @@ static int load_elf_binary(struct linux_binprm *bprm) if (!elf_interpreter) goto out_free_ph; - pos = elf_ppnt->p_offset; - retval = kernel_read(bprm->file, elf_interpreter, - elf_ppnt->p_filesz, &pos); - if (retval != elf_ppnt->p_filesz) { - if (retval >= 0) - retval = -EIO; + retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz, + elf_ppnt->p_offset); + if (retval < 0) goto out_free_interp; - } /* make sure path is NULL terminated */ retval = -ENOEXEC; if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') @@ -764,14 +769,10 @@ static int load_elf_binary(struct linux_binprm *bprm) would_dump(bprm, interpreter); /* Get the exec headers */ - pos = 0; - retval = kernel_read(interpreter, &loc->interp_elf_ex, - sizeof(loc->interp_elf_ex), &pos); - if (retval != sizeof(loc->interp_elf_ex)) { - if (retval >= 0) - retval = -EIO; + retval = elf_read(interpreter, &loc->interp_elf_ex, + sizeof(loc->interp_elf_ex), 0); + if (retval < 0) goto out_free_dentry; - } break; @@ -1174,11 +1175,10 @@ static int load_elf_library(struct file *file) unsigned long elf_bss, bss, len; int retval, error, i, j; struct elfhdr elf_ex; - loff_t pos = 0; error = -ENOEXEC; - retval = kernel_read(file, &elf_ex, sizeof(elf_ex), &pos); - if (retval != sizeof(elf_ex)) + retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0); + if (retval < 0) goto out; if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) @@ -1203,9 +1203,8 @@ static int load_elf_library(struct file *file) eppnt = elf_phdata; error = -ENOEXEC; - pos = elf_ex.e_phoff; - retval = kernel_read(file, eppnt, j, &pos); - if (retval != j) + retval = elf_read(file, eppnt, j, elf_ex.e_phoff); + if (retval < 0) goto out_free_ph; for (j = 0, i = 0; i Date: Wed, 4 Dec 2019 16:52:28 -0800 Subject: [PATCH 51/86] init/Kconfig: fix indentation Adjust indentation from spaces to tab (+optional two spaces) as in coding style with command like: $ sed -e 's/^ / /' -i */Kconfig Link: http://lkml.kernel.org/r/1574306670-30234-1-git-send-email-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Cc: Jiri Kosina Cc: Masahiro Yamada Cc: Greg Kroah-Hartman Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 76 ++++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index d7163fcf233b..a34064a031a5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -146,13 +146,13 @@ config LOCALVERSION_AUTO which is done within the script "scripts/setlocalversion".) config BUILD_SALT - string "Build ID Salt" - default "" - help - The build ID is used to link binaries and their debug info. Setting - this option will use the value in the calculation of the build id. - This is mostly useful for distributions which want to ensure the - build is unique between builds. It's safe to leave the default. + string "Build ID Salt" + default "" + help + The build ID is used to link binaries and their debug info. Setting + this option will use the value in the calculation of the build id. + This is mostly useful for distributions which want to ensure the + build is unique between builds. It's safe to leave the default. config HAVE_KERNEL_GZIP bool @@ -818,7 +818,7 @@ menuconfig CGROUPS if CGROUPS config PAGE_COUNTER - bool + bool config MEMCG bool "Memory controller" @@ -1311,9 +1311,9 @@ menuconfig EXPERT select DEBUG_KERNEL help This option allows certain base kernel options and settings - to be disabled or tweaked. This is for specialized - environments which can tolerate a "non-standard" kernel. - Only use this if you really know what you are doing. + to be disabled or tweaked. This is for specialized + environments which can tolerate a "non-standard" kernel. + Only use this if you really know what you are doing. config UID16 bool "Enable 16-bit UID system calls" if EXPERT @@ -1406,11 +1406,11 @@ config BUG bool "BUG() support" if EXPERT default y help - Disabling this option eliminates support for BUG and WARN, reducing - the size of your kernel image and potentially quietly ignoring - numerous fatal conditions. You should only consider disabling this - option for embedded systems with no facilities for reporting errors. - Just say Y. + Disabling this option eliminates support for BUG and WARN, reducing + the size of your kernel image and potentially quietly ignoring + numerous fatal conditions. You should only consider disabling this + option for embedded systems with no facilities for reporting errors. + Just say Y. config ELF_CORE depends on COREDUMP @@ -1426,8 +1426,8 @@ config PCSPKR_PLATFORM select I8253_LOCK default y help - This option allows to disable the internal PC-Speaker - support, saving some memory. + This option allows to disable the internal PC-Speaker + support, saving some memory. config BASE_FULL default y @@ -1545,29 +1545,29 @@ config MEMBARRIER If unsure, say Y. config KALLSYMS - bool "Load all symbols for debugging/ksymoops" if EXPERT - default y - help - Say Y here to let the kernel print out symbolic crash information and - symbolic stack backtraces. This increases the size of the kernel - somewhat, as all symbols have to be loaded into the kernel image. + bool "Load all symbols for debugging/ksymoops" if EXPERT + default y + help + Say Y here to let the kernel print out symbolic crash information and + symbolic stack backtraces. This increases the size of the kernel + somewhat, as all symbols have to be loaded into the kernel image. config KALLSYMS_ALL bool "Include all symbols in kallsyms" depends on DEBUG_KERNEL && KALLSYMS help - Normally kallsyms only contains the symbols of functions for nicer - OOPS messages and backtraces (i.e., symbols from the text and inittext - sections). This is sufficient for most cases. And only in very rare - cases (e.g., when a debugger is used) all symbols are required (e.g., - names of variables from the data sections, etc). + Normally kallsyms only contains the symbols of functions for nicer + OOPS messages and backtraces (i.e., symbols from the text and inittext + sections). This is sufficient for most cases. And only in very rare + cases (e.g., when a debugger is used) all symbols are required (e.g., + names of variables from the data sections, etc). - This option makes sure that all symbols are loaded into the kernel - image (i.e., symbols from all sections) in cost of increased kernel - size (depending on the kernel configuration, it may be 300KiB or - something like this). + This option makes sure that all symbols are loaded into the kernel + image (i.e., symbols from all sections) in cost of increased kernel + size (depending on the kernel configuration, it may be 300KiB or + something like this). - Say N unless you really need all symbols. + Say N unless you really need all symbols. config KALLSYMS_ABSOLUTE_PERCPU bool @@ -1710,12 +1710,12 @@ config DEBUG_PERF_USE_VMALLOC depends on PERF_EVENTS && DEBUG_KERNEL && !PPC select PERF_USE_VMALLOC help - Use vmalloc memory to back perf mmap() buffers. + Use vmalloc memory to back perf mmap() buffers. - Mostly useful for debugging the vmalloc code on platforms - that don't require it. + Mostly useful for debugging the vmalloc code on platforms + that don't require it. - Say N if unsure. + Say N if unsure. endmenu From 48d6b4dd362c4f3a6032946397385b28ff8d2b9c Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Wed, 4 Dec 2019 16:52:31 -0800 Subject: [PATCH 52/86] drivers/rapidio/rio-driver.c: fix missing include of Include for the missing declarations of functions exported from this file. Fixes the following sparse warnings: drivers/rapidio/rio-driver.c:53:16: warning: symbol 'rio_dev_get' was not declared. Should it be static? drivers/rapidio/rio-driver.c:70:6: warning: symbol 'rio_dev_put' was not declared. Should it be static? drivers/rapidio/rio-driver.c:150:5: warning: symbol 'rio_register_driver' was not declared. Should it be static? drivers/rapidio/rio-driver.c:169:6: warning: symbol 'rio_unregister_driver' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20191017114923.10888-1-ben.dooks@codethink.co.uk Signed-off-by: Ben Dooks Cc: Matt Porter Cc: Alexandre Bounine Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index 2d99a3712b72..72874153972e 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "rio.h" From bd7bca4335a55561b627149322e93de1b25404c1 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Wed, 4 Dec 2019 16:52:34 -0800 Subject: [PATCH 53/86] drivers/rapidio/rio-access.c: fix missing include of Include for the missing declarations of functions exported from this file. Fixes the following sparse warnings: drivers/rapidio/rio-access.c:59:1: warning: symbol '__rio_local_read_config_8' was not declared. Should it be static? drivers/rapidio/rio-access.c:60:1: warning: symbol '__rio_local_read_config_16' was not declared. Should it be static? drivers/rapidio/rio-access.c:61:1: warning: symbol '__rio_local_read_config_32' was not declared. Should it be static? drivers/rapidio/rio-access.c:62:1: warning: symbol '__rio_local_write_config_8' was not declared. Should it be static? drivers/rapidio/rio-access.c:63:1: warning: symbol '__rio_local_write_config_16' was not declared. Should it be static? drivers/rapidio/rio-access.c:64:1: warning: symbol '__rio_local_write_config_32' was not declared. Should it be static? drivers/rapidio/rio-access.c:112:1: warning: symbol 'rio_mport_read_config_8' was not declared. Should it be static? drivers/rapidio/rio-access.c:113:1: warning: symbol 'rio_mport_read_config_16' was not declared. Should it be static? drivers/rapidio/rio-access.c:114:1: warning: symbol 'rio_mport_read_config_32' was not declared. Should it be static? drivers/rapidio/rio-access.c:115:1: warning: symbol 'rio_mport_write_config_8' was not declared. Should it be static? drivers/rapidio/rio-access.c:116:1: warning: symbol 'rio_mport_write_config_16' was not declared. Should it be static? drivers/rapidio/rio-access.c:117:1: warning: symbol 'rio_mport_write_config_32' was not declared. Should it be static? drivers/rapidio/rio-access.c:136:5: warning: symbol 'rio_mport_send_doorbell' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20191017115103.684-1-ben.dooks@codethink.co.uk Signed-off-by: Ben Dooks Cc: Matt Porter Cc: Alexandre Bounine Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-access.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rapidio/rio-access.c b/drivers/rapidio/rio-access.c index 33c8d1ecc988..f9e10647f94e 100644 --- a/drivers/rapidio/rio-access.c +++ b/drivers/rapidio/rio-access.c @@ -9,6 +9,8 @@ #include #include +#include + /* * Wrappers for all RIO configuration access functions. They just check * alignment and call the low-level functions pointed to by rio_mport->ops. From 5bf8bec3f4ce044a223c40cbce92590d938f0e9c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 4 Dec 2019 16:52:37 -0800 Subject: [PATCH 54/86] drm: limit to INT_MAX in create_blob ioctl The hardened usercpy code is too paranoid ever since commit 6a30afa8c1fb ("uaccess: disallow > INT_MAX copy sizes") Code itself should have been fine as-is. Link: http://lkml.kernel.org/r/20191106164755.31478-1-daniel.vetter@ffwll.ch Signed-off-by: Daniel Vetter Reported-by: syzbot+fb77e97ebf0612ee6914@syzkaller.appspotmail.com Fixes: 6a30afa8c1fb ("uaccess: disallow > INT_MAX copy sizes") Cc: Kees Cook Cc: Alexander Viro Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index 892ce636ef72..6ee04803c362 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -561,7 +561,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length, struct drm_property_blob *blob; int ret; - if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob)) + if (!length || length > INT_MAX - sizeof(struct drm_property_blob)) return ERR_PTR(-EINVAL); blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL); From 6d13de1489b6bf539695f96d945de3860e6d5e17 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Dec 2019 16:52:40 -0800 Subject: [PATCH 55/86] uaccess: disallow > INT_MAX copy sizes As we've done with VFS, string operations, etc, reject usercopy sizes larger than INT_MAX, which would be nice to have for catching bugs related to size calculation overflows[1]. This adds 10 bytes to x86_64 defconfig text and 1980 bytes to the data section: text data bss dec hex filename 19691167 5134320 1646664 26472151 193eed7 vmlinux.before 19691177 5136300 1646664 26474141 193f69d vmlinux.after [1] https://marc.info/?l=linux-s390&m=156631939010493&w=2 Link: http://lkml.kernel.org/r/201908251612.F9902D7A@keescook Signed-off-by: Kees Cook Suggested-by: Dan Carpenter Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/thread_info.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 659a4400517b..e93e249a4e9b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -147,6 +147,8 @@ check_copy_size(const void *addr, size_t bytes, bool is_source) __bad_copy_to(); return false; } + if (WARN_ON_ONCE(bytes > INT_MAX)) + return false; check_object_size(addr, bytes, is_source); return true; } From eec028c9386ed1a692aa01a85b55952202b41619 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Dec 2019 16:52:43 -0800 Subject: [PATCH 56/86] kcov: remote coverage support Patch series " kcov: collect coverage from usb and vhost", v3. This patchset extends kcov to allow collecting coverage from backgound kernel threads. This extension requires custom annotations for each of the places where coverage collection is desired. This patchset implements this for hub events in the USB subsystem and for vhost workers. See the first patch description for details about the kcov extension. The other two patches apply this kcov extension to USB and vhost. Examples of other subsystems that might potentially benefit from this when custom annotations are added (the list is based on process_one_work() callers for bugs recently reported by syzbot): 1. fs: writeback wb_workfn() worker, 2. net: addrconf_dad_work()/addrconf_verify_work() workers, 3. net: neigh_periodic_work() worker, 4. net/p9: p9_write_work()/p9_read_work() workers, 5. block: blk_mq_run_work_fn() worker. These patches have been used to enable coverage-guided USB fuzzing with syzkaller for the last few years, see the details here: https://github.com/google/syzkaller/blob/master/docs/linux/external_fuzzing_usb.md This patchset has been pushed to the public Linux kernel Gerrit instance: https://linux-review.googlesource.com/c/linux/kernel/git/torvalds/linux/+/1524 This patch (of 3): Add background thread coverage collection ability to kcov. With KCOV_ENABLE coverage is collected only for syscalls that are issued from the current process. With KCOV_REMOTE_ENABLE it's possible to collect coverage for arbitrary parts of the kernel code, provided that those parts are annotated with kcov_remote_start()/kcov_remote_stop(). This allows to collect coverage from two types of kernel background threads: the global ones, that are spawned during kernel boot in a limited number of instances (e.g. one USB hub_event() worker thread is spawned per USB HCD); and the local ones, that are spawned when a user interacts with some kernel interface (e.g. vhost workers). To enable collecting coverage from a global background thread, a unique global handle must be assigned and passed to the corresponding kcov_remote_start() call. Then a userspace process can pass a list of such handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the kcov_remote_arg struct. This will attach the used kcov device to the code sections, that are referenced by those handles. Since there might be many local background threads spawned from different userspace processes, we can't use a single global handle per annotation. Instead, the userspace process passes a non-zero handle through the common_handle field of the kcov_remote_arg struct. This common handle gets saved to the kcov_handle field in the current task_struct and needs to be passed to the newly spawned threads via custom annotations. Those threads should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). Internally kcov stores handles as u64 integers. The top byte of a handle is used to denote the id of a subsystem that this handle belongs to, and the lower 4 bytes are used to denote the id of a thread instance within that subsystem. A reserved value 0 is used as a subsystem id for common handles as they don't belong to a particular subsystem. The bytes 4-7 are currently reserved and must be zero. In the future the number of bytes used for the subsystem or handle ids might be increased. When a particular userspace process collects coverage by via a common handle, kcov will collect coverage for each code section that is annotated to use the common handle obtained as kcov_handle from the current task_struct. However non common handles allow to collect coverage selectively from different subsystems. Link: http://lkml.kernel.org/r/e90e315426a384207edbec1d6aa89e43008e4caf.1572366574.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Arnd Bergmann Cc: Steven Rostedt Cc: David Windsor Cc: Elena Reshetova Cc: Anders Roxell Cc: Alexander Potapenko Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kcov.rst | 129 ++++++++ include/linux/kcov.h | 23 ++ include/linux/sched.h | 8 + include/uapi/linux/kcov.h | 28 ++ kernel/kcov.c | 547 +++++++++++++++++++++++++++++-- 5 files changed, 700 insertions(+), 35 deletions(-) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 42b612677799..36890b026e77 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -34,6 +34,7 @@ Profiling data will only become accessible once debugfs has been mounted:: Coverage collection ------------------- + The following program demonstrates coverage collection from within a test program using kcov: @@ -128,6 +129,7 @@ only need to enable coverage (disable happens automatically on thread end). Comparison operands collection ------------------------------ + Comparison operands collection is similar to coverage collection: .. code-block:: c @@ -202,3 +204,130 @@ Comparison operands collection is similar to coverage collection: Note that the kcov modes (coverage collection or comparison operands) are mutually exclusive. + +Remote coverage collection +-------------------------- + +With KCOV_ENABLE coverage is collected only for syscalls that are issued +from the current process. With KCOV_REMOTE_ENABLE it's possible to collect +coverage for arbitrary parts of the kernel code, provided that those parts +are annotated with kcov_remote_start()/kcov_remote_stop(). + +This allows to collect coverage from two types of kernel background +threads: the global ones, that are spawned during kernel boot in a limited +number of instances (e.g. one USB hub_event() worker thread is spawned per +USB HCD); and the local ones, that are spawned when a user interacts with +some kernel interface (e.g. vhost workers). + +To enable collecting coverage from a global background thread, a unique +global handle must be assigned and passed to the corresponding +kcov_remote_start() call. Then a userspace process can pass a list of such +handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the +kcov_remote_arg struct. This will attach the used kcov device to the code +sections, that are referenced by those handles. + +Since there might be many local background threads spawned from different +userspace processes, we can't use a single global handle per annotation. +Instead, the userspace process passes a non-zero handle through the +common_handle field of the kcov_remote_arg struct. This common handle gets +saved to the kcov_handle field in the current task_struct and needs to be +passed to the newly spawned threads via custom annotations. Those threads +should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). + +Internally kcov stores handles as u64 integers. The top byte of a handle +is used to denote the id of a subsystem that this handle belongs to, and +the lower 4 bytes are used to denote the id of a thread instance within +that subsystem. A reserved value 0 is used as a subsystem id for common +handles as they don't belong to a particular subsystem. The bytes 4-7 are +currently reserved and must be zero. In the future the number of bytes +used for the subsystem or handle ids might be increased. + +When a particular userspace proccess collects coverage by via a common +handle, kcov will collect coverage for each code section that is annotated +to use the common handle obtained as kcov_handle from the current +task_struct. However non common handles allow to collect coverage +selectively from different subsystems. + +.. code-block:: c + + struct kcov_remote_arg { + unsigned trace_mode; + unsigned area_size; + unsigned num_handles; + uint64_t common_handle; + uint64_t handles[0]; + }; + + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) + #define KCOV_DISABLE _IO('c', 101) + #define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) + + #define COVER_SIZE (64 << 10) + + #define KCOV_TRACE_PC 0 + + #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) + #define KCOV_SUBSYSTEM_USB (0x01ull << 56) + + #define KCOV_SUBSYSTEM_MASK (0xffull << 56) + #define KCOV_INSTANCE_MASK (0xffffffffull) + + static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) + { + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; + } + + #define KCOV_COMMON_ID 0x42 + #define KCOV_USB_BUS_NUM 1 + + int main(int argc, char **argv) + { + int fd; + unsigned long *cover, n, i; + struct kcov_remote_arg *arg; + + fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) + perror("ioctl"), exit(1); + cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if ((void*)cover == MAP_FAILED) + perror("mmap"), exit(1); + + /* Enable coverage collection via common handle and from USB bus #1. */ + arg = calloc(1, sizeof(*arg) + sizeof(uint64_t)); + if (!arg) + perror("calloc"), exit(1); + arg->trace_mode = KCOV_TRACE_PC; + arg->area_size = COVER_SIZE; + arg->num_handles = 1; + arg->common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, + KCOV_COMMON_ID); + arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, + KCOV_USB_BUS_NUM); + if (ioctl(fd, KCOV_REMOTE_ENABLE, arg)) + perror("ioctl"), free(arg), exit(1); + free(arg); + + /* + * Here the user needs to trigger execution of a kernel code section + * that is either annotated with the common handle, or to trigger some + * activity on USB bus #1. + */ + sleep(2); + + n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + for (i = 0; i < n; i++) + printf("0x%lx\n", cover[i + 1]); + if (ioctl(fd, KCOV_DISABLE, 0)) + perror("ioctl"), exit(1); + if (munmap(cover, COVER_SIZE * sizeof(unsigned long))) + perror("munmap"), exit(1); + if (close(fd)) + perror("close"), exit(1); + return 0; + } diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b76a1807028d..a10e84707d82 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -37,12 +37,35 @@ do { \ (t)->kcov_mode &= ~KCOV_IN_CTXSW; \ } while (0) +/* See Documentation/dev-tools/kcov.rst for usage details. */ +void kcov_remote_start(u64 handle); +void kcov_remote_stop(void); +u64 kcov_common_handle(void); + +static inline void kcov_remote_start_common(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id)); +} + +static inline void kcov_remote_start_usb(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); +} + #else static inline void kcov_task_init(struct task_struct *t) {} static inline void kcov_task_exit(struct task_struct *t) {} static inline void kcov_prepare_switch(struct task_struct *t) {} static inline void kcov_finish_switch(struct task_struct *t) {} +static inline void kcov_remote_start(u64 handle) {} +static inline void kcov_remote_stop(void) {} +static inline u64 kcov_common_handle(void) +{ + return 0; +} +static inline void kcov_remote_start_common(u64 id) {} +static inline void kcov_remote_start_usb(u64 id) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0cd97d9dd021..467d26046416 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1210,6 +1210,8 @@ struct task_struct { #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV + /* See kernel/kcov.c for more details. */ + /* Coverage collection mode enabled for this task (0 if disabled): */ unsigned int kcov_mode; @@ -1221,6 +1223,12 @@ struct task_struct { /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; + + /* KCOV common handle for remote coverage collection: */ + u64 kcov_handle; + + /* KCOV sequence number: */ + int kcov_sequence; #endif #ifdef CONFIG_MEMCG diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 9529867717a8..409d3ad1e6e2 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -4,9 +4,24 @@ #include +/* + * Argument for KCOV_REMOTE_ENABLE ioctl, see Documentation/dev-tools/kcov.rst + * and the comment before kcov_remote_start() for usage details. + */ +struct kcov_remote_arg { + unsigned int trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */ + unsigned int area_size; /* Length of coverage buffer in words */ + unsigned int num_handles; /* Size of handles array */ + __u64 common_handle; + __u64 handles[0]; +}; + +#define KCOV_REMOTE_MAX_HANDLES 0x100 + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) +#define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) enum { /* @@ -32,4 +47,17 @@ enum { #define KCOV_CMP_SIZE(n) ((n) << 1) #define KCOV_CMP_MASK KCOV_CMP_SIZE(3) +#define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) +#define KCOV_SUBSYSTEM_USB (0x01ull << 56) + +#define KCOV_SUBSYSTEM_MASK (0xffull << 56) +#define KCOV_INSTANCE_MASK (0xffffffffull) + +static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) +{ + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; +} + #endif /* _LINUX_KCOV_IOCTLS_H */ diff --git a/kernel/kcov.c b/kernel/kcov.c index 2ee38727844a..f50354202dbe 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -21,8 +22,11 @@ #include #include #include +#include #include +#define kcov_debug(fmt, ...) pr_debug("%s: " fmt, __func__, ##__VA_ARGS__) + /* Number of 64-bit words written per one comparison: */ #define KCOV_WORDS_PER_CMP 4 @@ -44,19 +48,100 @@ struct kcov { * Reference counter. We keep one for: * - opened file descriptor * - task with enabled coverage (we can't unwire it from another task) + * - each code section for remote coverage collection */ refcount_t refcount; /* The lock protects mode, size, area and t. */ spinlock_t lock; enum kcov_mode mode; - /* Size of arena (in long's for KCOV_MODE_TRACE). */ - unsigned size; + /* Size of arena (in long's). */ + unsigned int size; /* Coverage buffer shared with user space. */ void *area; /* Task for which we collect coverage, or NULL. */ struct task_struct *t; + /* Collecting coverage from remote (background) threads. */ + bool remote; + /* Size of remote area (in long's). */ + unsigned int remote_size; + /* + * Sequence is incremented each time kcov is reenabled, used by + * kcov_remote_stop(), see the comment there. + */ + int sequence; }; +struct kcov_remote_area { + struct list_head list; + unsigned int size; +}; + +struct kcov_remote { + u64 handle; + struct kcov *kcov; + struct hlist_node hnode; +}; + +static DEFINE_SPINLOCK(kcov_remote_lock); +static DEFINE_HASHTABLE(kcov_remote_map, 4); +static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas); + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote *kcov_remote_find(u64 handle) +{ + struct kcov_remote *remote; + + hash_for_each_possible(kcov_remote_map, remote, hnode, handle) { + if (remote->handle == handle) + return remote; + } + return NULL; +} + +static struct kcov_remote *kcov_remote_add(struct kcov *kcov, u64 handle) +{ + struct kcov_remote *remote; + + if (kcov_remote_find(handle)) + return ERR_PTR(-EEXIST); + remote = kmalloc(sizeof(*remote), GFP_ATOMIC); + if (!remote) + return ERR_PTR(-ENOMEM); + remote->handle = handle; + remote->kcov = kcov; + hash_add(kcov_remote_map, &remote->hnode, handle); + return remote; +} + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote_area *kcov_remote_area_get(unsigned int size) +{ + struct kcov_remote_area *area; + struct list_head *pos; + + kcov_debug("size = %u\n", size); + list_for_each(pos, &kcov_remote_areas) { + area = list_entry(pos, struct kcov_remote_area, list); + if (area->size == size) { + list_del(&area->list); + kcov_debug("rv = %px\n", area); + return area; + } + } + kcov_debug("rv = NULL\n"); + return NULL; +} + +/* Must be called with kcov_remote_lock locked. */ +static void kcov_remote_area_put(struct kcov_remote_area *area, + unsigned int size) +{ + kcov_debug("area = %px, size = %u\n", area, size); + INIT_LIST_HEAD(&area->list); + area->size = size; + list_add(&area->list, &kcov_remote_areas); +} + static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -73,7 +158,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru * in_interrupt() returns false (e.g. preempt_schedule_irq()). * READ_ONCE()/barrier() effectively provides load-acquire wrt * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). + * kcov_start(). */ barrier(); return mode == needed_mode; @@ -227,6 +312,78 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) EXPORT_SYMBOL(__sanitizer_cov_trace_switch); #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ +static void kcov_start(struct task_struct *t, unsigned int size, + void *area, enum kcov_mode mode, int sequence) +{ + kcov_debug("t = %px, size = %u, area = %px\n", t, size, area); + /* Cache in task struct for performance. */ + t->kcov_size = size; + t->kcov_area = area; + /* See comment in check_kcov_mode(). */ + barrier(); + WRITE_ONCE(t->kcov_mode, mode); + t->kcov_sequence = sequence; +} + +static void kcov_stop(struct task_struct *t) +{ + WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); + barrier(); + t->kcov_size = 0; + t->kcov_area = NULL; +} + +static void kcov_task_reset(struct task_struct *t) +{ + kcov_stop(t); + t->kcov = NULL; + t->kcov_sequence = 0; + t->kcov_handle = 0; +} + +void kcov_task_init(struct task_struct *t) +{ + kcov_task_reset(t); + t->kcov_handle = current->kcov_handle; +} + +static void kcov_reset(struct kcov *kcov) +{ + kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; + kcov->remote = false; + kcov->remote_size = 0; + kcov->sequence++; +} + +static void kcov_remote_reset(struct kcov *kcov) +{ + int bkt; + struct kcov_remote *remote; + struct hlist_node *tmp; + + spin_lock(&kcov_remote_lock); + hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) { + if (remote->kcov != kcov) + continue; + kcov_debug("removing handle %llx\n", remote->handle); + hash_del(&remote->hnode); + kfree(remote); + } + /* Do reset before unlock to prevent races with kcov_remote_start(). */ + kcov_reset(kcov); + spin_unlock(&kcov_remote_lock); +} + +static void kcov_disable(struct task_struct *t, struct kcov *kcov) +{ + kcov_task_reset(t); + if (kcov->remote) + kcov_remote_reset(kcov); + else + kcov_reset(kcov); +} + static void kcov_get(struct kcov *kcov) { refcount_inc(&kcov->refcount); @@ -235,20 +392,12 @@ static void kcov_get(struct kcov *kcov) static void kcov_put(struct kcov *kcov) { if (refcount_dec_and_test(&kcov->refcount)) { + kcov_remote_reset(kcov); vfree(kcov->area); kfree(kcov); } } -void kcov_task_init(struct task_struct *t) -{ - WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); - barrier(); - t->kcov_size = 0; - t->kcov_area = NULL; - t->kcov = NULL; -} - void kcov_task_exit(struct task_struct *t) { struct kcov *kcov; @@ -256,15 +405,36 @@ void kcov_task_exit(struct task_struct *t) kcov = t->kcov; if (kcov == NULL) return; + spin_lock(&kcov->lock); + kcov_debug("t = %px, kcov->t = %px\n", t, kcov->t); + /* + * For KCOV_ENABLE devices we want to make sure that t->kcov->t == t, + * which comes down to: + * WARN_ON(!kcov->remote && kcov->t != t); + * + * For KCOV_REMOTE_ENABLE devices, the exiting task is either: + * 2. A remote task between kcov_remote_start() and kcov_remote_stop(). + * In this case we should print a warning right away, since a task + * shouldn't be exiting when it's in a kcov coverage collection + * section. Here t points to the task that is collecting remote + * coverage, and t->kcov->t points to the thread that created the + * kcov device. Which means that to detect this case we need to + * check that t != t->kcov->t, and this gives us the following: + * WARN_ON(kcov->remote && kcov->t != t); + * + * 2. The task that created kcov exiting without calling KCOV_DISABLE, + * and then again we can make sure that t->kcov->t == t: + * WARN_ON(kcov->remote && kcov->t != t); + * + * By combining all three checks into one we get: + */ if (WARN_ON(kcov->t != t)) { spin_unlock(&kcov->lock); return; } /* Just to not leave dangling references behind. */ - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -313,6 +483,7 @@ static int kcov_open(struct inode *inode, struct file *filep) if (!kcov) return -ENOMEM; kcov->mode = KCOV_MODE_DISABLED; + kcov->sequence = 1; refcount_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; @@ -325,6 +496,20 @@ static int kcov_close(struct inode *inode, struct file *filep) return 0; } +static int kcov_get_mode(unsigned long arg) +{ + if (arg == KCOV_TRACE_PC) + return KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + return KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; +} + /* * Fault in a lazily-faulted vmalloc area before it can be used by * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the @@ -340,14 +525,35 @@ static void kcov_fault_in_area(struct kcov *kcov) READ_ONCE(area[offset]); } +static inline bool kcov_check_handle(u64 handle, bool common_valid, + bool uncommon_valid, bool zero_valid) +{ + if (handle & ~(KCOV_SUBSYSTEM_MASK | KCOV_INSTANCE_MASK)) + return false; + switch (handle & KCOV_SUBSYSTEM_MASK) { + case KCOV_SUBSYSTEM_COMMON: + return (handle & KCOV_INSTANCE_MASK) ? + common_valid : zero_valid; + case KCOV_SUBSYSTEM_USB: + return uncommon_valid; + default: + return false; + } + return false; +} + static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { struct task_struct *t; unsigned long size, unused; + int mode, i; + struct kcov_remote_arg *remote_arg; + struct kcov_remote *remote; switch (cmd) { case KCOV_INIT_TRACE: + kcov_debug("KCOV_INIT_TRACE\n"); /* * Enable kcov in trace mode and setup buffer size. * Must happen before anything else. @@ -366,6 +572,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: + kcov_debug("KCOV_ENABLE\n"); /* * Enable coverage for the current task. * At this point user must have been enabled trace mode, @@ -378,29 +585,20 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (kcov->t != NULL || t->kcov != NULL) return -EBUSY; - if (arg == KCOV_TRACE_PC) - kcov->mode = KCOV_MODE_TRACE_PC; - else if (arg == KCOV_TRACE_CMP) -#ifdef CONFIG_KCOV_ENABLE_COMPARISONS - kcov->mode = KCOV_MODE_TRACE_CMP; -#else - return -ENOTSUPP; -#endif - else - return -EINVAL; + mode = kcov_get_mode(arg); + if (mode < 0) + return mode; kcov_fault_in_area(kcov); - /* Cache in task struct for performance. */ - t->kcov_size = kcov->size; - t->kcov_area = kcov->area; - /* See comment in check_kcov_mode(). */ - barrier(); - WRITE_ONCE(t->kcov_mode, kcov->mode); + kcov->mode = mode; + kcov_start(t, kcov->size, kcov->area, kcov->mode, + kcov->sequence); t->kcov = kcov; kcov->t = t; - /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */ + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ kcov_get(kcov); return 0; case KCOV_DISABLE: + kcov_debug("KCOV_DISABLE\n"); /* Disable coverage for the current task. */ unused = arg; if (unused != 0 || current->kcov != kcov) @@ -408,11 +606,65 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (WARN_ON(kcov->t != t)) return -EINVAL; - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); kcov_put(kcov); return 0; + case KCOV_REMOTE_ENABLE: + kcov_debug("KCOV_REMOTE_ENABLE\n"); + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) + return -EINVAL; + t = current; + if (kcov->t != NULL || t->kcov != NULL) + return -EBUSY; + remote_arg = (struct kcov_remote_arg *)arg; + mode = kcov_get_mode(remote_arg->trace_mode); + if (mode < 0) + return mode; + if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long)) + return -EINVAL; + kcov->mode = mode; + t->kcov = kcov; + kcov->t = t; + kcov->remote = true; + kcov->remote_size = remote_arg->area_size; + spin_lock(&kcov_remote_lock); + for (i = 0; i < remote_arg->num_handles; i++) { + kcov_debug("handle %llx\n", remote_arg->handles[i]); + if (!kcov_check_handle(remote_arg->handles[i], + false, true, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, remote_arg->handles[i]); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + } + if (remote_arg->common_handle) { + kcov_debug("common handle %llx\n", + remote_arg->common_handle); + if (!kcov_check_handle(remote_arg->common_handle, + true, false, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, + remote_arg->common_handle); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + t->kcov_handle = remote_arg->common_handle; + } + spin_unlock(&kcov_remote_lock); + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ + kcov_get(kcov); + return 0; default: return -ENOTTY; } @@ -422,11 +674,35 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kcov *kcov; int res; + struct kcov_remote_arg *remote_arg = NULL; + unsigned int remote_num_handles; + unsigned long remote_arg_size; + + if (cmd == KCOV_REMOTE_ENABLE) { + if (get_user(remote_num_handles, (unsigned __user *)(arg + + offsetof(struct kcov_remote_arg, num_handles)))) + return -EFAULT; + if (remote_num_handles > KCOV_REMOTE_MAX_HANDLES) + return -EINVAL; + remote_arg_size = struct_size(remote_arg, handles, + remote_num_handles); + remote_arg = memdup_user((void __user *)arg, remote_arg_size); + if (IS_ERR(remote_arg)) + return PTR_ERR(remote_arg); + if (remote_arg->num_handles != remote_num_handles) { + kfree(remote_arg); + return -EINVAL; + } + arg = (unsigned long)remote_arg; + } kcov = filep->private_data; spin_lock(&kcov->lock); res = kcov_ioctl_locked(kcov, cmd, arg); spin_unlock(&kcov->lock); + + kfree(remote_arg); + return res; } @@ -438,6 +714,207 @@ static const struct file_operations kcov_fops = { .release = kcov_close, }; +/* + * kcov_remote_start() and kcov_remote_stop() can be used to annotate a section + * of code in a kernel background thread to allow kcov to be used to collect + * coverage from that part of code. + * + * The handle argument of kcov_remote_start() identifies a code section that is + * used for coverage collection. A userspace process passes this handle to + * KCOV_REMOTE_ENABLE ioctl to make the used kcov device start collecting + * coverage for the code section identified by this handle. + * + * The usage of these annotations in the kernel code is different depending on + * the type of the kernel thread whose code is being annotated. + * + * For global kernel threads that are spawned in a limited number of instances + * (e.g. one USB hub_event() worker thread is spawned per USB HCD), each + * instance must be assigned a unique 4-byte instance id. The instance id is + * then combined with a 1-byte subsystem id to get a handle via + * kcov_remote_handle(subsystem_id, instance_id). + * + * For local kernel threads that are spawned from system calls handler when a + * user interacts with some kernel interface (e.g. vhost workers), a handle is + * passed from a userspace process as the common_handle field of the + * kcov_remote_arg struct (note, that the user must generate a handle by using + * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an + * arbitrary 4-byte non-zero number as the instance id). This common handle + * then gets saved into the task_struct of the process that issued the + * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn + * kernel threads, the common handle must be retrived via kcov_common_handle() + * and passed to the spawned threads via custom annotations. Those kernel + * threads must in turn be annotated with kcov_remote_start(common_handle) and + * kcov_remote_stop(). All of the threads that are spawned by the same process + * obtain the same handle, hence the name "common". + * + * See Documentation/dev-tools/kcov.rst for more details. + * + * Internally, this function looks up the kcov device associated with the + * provided handle, allocates an area for coverage collection, and saves the + * pointers to kcov and area into the current task_struct to allow coverage to + * be collected via __sanitizer_cov_trace_pc() + * In turns kcov_remote_stop() clears those pointers from task_struct to stop + * collecting coverage and copies all collected coverage into the kcov area. + */ +void kcov_remote_start(u64 handle) +{ + struct kcov_remote *remote; + void *area; + struct task_struct *t; + unsigned int size; + enum kcov_mode mode; + int sequence; + + if (WARN_ON(!kcov_check_handle(handle, true, true, true))) + return; + if (WARN_ON(!in_task())) + return; + t = current; + /* + * Check that kcov_remote_start is not called twice + * nor called by user tasks (with enabled kcov). + */ + if (WARN_ON(t->kcov)) + return; + + kcov_debug("handle = %llx\n", handle); + + spin_lock(&kcov_remote_lock); + remote = kcov_remote_find(handle); + if (!remote) { + kcov_debug("no remote found"); + spin_unlock(&kcov_remote_lock); + return; + } + /* Put in kcov_remote_stop(). */ + kcov_get(remote->kcov); + t->kcov = remote->kcov; + /* + * Read kcov fields before unlock to prevent races with + * KCOV_DISABLE / kcov_remote_reset(). + */ + size = remote->kcov->remote_size; + mode = remote->kcov->mode; + sequence = remote->kcov->sequence; + area = kcov_remote_area_get(size); + spin_unlock(&kcov_remote_lock); + + if (!area) { + area = vmalloc(size * sizeof(unsigned long)); + if (!area) { + t->kcov = NULL; + kcov_put(remote->kcov); + return; + } + } + /* Reset coverage size. */ + *(u64 *)area = 0; + + kcov_debug("area = %px, size = %u", area, size); + + kcov_start(t, size, area, mode, sequence); + +} +EXPORT_SYMBOL(kcov_remote_start); + +static void kcov_move_area(enum kcov_mode mode, void *dst_area, + unsigned int dst_area_size, void *src_area) +{ + u64 word_size = sizeof(unsigned long); + u64 count_size, entry_size_log; + u64 dst_len, src_len; + void *dst_entries, *src_entries; + u64 dst_occupied, dst_free, bytes_to_move, entries_moved; + + kcov_debug("%px %u <= %px %lu\n", + dst_area, dst_area_size, src_area, *(unsigned long *)src_area); + + switch (mode) { + case KCOV_MODE_TRACE_PC: + dst_len = READ_ONCE(*(unsigned long *)dst_area); + src_len = *(unsigned long *)src_area; + count_size = sizeof(unsigned long); + entry_size_log = __ilog2_u64(sizeof(unsigned long)); + break; + case KCOV_MODE_TRACE_CMP: + dst_len = READ_ONCE(*(u64 *)dst_area); + src_len = *(u64 *)src_area; + count_size = sizeof(u64); + BUILD_BUG_ON(!is_power_of_2(KCOV_WORDS_PER_CMP)); + entry_size_log = __ilog2_u64(sizeof(u64) * KCOV_WORDS_PER_CMP); + break; + default: + WARN_ON(1); + return; + } + + /* As arm can't divide u64 integers use log of entry size. */ + if (dst_len > ((dst_area_size * word_size - count_size) >> + entry_size_log)) + return; + dst_occupied = count_size + (dst_len << entry_size_log); + dst_free = dst_area_size * word_size - dst_occupied; + bytes_to_move = min(dst_free, src_len << entry_size_log); + dst_entries = dst_area + dst_occupied; + src_entries = src_area + count_size; + memcpy(dst_entries, src_entries, bytes_to_move); + entries_moved = bytes_to_move >> entry_size_log; + + switch (mode) { + case KCOV_MODE_TRACE_PC: + WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved); + break; + case KCOV_MODE_TRACE_CMP: + WRITE_ONCE(*(u64 *)dst_area, dst_len + entries_moved); + break; + default: + break; + } +} + +/* See the comment before kcov_remote_start() for usage details. */ +void kcov_remote_stop(void) +{ + struct task_struct *t = current; + struct kcov *kcov = t->kcov; + void *area = t->kcov_area; + unsigned int size = t->kcov_size; + int sequence = t->kcov_sequence; + + if (!kcov) { + kcov_debug("no kcov found\n"); + return; + } + + kcov_stop(t); + t->kcov = NULL; + + spin_lock(&kcov->lock); + /* + * KCOV_DISABLE could have been called between kcov_remote_start() + * and kcov_remote_stop(), hence the check. + */ + kcov_debug("move if: %d == %d && %d\n", + sequence, kcov->sequence, (int)kcov->remote); + if (sequence == kcov->sequence && kcov->remote) + kcov_move_area(kcov->mode, kcov->area, kcov->size, area); + spin_unlock(&kcov->lock); + + spin_lock(&kcov_remote_lock); + kcov_remote_area_put(area, size); + spin_unlock(&kcov_remote_lock); + + kcov_put(kcov); +} +EXPORT_SYMBOL(kcov_remote_stop); + +/* See the comment before kcov_remote_start() for usage details. */ +u64 kcov_common_handle(void) +{ + return current->kcov_handle; +} +EXPORT_SYMBOL(kcov_common_handle); + static int __init kcov_init(void) { /* From 95d23dc27bde0ab4b25f7ade5e2fddc08dd97d9b Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Dec 2019 16:52:47 -0800 Subject: [PATCH 57/86] usb, kcov: collect coverage from hub_event Add kcov_remote_start()/kcov_remote_stop() annotations to the hub_event() function, which is responsible for processing events on USB buses, in particular events that happen during USB device enumeration. Since hub_event() is run in a global background kernel thread (see Documentation/dev-tools/kcov.rst for details), each USB bus gets a unique global handle from the USB subsystem kcov handle range. As the result kcov can now be used to collect coverage from events that happen on a particular USB bus. [akpm@linux-foundation.org: avoid patch conflicts to make life easier for Andrew] Link: http://lkml.kernel.org/r/de4fe1c219db2d002d905dc1736e2a3bfa1db997.1572366574.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Greg Kroah-Hartman Cc: Alan Stern Cc: Alexander Potapenko Cc: Anders Roxell Cc: Arnd Bergmann Cc: David Windsor Cc: Dmitry Vyukov Cc: Elena Reshetova Cc: Jason Wang Cc: Marco Elver Cc: "Michael S. Tsirkin" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/usb/core/hub.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1709895387b9..f229ad6952c0 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -5484,6 +5485,8 @@ static void hub_event(struct work_struct *work) hub_dev = hub->intfdev; intf = to_usb_interface(hub_dev); + kcov_remote_start_usb((u64)hdev->bus->busnum); + dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n", hdev->state, hdev->maxchild, /* NOTE: expects max 15 ports... */ @@ -5590,6 +5593,8 @@ out_hdev_lock: /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); kref_put(&hub->kref, hub_release); + + kcov_remote_stop(); } static const struct usb_device_id hub_id_table[] = { From 8f6a7f96dc29cefe16ab60f06f9c3a43510b96fd Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Dec 2019 16:52:50 -0800 Subject: [PATCH 58/86] vhost, kcov: collect coverage from vhost_worker Add kcov_remote_start()/kcov_remote_stop() annotations to the vhost_worker() function, which is responsible for processing vhost works. Since vhost_worker() threads are spawned per vhost device instance the common kcov handle is used for kcov_remote_start()/stop() annotations (see Documentation/dev-tools/kcov.rst for details). As the result kcov can now be used to collect coverage from vhost worker threads. Link: http://lkml.kernel.org/r/e49d5d154e5da6c9ada521d2b7ce10a49ce9f98b.1572366574.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alan Stern Cc: Alexander Potapenko Cc: Anders Roxell Cc: Arnd Bergmann Cc: David Windsor Cc: Dmitry Vyukov Cc: Elena Reshetova Cc: Greg Kroah-Hartman Cc: Jason Wang Cc: Marco Elver Cc: "Michael S. Tsirkin" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/vhost/vhost.c | 6 ++++++ drivers/vhost/vhost.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 36ca2cf419bf..f44340b41494 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "vhost.h" @@ -357,7 +358,9 @@ static int vhost_worker(void *data) llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); __set_current_state(TASK_RUNNING); + kcov_remote_start_common(dev->kcov_handle); work->fn(work); + kcov_remote_stop(); if (need_resched()) schedule(); } @@ -546,6 +549,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev) /* No owner, become one */ dev->mm = get_task_mm(current); + dev->kcov_handle = kcov_common_handle(); worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); if (IS_ERR(worker)) { err = PTR_ERR(worker); @@ -571,6 +575,7 @@ err_worker: if (dev->mm) mmput(dev->mm); dev->mm = NULL; + dev->kcov_handle = 0; err_mm: return err; } @@ -682,6 +687,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; + dev->kcov_handle = 0; } if (dev->mm) mmput(dev->mm); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index e9ed2722b633..a123fd70847e 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -173,6 +173,7 @@ struct vhost_dev { int iov_limit; int weight; int byte_weight; + u64 kcov_handle; }; bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); From ce5c31db3645b649a31044a4d8b6057f6c723702 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 4 Dec 2019 16:52:53 -0800 Subject: [PATCH 59/86] lib/ubsan: don't serialize UBSAN report At the moment, UBSAN report will be serialized using a spin_lock(). On RT-systems, spinlocks are turned to rt_spin_lock and may sleep. This will result to the following splat if the undefined behavior is in a context that can sleep: BUG: sleeping function called from invalid context at /src/linux/kernel/locking/rtmutex.c:968 in_atomic(): 1, irqs_disabled(): 128, pid: 3447, name: make 1 lock held by make/3447: #0: 000000009a966332 (&mm->mmap_sem){++++}, at: do_page_fault+0x140/0x4f8 irq event stamp: 6284 hardirqs last enabled at (6283): [] _raw_spin_unlock_irqrestore+0x90/0xa0 hardirqs last disabled at (6284): [] _raw_spin_lock_irqsave+0x30/0x78 softirqs last enabled at (2430): [] fpsimd_restore_current_state+0x60/0xe8 softirqs last disabled at (2427): [] fpsimd_restore_current_state+0x28/0xe8 Preemption disabled at: [] rt_mutex_futex_unlock+0x4c/0xb0 CPU: 3 PID: 3447 Comm: make Tainted: G W 5.2.14-rt7-01890-ge6e057589653 #911 Call trace: dump_backtrace+0x0/0x148 show_stack+0x14/0x20 dump_stack+0xbc/0x104 ___might_sleep+0x154/0x210 rt_spin_lock+0x68/0xa0 ubsan_prologue+0x30/0x68 handle_overflow+0x64/0xe0 __ubsan_handle_add_overflow+0x10/0x18 __lock_acquire+0x1c28/0x2a28 lock_acquire+0xf0/0x370 _raw_spin_lock_irqsave+0x58/0x78 rt_mutex_futex_unlock+0x4c/0xb0 rt_spin_unlock+0x28/0x70 get_page_from_freelist+0x428/0x2b60 __alloc_pages_nodemask+0x174/0x1708 alloc_pages_vma+0x1ac/0x238 __handle_mm_fault+0x4ac/0x10b0 handle_mm_fault+0x1d8/0x3b0 do_page_fault+0x1c8/0x4f8 do_translation_fault+0xb8/0xe0 do_mem_abort+0x3c/0x98 el0_da+0x20/0x24 The spin_lock() will protect against multiple CPUs to output a report together, I guess to prevent them from being interleaved. However, they can still interleave with other messages (and even splat from __might_sleep). So the lock usefulness seems pretty limited. Rather than trying to accomodate RT-system by switching to a raw_spin_lock(), the lock is now completely dropped. Link: http://lkml.kernel.org/r/20190920100835.14999-1-julien.grall@arm.com Signed-off-by: Julien Grall Reported-by: Andre Przywara Acked-by: Andrey Ryabinin Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ubsan.c | 64 +++++++++++++++++++---------------------------------- 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index fc552d524ef7..7b9b58aee72c 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -140,25 +140,21 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type, } } -static DEFINE_SPINLOCK(report_lock); - -static void ubsan_prologue(struct source_location *location, - unsigned long *flags) +static void ubsan_prologue(struct source_location *location) { current->in_ubsan++; - spin_lock_irqsave(&report_lock, *flags); pr_err("========================================" "========================================\n"); print_source_location("UBSAN: Undefined behaviour in", location); } -static void ubsan_epilogue(unsigned long *flags) +static void ubsan_epilogue(void) { dump_stack(); pr_err("========================================" "========================================\n"); - spin_unlock_irqrestore(&report_lock, *flags); + current->in_ubsan--; } @@ -167,14 +163,13 @@ static void handle_overflow(struct overflow_data *data, void *lhs, { struct type_descriptor *type = data->type; - unsigned long flags; char lhs_val_str[VALUE_LENGTH]; char rhs_val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs); val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs); @@ -186,7 +181,7 @@ static void handle_overflow(struct overflow_data *data, void *lhs, rhs_val_str, type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } void __ubsan_handle_add_overflow(struct overflow_data *data, @@ -214,20 +209,19 @@ EXPORT_SYMBOL(__ubsan_handle_mul_overflow); void __ubsan_handle_negate_overflow(struct overflow_data *data, void *old_val) { - unsigned long flags; char old_val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val); pr_err("negation of %s cannot be represented in type %s:\n", old_val_str, data->type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_negate_overflow); @@ -235,13 +229,12 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow); void __ubsan_handle_divrem_overflow(struct overflow_data *data, void *lhs, void *rhs) { - unsigned long flags; char rhs_val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs); @@ -251,58 +244,52 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, else pr_err("division by zero\n"); - ubsan_epilogue(&flags); + ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { - unsigned long flags; - if (suppress_report(data->location)) return; - ubsan_prologue(data->location, &flags); + ubsan_prologue(data->location); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], data->type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { - unsigned long flags; - if (suppress_report(data->location)) return; - ubsan_prologue(data->location, &flags); + ubsan_prologue(data->location); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], (void *)ptr, data->type->type_name); pr_err("which requires %ld byte alignment\n", data->alignment); - ubsan_epilogue(&flags); + ubsan_epilogue(); } static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { - unsigned long flags; - if (suppress_report(data->location)) return; - ubsan_prologue(data->location, &flags); + ubsan_prologue(data->location); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); pr_err("for an object of type %s\n", data->type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, @@ -351,25 +338,23 @@ EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index) { - unsigned long flags; char index_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(index_str, sizeof(index_str), data->index_type, index); pr_err("index %s is out of range for type %s\n", index_str, data->array_type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_out_of_bounds); void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, void *lhs, void *rhs) { - unsigned long flags; struct type_descriptor *rhs_type = data->rhs_type; struct type_descriptor *lhs_type = data->lhs_type; char rhs_str[VALUE_LENGTH]; @@ -379,7 +364,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, if (suppress_report(&data->location)) goto out; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(rhs_str, sizeof(rhs_str), rhs_type, rhs); val_to_string(lhs_str, sizeof(lhs_str), lhs_type, lhs); @@ -402,7 +387,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, lhs_str, rhs_str, lhs_type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); out: user_access_restore(ua_flags); } @@ -411,11 +396,9 @@ EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds); void __ubsan_handle_builtin_unreachable(struct unreachable_data *data) { - unsigned long flags; - - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); pr_err("calling __builtin_unreachable()\n"); - ubsan_epilogue(&flags); + ubsan_epilogue(); panic("can't return from __builtin_unreachable()"); } EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable); @@ -423,19 +406,18 @@ EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable); void __ubsan_handle_load_invalid_value(struct invalid_value_data *data, void *val) { - unsigned long flags; char val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(val_str, sizeof(val_str), data->type, val); pr_err("load of value %s is not a valid value for type %s\n", val_str, data->type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_load_invalid_value); From 5b009673594d569674a9e0e60109f6a1723075b0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Dec 2019 16:52:57 -0800 Subject: [PATCH 60/86] arch: ipcbuf.h: make uapi asm/ipcbuf.h self-contained Userspace cannot compile due to some missing type definitions. For example, building it for x86 fails as follows: CC usr/include/asm/ipcbuf.h.s In file included from usr/include/asm/ipcbuf.h:1:0, from :32: usr/include/asm-generic/ipcbuf.h:21:2: error: unknown type name `__kernel_key_t' __kernel_key_t key; ^~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:22:2: error: unknown type name `__kernel_uid32_t' __kernel_uid32_t uid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:23:2: error: unknown type name `__kernel_gid32_t' __kernel_gid32_t gid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:24:2: error: unknown type name `__kernel_uid32_t' __kernel_uid32_t cuid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:25:2: error: unknown type name `__kernel_gid32_t' __kernel_gid32_t cgid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:26:2: error: unknown type name `__kernel_mode_t' __kernel_mode_t mode; ^~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:28:35: error: `__kernel_mode_t' undeclared here (not in a function) unsigned char __pad1[4 - sizeof(__kernel_mode_t)]; ^~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:31:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused1; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:32:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused2; ^~~~~~~~~~~~~~~~ It is just a matter of missing include directive. Include to make it self-contained, and add it to the compile-test coverage. Link: http://lkml.kernel.org/r/20191030063855.9989-1-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/uapi/asm/ipcbuf.h | 2 ++ arch/sparc/include/uapi/asm/ipcbuf.h | 2 ++ arch/xtensa/include/uapi/asm/ipcbuf.h | 2 ++ include/uapi/asm-generic/ipcbuf.h | 2 ++ usr/include/Makefile | 1 - 5 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h index 5b1c4f47c656..1030cd186899 100644 --- a/arch/s390/include/uapi/asm/ipcbuf.h +++ b/arch/s390/include/uapi/asm/ipcbuf.h @@ -2,6 +2,8 @@ #ifndef __S390_IPCBUF_H__ #define __S390_IPCBUF_H__ +#include + /* * The user_ipc_perm structure for S/390 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 9d0d125500e2..5b933a598a33 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -2,6 +2,8 @@ #ifndef __SPARC_IPCBUF_H #define __SPARC_IPCBUF_H +#include + /* * The ipc64_perm structure for sparc/sparc64 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/xtensa/include/uapi/asm/ipcbuf.h b/arch/xtensa/include/uapi/asm/ipcbuf.h index a57afa0b606f..3bd0642f6660 100644 --- a/arch/xtensa/include/uapi/asm/ipcbuf.h +++ b/arch/xtensa/include/uapi/asm/ipcbuf.h @@ -12,6 +12,8 @@ #ifndef _XTENSA_IPCBUF_H #define _XTENSA_IPCBUF_H +#include + /* * Pad space is left for: * - 32-bit mode_t and seq diff --git a/include/uapi/asm-generic/ipcbuf.h b/include/uapi/asm-generic/ipcbuf.h index 7d80dbd336fb..41a01b494fc7 100644 --- a/include/uapi/asm-generic/ipcbuf.h +++ b/include/uapi/asm-generic/ipcbuf.h @@ -2,6 +2,8 @@ #ifndef __ASM_GENERIC_IPCBUF_H #define __ASM_GENERIC_IPCBUF_H +#include + /* * The generic ipc64_perm structure: * Note extra padding because this structure is passed back and forth diff --git a/usr/include/Makefile b/usr/include/Makefile index 5acd9bb6d714..00cc763ec488 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include # Please consider to fix the header first. # # Sorted alphabetically. -header-test- += asm/ipcbuf.h header-test- += asm/msgbuf.h header-test- += asm/sembuf.h header-test- += asm/shmbuf.h From 9ef0e004181956e158fb7ceb9b43810a193f80cd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Dec 2019 16:53:00 -0800 Subject: [PATCH 61/86] arch: msgbuf.h: make uapi asm/msgbuf.h self-contained Userspace cannot compile due to some missing type definitions. For example, building it for x86 fails as follows: CC usr/include/asm/msgbuf.h.s In file included from usr/include/asm/msgbuf.h:6:0, from :32: usr/include/asm-generic/msgbuf.h:25:20: error: field `msg_perm' has incomplete type struct ipc64_perm msg_perm; ^~~~~~~~ usr/include/asm-generic/msgbuf.h:27:2: error: unknown type name `__kernel_time_t' __kernel_time_t msg_stime; /* last msgsnd time */ ^~~~~~~~~~~~~~~ usr/include/asm-generic/msgbuf.h:28:2: error: unknown type name `__kernel_time_t' __kernel_time_t msg_rtime; /* last msgrcv time */ ^~~~~~~~~~~~~~~ usr/include/asm-generic/msgbuf.h:29:2: error: unknown type name `__kernel_time_t' __kernel_time_t msg_ctime; /* last change time */ ^~~~~~~~~~~~~~~ usr/include/asm-generic/msgbuf.h:41:2: error: unknown type name `__kernel_pid_t' __kernel_pid_t msg_lspid; /* pid of last msgsnd */ ^~~~~~~~~~~~~~ usr/include/asm-generic/msgbuf.h:42:2: error: unknown type name `__kernel_pid_t' __kernel_pid_t msg_lrpid; /* last receive pid */ ^~~~~~~~~~~~~~ It is just a matter of missing include directive. Include to make it self-contained, and add it to the compile-test coverage. Link: http://lkml.kernel.org/r/20191030063855.9989-2-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/uapi/asm/msgbuf.h | 1 + arch/parisc/include/uapi/asm/msgbuf.h | 1 + arch/powerpc/include/uapi/asm/msgbuf.h | 2 ++ arch/sparc/include/uapi/asm/msgbuf.h | 2 ++ arch/x86/include/uapi/asm/msgbuf.h | 3 +++ arch/xtensa/include/uapi/asm/msgbuf.h | 2 ++ include/uapi/asm-generic/msgbuf.h | 2 ++ usr/include/Makefile | 1 - 8 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/uapi/asm/msgbuf.h b/arch/mips/include/uapi/asm/msgbuf.h index 9e0c2e230274..128af72f2dfe 100644 --- a/arch/mips/include/uapi/asm/msgbuf.h +++ b/arch/mips/include/uapi/asm/msgbuf.h @@ -2,6 +2,7 @@ #ifndef _ASM_MSGBUF_H #define _ASM_MSGBUF_H +#include /* * The msqid64_ds structure for the MIPS architecture. diff --git a/arch/parisc/include/uapi/asm/msgbuf.h b/arch/parisc/include/uapi/asm/msgbuf.h index 3b877335da38..3b4de5b668c3 100644 --- a/arch/parisc/include/uapi/asm/msgbuf.h +++ b/arch/parisc/include/uapi/asm/msgbuf.h @@ -3,6 +3,7 @@ #define _PARISC_MSGBUF_H #include +#include /* * The msqid64_ds structure for parisc architecture, copied from sparc. diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h index 969bd83e4d3d..7919b2ba41b5 100644 --- a/arch/powerpc/include/uapi/asm/msgbuf.h +++ b/arch/powerpc/include/uapi/asm/msgbuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_MSGBUF_H #define _ASM_POWERPC_MSGBUF_H +#include + /* * The msqid64_ds structure for the PowerPC architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/sparc/include/uapi/asm/msgbuf.h b/arch/sparc/include/uapi/asm/msgbuf.h index eeeb91933280..0954552da188 100644 --- a/arch/sparc/include/uapi/asm/msgbuf.h +++ b/arch/sparc/include/uapi/asm/msgbuf.h @@ -2,6 +2,8 @@ #ifndef _SPARC_MSGBUF_H #define _SPARC_MSGBUF_H +#include + /* * The msqid64_ds structure for sparc64 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h index 7c5bb43ed8af..b3d0664fadc9 100644 --- a/arch/x86/include/uapi/asm/msgbuf.h +++ b/arch/x86/include/uapi/asm/msgbuf.h @@ -5,6 +5,9 @@ #if !defined(__x86_64__) || !defined(__ILP32__) #include #else + +#include + /* * The msqid64_ds structure for x86 architecture with x32 ABI. * diff --git a/arch/xtensa/include/uapi/asm/msgbuf.h b/arch/xtensa/include/uapi/asm/msgbuf.h index d6915e9f071c..1ed2c85b693a 100644 --- a/arch/xtensa/include/uapi/asm/msgbuf.h +++ b/arch/xtensa/include/uapi/asm/msgbuf.h @@ -17,6 +17,8 @@ #ifndef _XTENSA_MSGBUF_H #define _XTENSA_MSGBUF_H +#include + struct msqid64_ds { struct ipc64_perm msg_perm; #ifdef __XTENSA_EB__ diff --git a/include/uapi/asm-generic/msgbuf.h b/include/uapi/asm-generic/msgbuf.h index af95aa89012e..6504d7b741ce 100644 --- a/include/uapi/asm-generic/msgbuf.h +++ b/include/uapi/asm-generic/msgbuf.h @@ -3,6 +3,8 @@ #define __ASM_GENERIC_MSGBUF_H #include +#include + /* * generic msqid64_ds structure. * diff --git a/usr/include/Makefile b/usr/include/Makefile index 00cc763ec488..6bd22f83b0df 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include # Please consider to fix the header first. # # Sorted alphabetically. -header-test- += asm/msgbuf.h header-test- += asm/sembuf.h header-test- += asm/shmbuf.h header-test- += asm/signal.h From 0fb9dc28679a627f84974165c8011e0630529ece Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Dec 2019 16:53:03 -0800 Subject: [PATCH 62/86] arch: sembuf.h: make uapi asm/sembuf.h self-contained Userspace cannot compile due to some missing type definitions. For example, building it for x86 fails as follows: CC usr/include/asm/sembuf.h.s In file included from :32:0: usr/include/asm/sembuf.h:17:20: error: field `sem_perm' has incomplete type struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ ^~~~~~~~ usr/include/asm/sembuf.h:24:2: error: unknown type name `__kernel_time_t' __kernel_time_t sem_otime; /* last semop time */ ^~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:25:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused1; ^~~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:26:2: error: unknown type name `__kernel_time_t' __kernel_time_t sem_ctime; /* last change time */ ^~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:27:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused2; ^~~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:29:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t sem_nsems; /* no. of semaphores in array */ ^~~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:30:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused3; ^~~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:31:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused4; ^~~~~~~~~~~~~~~~ It is just a matter of missing include directive. Include to make it self-contained, and add it to the compile-test coverage. Link: http://lkml.kernel.org/r/20191030063855.9989-3-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/uapi/asm/sembuf.h | 2 ++ arch/parisc/include/uapi/asm/sembuf.h | 1 + arch/powerpc/include/uapi/asm/sembuf.h | 2 ++ arch/sparc/include/uapi/asm/sembuf.h | 2 ++ arch/x86/include/uapi/asm/sembuf.h | 2 ++ arch/xtensa/include/uapi/asm/sembuf.h | 1 + include/uapi/asm-generic/sembuf.h | 1 + usr/include/Makefile | 1 - 8 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/uapi/asm/sembuf.h b/arch/mips/include/uapi/asm/sembuf.h index 43e1b4a2f68a..ba7fe0c89e7d 100644 --- a/arch/mips/include/uapi/asm/sembuf.h +++ b/arch/mips/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_SEMBUF_H #define _ASM_SEMBUF_H +#include + /* * The semid64_ds structure for the MIPS architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/parisc/include/uapi/asm/sembuf.h b/arch/parisc/include/uapi/asm/sembuf.h index 8241cf126018..e2ca4301c7fb 100644 --- a/arch/parisc/include/uapi/asm/sembuf.h +++ b/arch/parisc/include/uapi/asm/sembuf.h @@ -3,6 +3,7 @@ #define _PARISC_SEMBUF_H #include +#include /* * The semid64_ds structure for parisc architecture. diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h index 008ae77c6746..85e96ccb5f0f 100644 --- a/arch/powerpc/include/uapi/asm/sembuf.h +++ b/arch/powerpc/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_SEMBUF_H #define _ASM_POWERPC_SEMBUF_H +#include + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/arch/sparc/include/uapi/asm/sembuf.h b/arch/sparc/include/uapi/asm/sembuf.h index cbcbaa4e7128..10621d066d79 100644 --- a/arch/sparc/include/uapi/asm/sembuf.h +++ b/arch/sparc/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _SPARC_SEMBUF_H #define _SPARC_SEMBUF_H +#include + /* * The semid64_ds structure for sparc architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h index 93030e97269a..71205b02a191 100644 --- a/arch/x86/include/uapi/asm/sembuf.h +++ b/arch/x86/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_SEMBUF_H #define _ASM_X86_SEMBUF_H +#include + /* * The semid64_ds structure for x86 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/xtensa/include/uapi/asm/sembuf.h b/arch/xtensa/include/uapi/asm/sembuf.h index 09f348d643f1..3b9cdd406dfe 100644 --- a/arch/xtensa/include/uapi/asm/sembuf.h +++ b/arch/xtensa/include/uapi/asm/sembuf.h @@ -22,6 +22,7 @@ #define _XTENSA_SEMBUF_H #include +#include struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ diff --git a/include/uapi/asm-generic/sembuf.h b/include/uapi/asm-generic/sembuf.h index 137606018c6a..0e709bd3d730 100644 --- a/include/uapi/asm-generic/sembuf.h +++ b/include/uapi/asm-generic/sembuf.h @@ -3,6 +3,7 @@ #define __ASM_GENERIC_SEMBUF_H #include +#include /* * The semid64_ds structure for x86 architecture. diff --git a/usr/include/Makefile b/usr/include/Makefile index 6bd22f83b0df..4a753a48767b 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include # Please consider to fix the header first. # # Sorted alphabetically. -header-test- += asm/sembuf.h header-test- += asm/shmbuf.h header-test- += asm/signal.h header-test- += asm/ucontext.h From 17b6753ff08bc47f50da09f5185849172c598315 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:06 -0800 Subject: [PATCH 63/86] lib/test_bitmap: force argument of bitmap_parselist_user() to proper address space Patch series "gpio: pca953x: Convert to bitmap (extended) API", v2. While converting gpio-pca953x driver to bitmap API, I noticed that we have no function to replace bits. So, that's how patch 7 appears. First 6 patches are preparatory of the test suite (including some warning fixes, etc). Patches 8-9 are preparatory for the GPIO driver to be easier converted to bitmap API, conversion to which happens in patch 10. Patch 11 contains simple indentation fixes. This patch (of 11): Sparse complains: lib/test_bitmap.c:345:58: warning: incorrect type in argument 1 (different address spaces) lib/test_bitmap.c:345:58: expected char const [noderef] *ubuf lib/test_bitmap.c:345:58: got char const *const in Force argument of bitmap_parselist_user() to proper address space. Link: http://lkml.kernel.org/r/20191022172922.61232-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Bartosz Golaszewski Cc: Rasmus Villemoes Cc: Yury Norov Cc: William Breathitt Gray Cc: Geert Uytterhoeven Cc: Thomas Petazzoni Cc: Marek Vasut Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index dc167c13eb39..09aa29a6b562 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -330,7 +330,7 @@ static void __init __test_bitmap_parselist(int is_user) set_fs(KERNEL_DS); time = ktime_get(); - err = bitmap_parselist_user(ptest.in, len, + err = bitmap_parselist_user((__force const char __user *)ptest.in, len, bmap, ptest.nbits); time = ktime_get() - time; set_fs(orig_fs); From 54224044096e02d6df361333f5528940e3cd3f89 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:09 -0800 Subject: [PATCH 64/86] lib/test_bitmap: undefine macros after use There is no need to keep step and ptest macros defined in entire file. Link: http://lkml.kernel.org/r/20191022172922.61232-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Rasmus Villemoes Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 09aa29a6b562..d2fa94e45a46 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -311,6 +311,8 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { {-EINVAL, "a-31:10/1", NULL, 8, 0}, {-EINVAL, "0-31:a/1", NULL, 8, 0}, {-EINVAL, "0-\n", NULL, 8, 0}, + +#undef step }; static void __init __test_bitmap_parselist(int is_user) @@ -357,6 +359,8 @@ static void __init __test_bitmap_parselist(int is_user) if (ptest.flags & PARSE_TIME) pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n", mode, i, ptest.in, time); + +#undef ptest } } From a4881d1cbc6c36424bf11dec9d484b9d19b927ab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:13 -0800 Subject: [PATCH 65/86] lib/test_bitmap: name EXP_BYTES properly EXP_BYTES has been wrongly named. It's a size of the exp array in bits. While here, go ahead and rename to EXP1_IN_BITS to avoid double renaming when exp will be renamed to exp1 in the next patch Link: http://lkml.kernel.org/r/20191022172922.61232-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Rasmus Villemoes Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index d2fa94e45a46..38f923411ada 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -374,17 +374,17 @@ static void __init test_bitmap_parselist_user(void) __test_bitmap_parselist(1); } -#define EXP_BYTES (sizeof(exp) * 8) +#define EXP1_IN_BITS (sizeof(exp) * 8) static void __init test_bitmap_arr32(void) { unsigned int nbits, next_bit; - u32 arr[sizeof(exp) / 4]; - DECLARE_BITMAP(bmap2, EXP_BYTES); + u32 arr[EXP1_IN_BITS / 32]; + DECLARE_BITMAP(bmap2, EXP1_IN_BITS); memset(arr, 0xa5, sizeof(arr)); - for (nbits = 0; nbits < EXP_BYTES; ++nbits) { + for (nbits = 0; nbits < EXP1_IN_BITS; ++nbits) { bitmap_to_arr32(arr, exp, nbits); bitmap_from_arr32(bmap2, arr, nbits); expect_eq_bitmap(bmap2, exp, nbits); @@ -396,7 +396,7 @@ static void __init test_bitmap_arr32(void) " tail is not safely cleared: %d\n", nbits, next_bit); - if (nbits < EXP_BYTES - 32) + if (nbits < EXP1_IN_BITS - 32) expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)], 0xa5a5a5a5); } From 0ee312e38042c868be9eb973d6e639e6d67b84e2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:17 -0800 Subject: [PATCH 66/86] lib/test_bitmap: rename exp to exp1 to avoid ambiguous name One function is using exp as local variable. Avoid ambiguous naming by rename global one to exp1. Link: http://lkml.kernel.org/r/20191022172922.61232-5-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Rasmus Villemoes Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 38f923411ada..6b70166ac960 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -247,7 +247,7 @@ struct test_bitmap_parselist{ const int flags; }; -static const unsigned long exp[] __initconst = { +static const unsigned long exp1[] __initconst = { BITMAP_FROM_U64(1), BITMAP_FROM_U64(2), BITMAP_FROM_U64(0x0000ffff), @@ -271,29 +271,29 @@ static const unsigned long exp2[] __initconst = { static const struct test_bitmap_parselist parselist_tests[] __initconst = { #define step (sizeof(u64) / sizeof(unsigned long)) - {0, "0", &exp[0], 8, 0}, - {0, "1", &exp[1 * step], 8, 0}, - {0, "0-15", &exp[2 * step], 32, 0}, - {0, "16-31", &exp[3 * step], 32, 0}, - {0, "0-31:1/2", &exp[4 * step], 32, 0}, - {0, "1-31:1/2", &exp[5 * step], 32, 0}, - {0, "0-31:1/4", &exp[6 * step], 32, 0}, - {0, "1-31:1/4", &exp[7 * step], 32, 0}, - {0, "0-31:4/4", &exp[8 * step], 32, 0}, - {0, "1-31:4/4", &exp[9 * step], 32, 0}, - {0, "0-31:1/4,32-63:2/4", &exp[10 * step], 64, 0}, - {0, "0-31:3/4,32-63:4/4", &exp[11 * step], 64, 0}, - {0, " ,, 0-31:3/4 ,, 32-63:4/4 ,, ", &exp[11 * step], 64, 0}, + {0, "0", &exp1[0], 8, 0}, + {0, "1", &exp1[1 * step], 8, 0}, + {0, "0-15", &exp1[2 * step], 32, 0}, + {0, "16-31", &exp1[3 * step], 32, 0}, + {0, "0-31:1/2", &exp1[4 * step], 32, 0}, + {0, "1-31:1/2", &exp1[5 * step], 32, 0}, + {0, "0-31:1/4", &exp1[6 * step], 32, 0}, + {0, "1-31:1/4", &exp1[7 * step], 32, 0}, + {0, "0-31:4/4", &exp1[8 * step], 32, 0}, + {0, "1-31:4/4", &exp1[9 * step], 32, 0}, + {0, "0-31:1/4,32-63:2/4", &exp1[10 * step], 64, 0}, + {0, "0-31:3/4,32-63:4/4", &exp1[11 * step], 64, 0}, + {0, " ,, 0-31:3/4 ,, 32-63:4/4 ,, ", &exp1[11 * step], 64, 0}, {0, "0-31:1/4,32-63:2/4,64-95:3/4,96-127:4/4", exp2, 128, 0}, {0, "0-2047:128/256", NULL, 2048, PARSE_TIME}, - {0, "", &exp[12 * step], 8, 0}, - {0, "\n", &exp[12 * step], 8, 0}, - {0, ",, ,, , , ,", &exp[12 * step], 8, 0}, - {0, " , ,, , , ", &exp[12 * step], 8, 0}, - {0, " , ,, , , \n", &exp[12 * step], 8, 0}, + {0, "", &exp1[12 * step], 8, 0}, + {0, "\n", &exp1[12 * step], 8, 0}, + {0, ",, ,, , , ,", &exp1[12 * step], 8, 0}, + {0, " , ,, , , ", &exp1[12 * step], 8, 0}, + {0, " , ,, , , \n", &exp1[12 * step], 8, 0}, {-EINVAL, "-1", NULL, 8, 0}, {-EINVAL, "-0", NULL, 8, 0}, @@ -374,7 +374,7 @@ static void __init test_bitmap_parselist_user(void) __test_bitmap_parselist(1); } -#define EXP1_IN_BITS (sizeof(exp) * 8) +#define EXP1_IN_BITS (sizeof(exp1) * 8) static void __init test_bitmap_arr32(void) { @@ -385,9 +385,9 @@ static void __init test_bitmap_arr32(void) memset(arr, 0xa5, sizeof(arr)); for (nbits = 0; nbits < EXP1_IN_BITS; ++nbits) { - bitmap_to_arr32(arr, exp, nbits); + bitmap_to_arr32(arr, exp1, nbits); bitmap_from_arr32(bmap2, arr, nbits); - expect_eq_bitmap(bmap2, exp, nbits); + expect_eq_bitmap(bmap2, exp1, nbits); next_bit = find_next_bit(bmap2, round_up(nbits, BITS_PER_LONG), nbits); From c21dd8a7bb4b8b9cab5dc335a5fa3afa2df1a831 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:20 -0800 Subject: [PATCH 67/86] lib/test_bitmap: move exp1 and exp2 upper for others to use Some test cases may re-use predefined exp1 and exp2 bitmaps. Move them upper in the file. Link: http://lkml.kernel.org/r/20191022172922.61232-6-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Rasmus Villemoes Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6b70166ac960..449d0882b488 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -21,6 +21,26 @@ static unsigned failed_tests __initdata; static char pbl_buffer[PAGE_SIZE] __initdata; +static const unsigned long exp1[] __initconst = { + BITMAP_FROM_U64(1), + BITMAP_FROM_U64(2), + BITMAP_FROM_U64(0x0000ffff), + BITMAP_FROM_U64(0xffff0000), + BITMAP_FROM_U64(0x55555555), + BITMAP_FROM_U64(0xaaaaaaaa), + BITMAP_FROM_U64(0x11111111), + BITMAP_FROM_U64(0x22222222), + BITMAP_FROM_U64(0xffffffff), + BITMAP_FROM_U64(0xfffffffe), + BITMAP_FROM_U64(0x3333333311111111ULL), + BITMAP_FROM_U64(0xffffffff77777777ULL), + BITMAP_FROM_U64(0), +}; + +static const unsigned long exp2[] __initconst = { + BITMAP_FROM_U64(0x3333333311111111ULL), + BITMAP_FROM_U64(0xffffffff77777777ULL), +}; static bool __init __check_eq_uint(const char *srcfile, unsigned int line, @@ -247,27 +267,6 @@ struct test_bitmap_parselist{ const int flags; }; -static const unsigned long exp1[] __initconst = { - BITMAP_FROM_U64(1), - BITMAP_FROM_U64(2), - BITMAP_FROM_U64(0x0000ffff), - BITMAP_FROM_U64(0xffff0000), - BITMAP_FROM_U64(0x55555555), - BITMAP_FROM_U64(0xaaaaaaaa), - BITMAP_FROM_U64(0x11111111), - BITMAP_FROM_U64(0x22222222), - BITMAP_FROM_U64(0xffffffff), - BITMAP_FROM_U64(0xfffffffe), - BITMAP_FROM_U64(0x3333333311111111ULL), - BITMAP_FROM_U64(0xffffffff77777777ULL), - BITMAP_FROM_U64(0), -}; - -static const unsigned long exp2[] __initconst = { - BITMAP_FROM_U64(0x3333333311111111ULL), - BITMAP_FROM_U64(0xffffffff77777777ULL) -}; - static const struct test_bitmap_parselist parselist_tests[] __initconst = { #define step (sizeof(u64) / sizeof(unsigned long)) From 780ff33b8bfac4f44fcc399a870d50ff2e74b33d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:23 -0800 Subject: [PATCH 68/86] lib/test_bitmap: fix comment about this file This test case file is about bitmap API, and not printf() facility. Link: http://lkml.kernel.org/r/20191022172922.61232-7-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Rasmus Villemoes Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 449d0882b488..4544847cf81e 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Test cases for printf facility. + * Test cases for bitmap API. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt From 30544ed5de431fe25d3793e4dd5a058d877c4d77 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:26 -0800 Subject: [PATCH 69/86] lib/bitmap: introduce bitmap_replace() helper In some drivers we want to have a single operation over bitmap which is an equivalent to: *dst = (*old & ~(*mask)) | (*new & *mask) Introduce bitmap_replace() helper for this. Link: http://lkml.kernel.org/r/20191022172922.61232-8-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Rasmus Villemoes Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 16 ++++++++++++++++ lib/bitmap.c | 12 ++++++++++++ lib/test_bitmap.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 9f046609e809..ff335b22f23c 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -53,6 +53,7 @@ * bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n + * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap @@ -140,6 +141,9 @@ extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); +extern void __bitmap_replace(unsigned long *dst, + const unsigned long *old, const unsigned long *new, + const unsigned long *mask, unsigned int nbits); extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, @@ -434,6 +438,18 @@ static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr __bitmap_shift_left(dst, src, shift, nbits); } +static inline void bitmap_replace(unsigned long *dst, + const unsigned long *old, + const unsigned long *new, + const unsigned long *mask, + unsigned int nbits) +{ + if (small_const_nbits(nbits)) + *dst = (*old & ~(*mask)) | (*new & *mask); + else + __bitmap_replace(dst, old, new, mask, nbits); +} + static inline int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *maskp, int nmaskbits) { diff --git a/lib/bitmap.c b/lib/bitmap.c index f9e834841e94..4250519d7d1c 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -222,6 +222,18 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_andnot); +void __bitmap_replace(unsigned long *dst, + const unsigned long *old, const unsigned long *new, + const unsigned long *mask, unsigned int nbits) +{ + unsigned int k; + unsigned int nr = BITS_TO_LONGS(nbits); + + for (k = 0; k < nr; k++) + dst[k] = (old[k] & ~mask[k]) | (new[k] & mask[k]); +} +EXPORT_SYMBOL(__bitmap_replace); + int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 4544847cf81e..e14a15ac250b 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -42,6 +42,19 @@ static const unsigned long exp2[] __initconst = { BITMAP_FROM_U64(0xffffffff77777777ULL), }; +/* Fibonacci sequence */ +static const unsigned long exp2_to_exp3_mask[] __initconst = { + BITMAP_FROM_U64(0x008000020020212eULL), +}; +/* exp3_0_1 = (exp2[0] & ~exp2_to_exp3_mask) | (exp2[1] & exp2_to_exp3_mask) */ +static const unsigned long exp3_0_1[] __initconst = { + BITMAP_FROM_U64(0x33b3333311313137ULL), +}; +/* exp3_1_0 = (exp2[1] & ~exp2_to_exp3_mask) | (exp2[0] & exp2_to_exp3_mask) */ +static const unsigned long exp3_1_0[] __initconst = { + BITMAP_FROM_U64(0xff7fffff77575751ULL), +}; + static bool __init __check_eq_uint(const char *srcfile, unsigned int line, const unsigned int exp_uint, unsigned int x) @@ -257,6 +270,30 @@ static void __init test_copy(void) expect_eq_pbl("0-108,128-1023", bmap2, 1024); } +#define EXP2_IN_BITS (sizeof(exp2) * 8) + +static void __init test_replace(void) +{ + unsigned int nbits = 64; + DECLARE_BITMAP(bmap, 1024); + + bitmap_zero(bmap, 1024); + bitmap_replace(bmap, &exp2[0], &exp2[1], exp2_to_exp3_mask, nbits); + expect_eq_bitmap(bmap, exp3_0_1, nbits); + + bitmap_zero(bmap, 1024); + bitmap_replace(bmap, &exp2[1], &exp2[0], exp2_to_exp3_mask, nbits); + expect_eq_bitmap(bmap, exp3_1_0, nbits); + + bitmap_fill(bmap, 1024); + bitmap_replace(bmap, &exp2[0], &exp2[1], exp2_to_exp3_mask, nbits); + expect_eq_bitmap(bmap, exp3_0_1, nbits); + + bitmap_fill(bmap, 1024); + bitmap_replace(bmap, &exp2[1], &exp2[0], exp2_to_exp3_mask, nbits); + expect_eq_bitmap(bmap, exp3_1_0, nbits); +} + #define PARSE_TIME 0x1 struct test_bitmap_parselist{ @@ -476,6 +513,7 @@ static void __init selftest(void) test_zero_clear(); test_fill_set(); test_copy(); + test_replace(); test_bitmap_arr32(); test_bitmap_parselist(); test_bitmap_parselist_user(); From a97832f224893d6155aa785773df70263cfe94ef Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:30 -0800 Subject: [PATCH 70/86] gpio: pca953x: remove redundant variable and check in IRQ handler We always will have at least one iteration of the loop due to pending being guaranteed to be non-zero. That is, we may remove extra variable and check in the IRQ handler. Link: http://lkml.kernel.org/r/20191022172922.61232-9-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Rasmus Villemoes Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-pca953x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 232e3f987511..9f3301130efb 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -743,7 +743,6 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid) struct pca953x_chip *chip = devid; u8 pending[MAX_BANK]; u8 level; - unsigned nhandled = 0; int i; if (!pca953x_irq_pending(chip, pending)) @@ -755,11 +754,10 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid) handle_nested_irq(irq_find_mapping(chip->gpio_chip.irq.domain, level + (BANK_SZ * i))); pending[i] &= ~(1 << level); - nhandled++; } } - return (nhandled > 0) ? IRQ_HANDLED : IRQ_NONE; + return IRQ_HANDLED; } static int pca953x_irq_setup(struct pca953x_chip *chip, From 0a0a0219d6c85e6dd3dd5487caa3fb7b540f45f1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:33 -0800 Subject: [PATCH 71/86] gpio: pca953x: use input from regs structure in pca953x_irq_pending() While PCA_PCAL is defined for PCA953X type only, we still may use an offset of the input from regs structure for sake of consistency. Link: http://lkml.kernel.org/r/20191022172922.61232-10-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Rasmus Villemoes Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-pca953x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 9f3301130efb..31375138ee46 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -689,7 +689,7 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending) return false; /* Check latched inputs and clear interrupt status */ - ret = pca953x_read_regs(chip, PCA953X_INPUT, cur_stat); + ret = pca953x_read_regs(chip, chip->regs->input, cur_stat); if (ret) return false; From 35d13d94893fbdb6bbcbd01aa703296e91c7f851 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:37 -0800 Subject: [PATCH 72/86] gpio: pca953x: convert to use bitmap API Instead of customized approach convert the driver to use bitmap API. [andriy.shevchenko@linux.intel.com: reduce stack usage in couple of functions] Link: http://lkml.kernel.org/r/20191023153056.64262-1-andriy.shevchenko@linux.intel.com Link: http://lkml.kernel.org/r/20191022172922.61232-11-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: William Breathitt Gray Cc: Thomas Petazzoni Cc: Marek Vasut Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Rasmus Villemoes Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-pca953x.c | 164 +++++++++++++++--------------------- 1 file changed, 70 insertions(+), 94 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 31375138ee46..b23a38cee613 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -9,8 +9,7 @@ */ #include -#include -#include +#include #include #include #include @@ -116,6 +115,7 @@ MODULE_DEVICE_TABLE(acpi, pca953x_acpi_ids); #define MAX_BANK 5 #define BANK_SZ 8 +#define MAX_LINE (MAX_BANK * BANK_SZ) #define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ) @@ -147,10 +147,10 @@ struct pca953x_chip { #ifdef CONFIG_GPIO_PCA953X_IRQ struct mutex irq_lock; - u8 irq_mask[MAX_BANK]; - u8 irq_stat[MAX_BANK]; - u8 irq_trig_raise[MAX_BANK]; - u8 irq_trig_fall[MAX_BANK]; + DECLARE_BITMAP(irq_mask, MAX_LINE); + DECLARE_BITMAP(irq_stat, MAX_LINE); + DECLARE_BITMAP(irq_trig_raise, MAX_LINE); + DECLARE_BITMAP(irq_trig_fall, MAX_LINE); struct irq_chip irq_chip; #endif atomic_t wakeup_path; @@ -334,12 +334,16 @@ static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off, return regaddr; } -static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val) +static int pca953x_write_regs(struct pca953x_chip *chip, int reg, unsigned long *val) { u8 regaddr = pca953x_recalc_addr(chip, reg, 0, true, true); - int ret; + u8 value[MAX_BANK]; + int i, ret; - ret = regmap_bulk_write(chip->regmap, regaddr, val, NBANK(chip)); + for (i = 0; i < NBANK(chip); i++) + value[i] = bitmap_get_value8(val, i * BANK_SZ); + + ret = regmap_bulk_write(chip->regmap, regaddr, value, NBANK(chip)); if (ret < 0) { dev_err(&chip->client->dev, "failed writing register\n"); return ret; @@ -348,17 +352,21 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val) return 0; } -static int pca953x_read_regs(struct pca953x_chip *chip, int reg, u8 *val) +static int pca953x_read_regs(struct pca953x_chip *chip, int reg, unsigned long *val) { u8 regaddr = pca953x_recalc_addr(chip, reg, 0, false, true); - int ret; + u8 value[MAX_BANK]; + int i, ret; - ret = regmap_bulk_read(chip->regmap, regaddr, val, NBANK(chip)); + ret = regmap_bulk_read(chip->regmap, regaddr, value, NBANK(chip)); if (ret < 0) { dev_err(&chip->client->dev, "failed reading register\n"); return ret; } + for (i = 0; i < NBANK(chip); i++) + bitmap_set_value8(val, value[i], i * BANK_SZ); + return 0; } @@ -460,10 +468,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { struct pca953x_chip *chip = gpiochip_get_data(gc); - unsigned long offset; - unsigned long bank_mask; - int bank; - u8 reg_val[MAX_BANK]; + DECLARE_BITMAP(reg_val, MAX_LINE); int ret; mutex_lock(&chip->i2c_lock); @@ -471,11 +476,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, if (ret) goto exit; - for_each_set_clump8(offset, bank_mask, mask, gc->ngpio) { - bank = offset / 8; - reg_val[bank] &= ~bank_mask; - reg_val[bank] |= bitmap_get_value8(bits, offset) & bank_mask; - } + bitmap_replace(reg_val, reg_val, bits, mask, gc->ngpio); pca953x_write_regs(chip, chip->regs->output, reg_val); exit: @@ -602,10 +603,9 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct pca953x_chip *chip = gpiochip_get_data(gc); - u8 new_irqs; - int level, i; - u8 invert_irq_mask[MAX_BANK]; - u8 reg_direction[MAX_BANK]; + DECLARE_BITMAP(irq_mask, MAX_LINE); + DECLARE_BITMAP(reg_direction, MAX_LINE); + int level; pca953x_read_regs(chip, chip->regs->direction, reg_direction); @@ -613,25 +613,18 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d) /* Enable latch on interrupt-enabled inputs */ pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask); - for (i = 0; i < NBANK(chip); i++) - invert_irq_mask[i] = ~chip->irq_mask[i]; + bitmap_complement(irq_mask, chip->irq_mask, gc->ngpio); /* Unmask enabled interrupts */ - pca953x_write_regs(chip, PCAL953X_INT_MASK, invert_irq_mask); + pca953x_write_regs(chip, PCAL953X_INT_MASK, irq_mask); } + bitmap_or(irq_mask, chip->irq_trig_fall, chip->irq_trig_raise, gc->ngpio); + bitmap_and(irq_mask, irq_mask, reg_direction, gc->ngpio); + /* Look for any newly setup interrupt */ - for (i = 0; i < NBANK(chip); i++) { - new_irqs = chip->irq_trig_fall[i] | chip->irq_trig_raise[i]; - new_irqs &= reg_direction[i]; - - while (new_irqs) { - level = __ffs(new_irqs); - pca953x_gpio_direction_input(&chip->gpio_chip, - level + (BANK_SZ * i)); - new_irqs &= ~(1 << level); - } - } + for_each_set_bit(level, irq_mask, gc->ngpio) + pca953x_gpio_direction_input(&chip->gpio_chip, level); mutex_unlock(&chip->irq_lock); } @@ -672,15 +665,15 @@ static void pca953x_irq_shutdown(struct irq_data *d) chip->irq_trig_fall[d->hwirq / BANK_SZ] &= ~mask; } -static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending) +static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pending) { - u8 cur_stat[MAX_BANK]; - u8 old_stat[MAX_BANK]; - bool pending_seen = false; - bool trigger_seen = false; - u8 trigger[MAX_BANK]; - u8 reg_direction[MAX_BANK]; - int ret, i; + struct gpio_chip *gc = &chip->gpio_chip; + DECLARE_BITMAP(reg_direction, MAX_LINE); + DECLARE_BITMAP(old_stat, MAX_LINE); + DECLARE_BITMAP(cur_stat, MAX_LINE); + DECLARE_BITMAP(new_stat, MAX_LINE); + DECLARE_BITMAP(trigger, MAX_LINE); + int ret; if (chip->driver_data & PCA_PCAL) { /* Read the current interrupt status from the device */ @@ -693,16 +686,12 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending) if (ret) return false; - for (i = 0; i < NBANK(chip); i++) { - /* Apply filter for rising/falling edge selection */ - pending[i] = (~cur_stat[i] & chip->irq_trig_fall[i]) | - (cur_stat[i] & chip->irq_trig_raise[i]); - pending[i] &= trigger[i]; - if (pending[i]) - pending_seen = true; - } + /* Apply filter for rising/falling edge selection */ + bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio); - return pending_seen; + bitmap_and(pending, new_stat, trigger, gc->ngpio); + + return !bitmap_empty(pending, gc->ngpio); } ret = pca953x_read_regs(chip, chip->regs->input, cur_stat); @@ -711,51 +700,38 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending) /* Remove output pins from the equation */ pca953x_read_regs(chip, chip->regs->direction, reg_direction); - for (i = 0; i < NBANK(chip); i++) - cur_stat[i] &= reg_direction[i]; - memcpy(old_stat, chip->irq_stat, NBANK(chip)); + bitmap_copy(old_stat, chip->irq_stat, gc->ngpio); - for (i = 0; i < NBANK(chip); i++) { - trigger[i] = (cur_stat[i] ^ old_stat[i]) & chip->irq_mask[i]; - if (trigger[i]) - trigger_seen = true; - } + bitmap_and(new_stat, cur_stat, reg_direction, gc->ngpio); + bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio); + bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio); - if (!trigger_seen) + if (bitmap_empty(trigger, gc->ngpio)) return false; - memcpy(chip->irq_stat, cur_stat, NBANK(chip)); + bitmap_copy(chip->irq_stat, new_stat, gc->ngpio); - for (i = 0; i < NBANK(chip); i++) { - pending[i] = (old_stat[i] & chip->irq_trig_fall[i]) | - (cur_stat[i] & chip->irq_trig_raise[i]); - pending[i] &= trigger[i]; - if (pending[i]) - pending_seen = true; - } + bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio); + bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio); + bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio); + bitmap_and(pending, new_stat, trigger, gc->ngpio); - return pending_seen; + return !bitmap_empty(pending, gc->ngpio); } static irqreturn_t pca953x_irq_handler(int irq, void *devid) { struct pca953x_chip *chip = devid; - u8 pending[MAX_BANK]; - u8 level; - int i; + struct gpio_chip *gc = &chip->gpio_chip; + DECLARE_BITMAP(pending, MAX_LINE); + int level; if (!pca953x_irq_pending(chip, pending)) return IRQ_NONE; - for (i = 0; i < NBANK(chip); i++) { - while (pending[i]) { - level = __ffs(pending[i]); - handle_nested_irq(irq_find_mapping(chip->gpio_chip.irq.domain, - level + (BANK_SZ * i))); - pending[i] &= ~(1 << level); - } - } + for_each_set_bit(level, pending, gc->ngpio) + handle_nested_irq(irq_find_mapping(gc->irq.domain, level)); return IRQ_HANDLED; } @@ -765,8 +741,9 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, { struct i2c_client *client = chip->client; struct irq_chip *irq_chip = &chip->irq_chip; - u8 reg_direction[MAX_BANK]; - int ret, i; + DECLARE_BITMAP(reg_direction, MAX_LINE); + DECLARE_BITMAP(irq_stat, MAX_LINE); + int ret; if (!client->irq) return 0; @@ -777,7 +754,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, if (!(chip->driver_data & PCA_INT)) return 0; - ret = pca953x_read_regs(chip, chip->regs->input, chip->irq_stat); + ret = pca953x_read_regs(chip, chip->regs->input, irq_stat); if (ret) return ret; @@ -787,8 +764,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, * this purpose. */ pca953x_read_regs(chip, chip->regs->direction, reg_direction); - for (i = 0; i < NBANK(chip); i++) - chip->irq_stat[i] &= reg_direction[i]; + bitmap_and(chip->irq_stat, irq_stat, reg_direction, chip->gpio_chip.ngpio); mutex_init(&chip->irq_lock); ret = devm_request_threaded_irq(&client->dev, client->irq, @@ -840,8 +816,8 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert) { + DECLARE_BITMAP(val, MAX_LINE); int ret; - u8 val[MAX_BANK]; ret = regcache_sync_region(chip->regmap, chip->regs->output, chip->regs->output + NBANK(chip)); @@ -855,9 +831,9 @@ static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert) /* set platform specific polarity inversion */ if (invert) - memset(val, 0xFF, NBANK(chip)); + bitmap_fill(val, MAX_LINE); else - memset(val, 0, NBANK(chip)); + bitmap_zero(val, MAX_LINE); ret = pca953x_write_regs(chip, chip->regs->invert, val); out: @@ -866,8 +842,8 @@ out: static int device_pca957x_init(struct pca953x_chip *chip, u32 invert) { + DECLARE_BITMAP(val, MAX_LINE); int ret; - u8 val[MAX_BANK]; ret = device_pca95xx_init(chip, invert); if (ret) From b27d8517365eeac907de1cc1e91053ccf18179c3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:40 -0800 Subject: [PATCH 73/86] gpio: pca953x: tighten up indentation There is no need to split some of the lines. However, improve the style of multi-line comment. On top of this there is no need to have double space. Correct above indentation issues without altering the functionality. Link: http://lkml.kernel.org/r/20191022172922.61232-12-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Rasmus Villemoes Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-pca953x.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index b23a38cee613..6652bee01966 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -421,7 +421,9 @@ static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off) ret = regmap_read(chip->regmap, inreg, ®_val); mutex_unlock(&chip->i2c_lock); if (ret < 0) { - /* NOTE: diagnostic already emitted; that's all we should + /* + * NOTE: + * diagnostic already emitted; that's all we should * do unless gpio_*_value_cansleep() calls become different * from their nonsleeping siblings (and report faults). */ @@ -736,8 +738,7 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid) return IRQ_HANDLED; } -static int pca953x_irq_setup(struct pca953x_chip *chip, - int irq_base) +static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base) { struct i2c_client *client = chip->client; struct irq_chip *irq_chip = &chip->irq_chip; @@ -787,9 +788,9 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, irq_chip->irq_set_type = pca953x_irq_set_type; irq_chip->irq_shutdown = pca953x_irq_shutdown; - ret = gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip, - irq_base, handle_simple_irq, - IRQ_TYPE_NONE); + ret = gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip, + irq_base, handle_simple_irq, + IRQ_TYPE_NONE); if (ret) { dev_err(&client->dev, "could not connect irqchip to gpiochip\n"); @@ -863,7 +864,7 @@ out: static const struct of_device_id pca953x_dt_ids[]; static int pca953x_probe(struct i2c_client *client, - const struct i2c_device_id *i2c_id) + const struct i2c_device_id *i2c_id) { struct pca953x_platform_data *pdata; struct pca953x_chip *chip; @@ -872,8 +873,7 @@ static int pca953x_probe(struct i2c_client *client, u32 invert = 0; struct regulator *reg; - chip = devm_kzalloc(&client->dev, - sizeof(struct pca953x_chip), GFP_KERNEL); + chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (chip == NULL) return -ENOMEM; @@ -987,7 +987,7 @@ static int pca953x_probe(struct i2c_client *client, if (pdata && pdata->setup) { ret = pdata->setup(client, chip->gpio_chip.base, - chip->gpio_chip.ngpio, pdata->context); + chip->gpio_chip.ngpio, pdata->context); if (ret < 0) dev_warn(&client->dev, "setup failed, %d\n", ret); } @@ -1007,7 +1007,7 @@ static int pca953x_remove(struct i2c_client *client) if (pdata && pdata->teardown) { ret = pdata->teardown(client, chip->gpio_chip.base, - chip->gpio_chip.ngpio, pdata->context); + chip->gpio_chip.ngpio, pdata->context); if (ret < 0) dev_err(&client->dev, "teardown failed, %d\n", ret); } else { From a73c948952ccd663b47f6cf59ac0fcae5bf0512c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:53:44 -0800 Subject: [PATCH 74/86] alpha: use pgtable-nopud instead of 4level-fixup Patch series "mm: remove __ARCH_HAS_4LEVEL_HACK", v13. These patches convert several architectures to use page table folding and remove __ARCH_HAS_4LEVEL_HACK along with include/asm-generic/4level-fixup.h. For the nommu configurations the folding is already implemented by the generic code so the only change was to use the appropriate header file. As for the rest, the changes are mostly about mechanical replacement of pgd accessors with pud/pmd ones and the addition of higher levels to page table traversals. With Vineet's patches from "elide extraneous generated code for folded p4d/pud/pmd" series [1] there is a small shrink of the kernel size of about -0.01% for the defconfig builds. This patch (of 13): It is not likely alpha will have 5-level page tables. Replace usage of include/asm-generic/4level-fixup.h and implied __ARCH_HAS_4LEVEL_HACK with include/asm-generic/pgtable-nopud.h and adjust page table manipulation macros and functions accordingly. Link: http://lkml.kernel.org/r/1572938135-31886-2-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Cc: Anatoly Pugachev Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mmzone.h | 1 - arch/alpha/include/asm/pgalloc.h | 4 ++-- arch/alpha/include/asm/pgtable.h | 24 ++++++++++++------------ arch/alpha/mm/init.c | 12 ++++++++---- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h index 889b5d3ad825..7ee144f484f1 100644 --- a/arch/alpha/include/asm/mmzone.h +++ b/arch/alpha/include/asm/mmzone.h @@ -73,7 +73,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32)) -#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> 32)) #define pte_pfn(pte) (pte_val(pte) >> 32) #define mk_pte(page, pgprot) \ diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index eb91f1e85629..a1a29f60934c 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -27,9 +27,9 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) } static inline void -pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pgd_set(pgd, pmd); + pud_set(pud, pmd); } extern pgd_t *pgd_alloc(struct mm_struct *mm); diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 065b57f408c3..299791ce14b6 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -2,7 +2,7 @@ #ifndef _ALPHA_PGTABLE_H #define _ALPHA_PGTABLE_H -#include +#include /* * This file contains the functions and defines necessary to modify and use @@ -226,8 +226,8 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) extern inline void pmd_set(pmd_t * pmdp, pte_t * ptep) { pmd_val(*pmdp) = _PAGE_TABLE | ((((unsigned long) ptep) - PAGE_OFFSET) << (32-PAGE_SHIFT)); } -extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp) -{ pgd_val(*pgdp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); } +extern inline void pud_set(pud_t * pudp, pmd_t * pmdp) +{ pud_val(*pudp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); } extern inline unsigned long @@ -238,11 +238,11 @@ pmd_page_vaddr(pmd_t pmd) #ifndef CONFIG_DISCONTIGMEM #define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32)) -#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32)) +#define pud_page(pud) (mem_map + ((pud_val(pud) & _PFN_MASK) >> 32)) #endif -extern inline unsigned long pgd_page_vaddr(pgd_t pgd) -{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); } +extern inline unsigned long pud_page_vaddr(pud_t pgd) +{ return PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); } extern inline int pte_none(pte_t pte) { return !pte_val(pte); } extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; } @@ -256,10 +256,10 @@ extern inline int pmd_bad(pmd_t pmd) { return (pmd_val(pmd) & ~_PFN_MASK) != _P extern inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) & _PAGE_VALID; } extern inline void pmd_clear(pmd_t * pmdp) { pmd_val(*pmdp) = 0; } -extern inline int pgd_none(pgd_t pgd) { return !pgd_val(pgd); } -extern inline int pgd_bad(pgd_t pgd) { return (pgd_val(pgd) & ~_PFN_MASK) != _PAGE_TABLE; } -extern inline int pgd_present(pgd_t pgd) { return pgd_val(pgd) & _PAGE_VALID; } -extern inline void pgd_clear(pgd_t * pgdp) { pgd_val(*pgdp) = 0; } +extern inline int pud_none(pud_t pud) { return !pud_val(pud); } +extern inline int pud_bad(pud_t pud) { return (pud_val(pud) & ~_PFN_MASK) != _PAGE_TABLE; } +extern inline int pud_present(pud_t pud) { return pud_val(pud) & _PAGE_VALID; } +extern inline void pud_clear(pud_t * pudp) { pud_val(*pudp) = 0; } /* * The following only work if pte_present() is true. @@ -301,9 +301,9 @@ extern inline pte_t pte_mkspecial(pte_t pte) { return pte; } */ /* Find an entry in the second-level page table.. */ -extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) +extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address) { - pmd_t *ret = (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); + pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); smp_read_barrier_depends(); /* see above */ return ret; } diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index e2cbec3789e8..12e218d3792a 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -146,6 +146,8 @@ callback_init(void * kernel_end) { struct crb_struct * crb; pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; void *two_pages; @@ -184,8 +186,10 @@ callback_init(void * kernel_end) memset(two_pages, 0, 2*PAGE_SIZE); pgd = pgd_offset_k(VMALLOC_START); - pgd_set(pgd, (pmd_t *)two_pages); - pmd = pmd_offset(pgd, VMALLOC_START); + p4d = p4d_offset(pgd, VMALLOC_START); + pud = pud_offset(p4d, VMALLOC_START); + pud_set(pud, (pmd_t *)two_pages); + pmd = pmd_offset(pud, VMALLOC_START); pmd_set(pmd, (pte_t *)(two_pages + PAGE_SIZE)); if (alpha_using_srm) { @@ -214,9 +218,9 @@ callback_init(void * kernel_end) /* Newer consoles (especially on larger systems) may require more pages of PTEs. Grab additional pages as needed. */ - if (pmd != pmd_offset(pgd, vaddr)) { + if (pmd != pmd_offset(pud, vaddr)) { memset(kernel_end, 0, PAGE_SIZE); - pmd = pmd_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); pmd_set(pmd, (pte_t *)kernel_end); kernel_end += PAGE_SIZE; } From aa6628230deb3a0d871dc51923531b41dfbef352 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:53:48 -0800 Subject: [PATCH 75/86] arm: nommu: use pgtable-nopud instead of 4level-fixup The generic nommu implementation of page table manipulation takes care of folding of the upper levels and does not require fixups. Simply replace of include/asm-generic/4level-fixup.h with include/asm-generic/pgtable-nopud.h. Link: http://lkml.kernel.org/r/1572938135-31886-3-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Russell King Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 3ae120cd1715..eabcb48a7840 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -12,7 +12,7 @@ #ifndef CONFIG_MMU -#include +#include #include #else From d13252ea800e1f2a39e40c2b8a4397b21a80555d Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:53:52 -0800 Subject: [PATCH 76/86] c6x: use pgtable-nopud instead of 4level-fixup c6x is a nommu architecture and does not require fixup for upper layers of the page tables because it is already handled by the generic nommu implementation. Replace usage of include/asm-generic/4level-fixup.h with include/asm-generic/pgtable-nopud.h Link: http://lkml.kernel.org/r/1572938135-31886-4-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/c6x/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h index 0b6919c00413..197c473b796a 100644 --- a/arch/c6x/include/asm/pgtable.h +++ b/arch/c6x/include/asm/pgtable.h @@ -8,7 +8,7 @@ #ifndef _ASM_C6X_PGTABLE_H #define _ASM_C6X_PGTABLE_H -#include +#include #include #include From f6f7caeb58532db2e46c40eee2d7e8969d5c707e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:53:55 -0800 Subject: [PATCH 77/86] m68k: nommu: use pgtable-nopud instead of 4level-fixup The generic nommu implementation of page table manipulation takes care of folding of the upper levels and does not require fixups. Simply replace of include/asm-generic/4level-fixup.h with include/asm-generic/pgtable-nopud.h. Link: http://lkml.kernel.org/r/1572938135-31886-5-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Greg Ungerer Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m68k/include/asm/pgtable_no.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h index c18165b0d904..ccc4568299e5 100644 --- a/arch/m68k/include/asm/pgtable_no.h +++ b/arch/m68k/include/asm/pgtable_no.h @@ -2,7 +2,7 @@ #ifndef _M68KNOMMU_PGTABLE_H #define _M68KNOMMU_PGTABLE_H -#include +#include /* * (C) Copyright 2000-2002, Greg Ungerer From 60e50f34b13e9e40763be12aa55f2144d8da514c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:53:59 -0800 Subject: [PATCH 78/86] m68k: mm: use pgtable-nopXd instead of 4level-fixup m68k has two or three levels of page tables and can use appropriate pgtable-nopXd and folding of the upper layers. Replace usage of include/asm-generic/4level-fixup.h and explicit definitions of __PAGETABLE_PxD_FOLDED in m68k with include/asm-generic/pgtable-nopmd.h for two-level configurations and with include/asm-generic/pgtable-nopud.h for three-lelve configurations and adjust page table manipulation macros and functions accordingly. [akpm@linux-foundation.org: fix merge glitch] [geert@linux-m68k.org: more merge glitch fixes] [akpm@linux-foundation.org: s/bad_pgd/bad_pud/, per Mike] Link: http://lkml.kernel.org/r/1572938135-31886-6-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Greg Ungerer Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m68k/include/asm/mcf_pgalloc.h | 7 ----- arch/m68k/include/asm/mcf_pgtable.h | 28 ++++++----------- arch/m68k/include/asm/mmu_context.h | 12 +++++++- arch/m68k/include/asm/motorola_pgalloc.h | 4 +-- arch/m68k/include/asm/motorola_pgtable.h | 32 +++++++++++-------- arch/m68k/include/asm/page.h | 9 ++++-- arch/m68k/include/asm/pgtable_mm.h | 11 ++++--- arch/m68k/include/asm/sun3_pgalloc.h | 5 --- arch/m68k/include/asm/sun3_pgtable.h | 18 ----------- arch/m68k/kernel/sys_m68k.c | 10 +++++- arch/m68k/mm/init.c | 6 ++-- arch/m68k/mm/kmap.c | 39 ++++++++++++++++++------ arch/m68k/mm/mcfmmu.c | 16 +++++++++- arch/m68k/mm/motorola.c | 17 +++++++---- arch/m68k/sun3x/dvma.c | 7 +++-- 15 files changed, 129 insertions(+), 92 deletions(-) diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index b34d44d666a4..82ec54c2eaa4 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -28,9 +28,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address) return (pmd_t *) pgd; } -#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); }) -#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); }) - #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \ (unsigned long)(page_address(page))) @@ -45,8 +42,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, __free_page(page); } -#define __pmd_free_tlb(tlb, pmd, address) do { } while (0) - static inline struct page *pte_alloc_one(struct mm_struct *mm) { struct page *page = alloc_pages(GFP_DMA, 0); @@ -100,6 +95,4 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return new_pgd; } -#define pgd_populate(mm, pmd, pte) BUG() - #endif /* M68K_MCF_PGALLOC_H */ diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index 5d5502cb2b2d..b9f45aeded25 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -198,17 +198,9 @@ static inline int pmd_bad2(pmd_t *pmd) { return 0; } #define pmd_present(pmd) (!pmd_none2(&(pmd))) static inline void pmd_clear(pmd_t *pmdp) { pmd_val(*pmdp) = 0; } -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -static inline void pgd_clear(pgd_t *pgdp) {} - #define pte_ERROR(e) \ printk(KERN_ERR "%s:%d: bad pte %08lx.\n", \ __FILE__, __LINE__, pte_val(e)) -#define pmd_ERROR(e) \ - printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \ - __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \ __FILE__, __LINE__, pgd_val(e)) @@ -339,14 +331,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) -/* - * Find an entry in the second-level pagetable. - */ -static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address) -{ - return (pmd_t *) pgd; -} - /* * Find an entry in the third-level pagetable. */ @@ -360,12 +344,16 @@ static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address) static inline void nocache_page(void *vaddr) { pgd_t *dir; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; unsigned long addr = (unsigned long) vaddr; dir = pgd_offset_k(addr); - pmdp = pmd_offset(dir, addr); + p4dp = p4d_offset(dir, addr); + pudp = pud_offset(p4dp, addr); + pmdp = pmd_offset(pudp, addr); ptep = pte_offset_kernel(pmdp, addr); *ptep = pte_mknocache(*ptep); } @@ -376,12 +364,16 @@ static inline void nocache_page(void *vaddr) static inline void cache_page(void *vaddr) { pgd_t *dir; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; unsigned long addr = (unsigned long) vaddr; dir = pgd_offset_k(addr); - pmdp = pmd_offset(dir, addr); + p4dp = p4d_offset(dir, addr); + pudp = pud_offset(p4dp, addr); + pmdp = pmd_offset(pudp, addr); ptep = pte_offset_kernel(pmdp, addr); *ptep = pte_mkcache(*ptep); } diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h index f5b1852b4663..cac9f289d1f6 100644 --- a/arch/m68k/include/asm/mmu_context.h +++ b/arch/m68k/include/asm/mmu_context.h @@ -100,6 +100,8 @@ static inline void load_ksp_mmu(struct task_struct *task) struct mm_struct *mm; int asid; pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; pte_t *pte; unsigned long mmuar; @@ -127,7 +129,15 @@ static inline void load_ksp_mmu(struct task_struct *task) if (pgd_none(*pgd)) goto bug; - pmd = pmd_offset(pgd, mmuar); + p4d = p4d_offset(pgd, mmuar); + if (p4d_none(*p4d)) + goto bug; + + pud = pud_offset(p4d, mmuar); + if (pud_none(*pud)) + goto bug; + + pmd = pmd_offset(pud, mmuar); if (pmd_none(*pmd)) goto bug; diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index acab315c851f..ff9cc401ffd1 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -106,9 +106,9 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page } #define pmd_pgtable(pmd) pmd_page(pmd) -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pgd_set(pgd, pmd); + pud_set(pud, pmd); } #endif /* _MOTOROLA_PGALLOC_H */ diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 7f66a7bad7a5..62bedc61f110 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -117,14 +117,14 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) } } -static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp) +static inline void pud_set(pud_t *pudp, pmd_t *pmdp) { - pgd_val(*pgdp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp); + pud_val(*pudp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp); } #define __pte_page(pte) ((unsigned long)__va(pte_val(pte) & PAGE_MASK)) #define __pmd_page(pmd) ((unsigned long)__va(pmd_val(pmd) & _TABLE_MASK)) -#define __pgd_page(pgd) ((unsigned long)__va(pgd_val(pgd) & _TABLE_MASK)) +#define pud_page_vaddr(pud) ((unsigned long)__va(pud_val(pud) & _TABLE_MASK)) #define pte_none(pte) (!pte_val(pte)) @@ -147,11 +147,11 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp) #define pmd_page(pmd) virt_to_page(__va(pmd_val(pmd))) -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) ((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE) -#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_TABLE) -#define pgd_clear(pgdp) ({ pgd_val(*pgdp) = 0; }) -#define pgd_page(pgd) (mem_map + ((unsigned long)(__va(pgd_val(pgd)) - PAGE_OFFSET) >> PAGE_SHIFT)) +#define pud_none(pud) (!pud_val(pud)) +#define pud_bad(pud) ((pud_val(pud) & _DESCTYPE_MASK) != _PAGE_TABLE) +#define pud_present(pud) (pud_val(pud) & _PAGE_TABLE) +#define pud_clear(pudp) ({ pud_val(*pudp) = 0; }) +#define pud_page(pud) (mem_map + ((unsigned long)(__va(pud_val(pud)) - PAGE_OFFSET) >> PAGE_SHIFT)) #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) @@ -209,9 +209,9 @@ static inline pgd_t *pgd_offset_k(unsigned long address) /* Find an entry in the second-level page table.. */ -static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address) +static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address) { - return (pmd_t *)__pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1)); + return (pmd_t *)pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1)); } /* Find an entry in the third-level page table.. */ @@ -239,11 +239,15 @@ static inline void nocache_page(void *vaddr) if (CPU_IS_040_OR_060) { pgd_t *dir; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; dir = pgd_offset_k(addr); - pmdp = pmd_offset(dir, addr); + p4dp = p4d_offset(dir, addr); + pudp = pud_offset(p4dp, addr); + pmdp = pmd_offset(pudp, addr); ptep = pte_offset_kernel(pmdp, addr); *ptep = pte_mknocache(*ptep); } @@ -255,11 +259,15 @@ static inline void cache_page(void *vaddr) if (CPU_IS_040_OR_060) { pgd_t *dir; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; dir = pgd_offset_k(addr); - pmdp = pmd_offset(dir, addr); + p4dp = p4d_offset(dir, addr); + pudp = pud_offset(p4dp, addr); + pmdp = pmd_offset(pudp, addr); ptep = pte_offset_kernel(pmdp, addr); *ptep = pte_mkcache(*ptep); } diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h index 700d8195880c..05e1e1e77a9a 100644 --- a/arch/m68k/include/asm/page.h +++ b/arch/m68k/include/asm/page.h @@ -21,19 +21,22 @@ /* * These are used to make use of C type-checking.. */ -typedef struct { unsigned long pte; } pte_t; +#if !defined(CONFIG_MMU) || CONFIG_PGTABLE_LEVELS == 3 typedef struct { unsigned long pmd[16]; } pmd_t; +#define pmd_val(x) ((&x)->pmd[0]) +#define __pmd(x) ((pmd_t) { { (x) }, }) +#endif + +typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; typedef struct page *pgtable_t; #define pte_val(x) ((x).pte) -#define pmd_val(x) ((&x)->pmd[0]) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) #define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { { (x) }, }) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index 646c174fff99..2bf5c3501e78 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -2,7 +2,12 @@ #ifndef _M68K_PGTABLE_H #define _M68K_PGTABLE_H -#include + +#if defined(CONFIG_SUN3) || defined(CONFIG_COLDFIRE) +#include +#else +#include +#endif #include @@ -30,9 +35,7 @@ /* PMD_SHIFT determines the size of the area a second-level page table can map */ -#ifdef CONFIG_SUN3 -#define PMD_SHIFT 17 -#else +#if CONFIG_PGTABLE_LEVELS == 3 #define PMD_SHIFT 22 #endif #define PMD_SIZE (1UL << PMD_SHIFT) diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 856121122b91..11b95dadf7c0 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -17,8 +17,6 @@ extern const char bad_pmd_string[]; -#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) - #define __pte_free_tlb(tlb,pte,addr) \ do { \ pgtable_pte_page_dtor(pte); \ @@ -41,7 +39,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page * inside the pgd, so has no extra memory associated with it. */ #define pmd_free(mm, x) do { } while (0) -#define __pmd_free_tlb(tlb, x, addr) do { } while (0) static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { @@ -58,6 +55,4 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm) return new_pgd; } -#define pgd_populate(mm, pmd, pte) BUG() - #endif /* SUN3_PGALLOC_H */ diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index c987d50866b4..bc4155264810 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -110,11 +110,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pmd_set(pmdp,ptep) do {} while (0) -static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp) -{ - pgd_val(*pgdp) = virt_to_phys(pmdp); -} - #define __pte_page(pte) \ ((unsigned long) __va ((pte_val (pte) & SUN3_PAGE_PGNUM_MASK) << PAGE_SHIFT)) #define __pmd_page(pmd) \ @@ -145,16 +140,9 @@ static inline int pmd_present2 (pmd_t *pmd) { return pmd_val (*pmd) & SUN3_PMD_V #define pmd_present(pmd) (!pmd_none2(&(pmd))) static inline void pmd_clear (pmd_t *pmdp) { pmd_val (*pmdp) = 0; } -static inline int pgd_none (pgd_t pgd) { return 0; } -static inline int pgd_bad (pgd_t pgd) { return 0; } -static inline int pgd_present (pgd_t pgd) { return 1; } -static inline void pgd_clear (pgd_t *pgdp) {} - #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) -#define pmd_ERROR(e) \ - pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) @@ -194,12 +182,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; /* Find an entry in a kernel pagetable directory. */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) -/* Find an entry in the second-level pagetable. */ -static inline pmd_t *pmd_offset (pgd_t *pgd, unsigned long address) -{ - return (pmd_t *) pgd; -} - /* Find an entry in the third-level pagetable. */ #define pte_index(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pte_offset_kernel(pmd, address) ((pte_t *) __pmd_page(*pmd) + pte_index(address)) diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 6363ec83a290..18a4de7d5934 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -465,6 +465,8 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5, for (;;) { struct mm_struct *mm = current->mm; pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; pte_t *pte; spinlock_t *ptl; @@ -474,7 +476,13 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5, pgd = pgd_offset(mm, (unsigned long)mem); if (!pgd_present(*pgd)) goto bad_access; - pmd = pmd_offset(pgd, (unsigned long)mem); + p4d = p4d_offset(pgd, (unsigned long)mem); + if (!p4d_present(*p4d)) + goto bad_access; + pud = pud_offset(p4d, (unsigned long)mem); + if (!pud_present(*pud)) + goto bad_access; + pmd = pmd_offset(pud, (unsigned long)mem); if (!pmd_present(*pmd)) goto bad_access; pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl); diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 778cacb7d57b..27c453f4fffe 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -130,8 +130,10 @@ static inline void init_pointer_tables(void) /* insert pointer tables allocated so far into the tablelist */ init_pointer_table((unsigned long)kernel_pg_dir); for (i = 0; i < PTRS_PER_PGD; i++) { - if (pgd_present(kernel_pg_dir[i])) - init_pointer_table(__pgd_page(kernel_pg_dir[i])); + pud_t *pud = (pud_t *)(&kernel_pg_dir[i]); + + if (pud_present(*pud)) + init_pointer_table(pgd_page_vaddr(kernel_pg_dir[i])); } /* insert also pointer table that we used to unmap the zero page */ diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c index 23f9466aabb5..120030ad8dc4 100644 --- a/arch/m68k/mm/kmap.c +++ b/arch/m68k/mm/kmap.c @@ -63,18 +63,23 @@ static void __free_io_area(void *addr, unsigned long size) { unsigned long virtaddr = (unsigned long)addr; pgd_t *pgd_dir; + p4d_t *p4d_dir; + pud_t *pud_dir; pmd_t *pmd_dir; pte_t *pte_dir; while ((long)size > 0) { pgd_dir = pgd_offset_k(virtaddr); - if (pgd_bad(*pgd_dir)) { - printk("iounmap: bad pgd(%08lx)\n", pgd_val(*pgd_dir)); - pgd_clear(pgd_dir); + p4d_dir = p4d_offset(pgd_dir, virtaddr); + pud_dir = pud_offset(p4d_dir, virtaddr); + if (pud_bad(*pud_dir)) { + printk("iounmap: bad pud(%08lx)\n", pud_val(*pud_dir)); + pud_clear(pud_dir); return; } - pmd_dir = pmd_offset(pgd_dir, virtaddr); + pmd_dir = pmd_offset(pud_dir, virtaddr); +#if CONFIG_PGTABLE_LEVELS == 3 if (CPU_IS_020_OR_030) { int pmd_off = (virtaddr/PTRTREESIZE) & 15; int pmd_type = pmd_dir->pmd[pmd_off] & _DESCTYPE_MASK; @@ -87,6 +92,7 @@ static void __free_io_area(void *addr, unsigned long size) } else if (pmd_type == 0) continue; } +#endif if (pmd_bad(*pmd_dir)) { printk("iounmap: bad pmd (%08lx)\n", pmd_val(*pmd_dir)); @@ -159,6 +165,8 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla unsigned long virtaddr, retaddr; long offset; pgd_t *pgd_dir; + p4d_t *p4d_dir; + pud_t *pud_dir; pmd_t *pmd_dir; pte_t *pte_dir; @@ -245,18 +253,23 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla printk ("\npa=%#lx va=%#lx ", physaddr, virtaddr); #endif pgd_dir = pgd_offset_k(virtaddr); - pmd_dir = pmd_alloc(&init_mm, pgd_dir, virtaddr); + p4d_dir = p4d_offset(pgd_dir, virtaddr); + pud_dir = pud_offset(p4d_dir, virtaddr); + pmd_dir = pmd_alloc(&init_mm, pud_dir, virtaddr); if (!pmd_dir) { printk("ioremap: no mem for pmd_dir\n"); return NULL; } +#if CONFIG_PGTABLE_LEVELS == 3 if (CPU_IS_020_OR_030) { pmd_dir->pmd[(virtaddr/PTRTREESIZE) & 15] = physaddr; physaddr += PTRTREESIZE; virtaddr += PTRTREESIZE; size -= PTRTREESIZE; - } else { + } else +#endif + { pte_dir = pte_alloc_kernel(pmd_dir, virtaddr); if (!pte_dir) { printk("ioremap: no mem for pte_dir\n"); @@ -307,6 +320,8 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode) { unsigned long virtaddr = (unsigned long)addr; pgd_t *pgd_dir; + p4d_t *p4d_dir; + pud_t *pud_dir; pmd_t *pmd_dir; pte_t *pte_dir; @@ -341,13 +356,16 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode) while ((long)size > 0) { pgd_dir = pgd_offset_k(virtaddr); - if (pgd_bad(*pgd_dir)) { - printk("iocachemode: bad pgd(%08lx)\n", pgd_val(*pgd_dir)); - pgd_clear(pgd_dir); + p4d_dir = p4d_offset(pgd_dir, virtaddr); + pud_dir = pud_offset(p4d_dir, virtaddr); + if (pud_bad(*pud_dir)) { + printk("iocachemode: bad pud(%08lx)\n", pud_val(*pud_dir)); + pud_clear(pud_dir); return; } - pmd_dir = pmd_offset(pgd_dir, virtaddr); + pmd_dir = pmd_offset(pud_dir, virtaddr); +#if CONFIG_PGTABLE_LEVELS == 3 if (CPU_IS_020_OR_030) { int pmd_off = (virtaddr/PTRTREESIZE) & 15; @@ -359,6 +377,7 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode) continue; } } +#endif if (pmd_bad(*pmd_dir)) { printk("iocachemode: bad pmd (%08lx)\n", pmd_val(*pmd_dir)); diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 6cb1e41d58d0..0ea375607767 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -92,6 +92,8 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word) unsigned long flags, mmuar, mmutr; struct mm_struct *mm; pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; pte_t *pte; int asid; @@ -113,7 +115,19 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word) return -1; } - pmd = pmd_offset(pgd, mmuar); + p4d = p4d_offset(pgd, mmuar); + if (p4d_none(*p4d)) { + local_irq_restore(flags); + return -1; + } + + pud = pud_offset(p4d, mmuar); + if (pud_none(*pud)) { + local_irq_restore(flags); + return -1; + } + + pmd = pmd_offset(pud, mmuar); if (pmd_none(*pmd)) { local_irq_restore(flags); return -1; diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 356601bf96d9..4857985b8080 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -82,9 +82,11 @@ static pmd_t * __init kernel_ptr_table(void) */ last = (unsigned long)kernel_pg_dir; for (i = 0; i < PTRS_PER_PGD; i++) { - if (!pgd_present(kernel_pg_dir[i])) + pud_t *pud = (pud_t *)(&kernel_pg_dir[i]); + + if (!pud_present(*pud)) continue; - pmd = __pgd_page(kernel_pg_dir[i]); + pmd = pgd_page_vaddr(kernel_pg_dir[i]); if (pmd > last) last = pmd; } @@ -118,6 +120,8 @@ static void __init map_node(int node) #define ROOTTREESIZE (32*1024*1024) unsigned long physaddr, virtaddr, size; pgd_t *pgd_dir; + p4d_t *p4d_dir; + pud_t *pud_dir; pmd_t *pmd_dir; pte_t *pte_dir; @@ -149,14 +153,16 @@ static void __init map_node(int node) continue; } } - if (!pgd_present(*pgd_dir)) { + p4d_dir = p4d_offset(pgd_dir, virtaddr); + pud_dir = pud_offset(p4d_dir, virtaddr); + if (!pud_present(*pud_dir)) { pmd_dir = kernel_ptr_table(); #ifdef DEBUG printk ("[new pointer %p]", pmd_dir); #endif - pgd_set(pgd_dir, pmd_dir); + pud_set(pud_dir, pmd_dir); } else - pmd_dir = pmd_offset(pgd_dir, virtaddr); + pmd_dir = pmd_offset(pud_dir, virtaddr); if (CPU_IS_020_OR_030) { if (virtaddr) { @@ -304,4 +310,3 @@ void __init paging_init(void) node_set_state(i, N_NORMAL_MEMORY); } } - diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c index 89e630e66555..c4b8aa1d80f4 100644 --- a/arch/m68k/sun3x/dvma.c +++ b/arch/m68k/sun3x/dvma.c @@ -80,6 +80,8 @@ inline int dvma_map_cpu(unsigned long kaddr, unsigned long vaddr, int len) { pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; unsigned long end; int ret = 0; @@ -90,12 +92,14 @@ inline int dvma_map_cpu(unsigned long kaddr, pr_debug("dvma: mapping kern %08lx to virt %08lx\n", kaddr, vaddr); pgd = pgd_offset_k(vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); do { pmd_t *pmd; unsigned long end2; - if((pmd = pmd_alloc(&init_mm, pgd, vaddr)) == NULL) { + if((pmd = pmd_alloc(&init_mm, pud, vaddr)) == NULL) { ret = -ENOMEM; goto out; } @@ -196,4 +200,3 @@ void dvma_unmap_iommu(unsigned long baddr, int len) } } - From ed48e1f812b585e2af5dee6e08712c64d75978e2 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:54:03 -0800 Subject: [PATCH 79/86] microblaze: use pgtable-nopmd instead of 4level-fixup microblaze has only two-level page tables and can use pgtable-nopmd and folding of the upper layers. Replace usage of include/asm-generic/4level-fixup.h and explicit definition of __PAGETABLE_PMD_FOLDED in microblaze with include/asm-generic/pgtable-nopmd.h and adjust page table manipulation macros and functions accordingly. Link: http://lkml.kernel.org/r/1572938135-31886-7-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/page.h | 3 --- arch/microblaze/include/asm/pgalloc.h | 16 -------------- arch/microblaze/include/asm/pgtable.h | 32 ++------------------------- arch/microblaze/kernel/signal.c | 10 ++++++--- arch/microblaze/mm/init.c | 7 ++++-- arch/microblaze/mm/pgtable.c | 13 +++++++++-- 6 files changed, 25 insertions(+), 56 deletions(-) diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index d506bb0893f9..f4b44b24b02e 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -90,7 +90,6 @@ typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pgprot; } pgprot_t; /* FIXME this can depend on linux kernel version */ # ifdef CONFIG_MMU -typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; # else /* CONFIG_MMU */ typedef struct { unsigned long ste[64]; } pmd_t; @@ -103,7 +102,6 @@ typedef struct { p4d_t pge[1]; } pgd_t; # define pgprot_val(x) ((x).pgprot) # ifdef CONFIG_MMU -# define pmd_val(x) ((x).pmd) # define pgd_val(x) ((x).pgd) # else /* CONFIG_MMU */ # define pmd_val(x) ((x).ste[0]) @@ -112,7 +110,6 @@ typedef struct { p4d_t pge[1]; } pgd_t; # endif /* CONFIG_MMU */ # define __pte(x) ((pte_t) { (x) }) -# define __pmd(x) ((pmd_t) { (x) }) # define __pgd(x) ((pgd_t) { (x) }) # define __pgprot(x) ((pgprot_t) { (x) }) diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index 7ecb05baa601..fcf1e23f2e0a 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -41,13 +41,6 @@ static inline void free_pgd(pgd_t *pgd) #define pmd_pgtable(pmd) pmd_page(pmd) -/* - * We don't have any real pmd's, and this code never triggers because - * the pgd will always be present.. - */ -#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); }) -#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); }) - extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte)) @@ -58,15 +51,6 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); #define pmd_populate_kernel(mm, pmd, pte) \ (pmd_val(*(pmd)) = (unsigned long) (pte)) -/* - * We don't have any real pmd's, and this code never triggers because - * the pgd will always be present.. - */ -#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(mm, x) do { } while (0) -#define __pmd_free_tlb(tlb, x, addr) pmd_free((tlb)->mm, x) -#define pgd_populate(mm, pmd, pte) BUG() - #endif /* CONFIG_MMU */ #endif /* _ASM_MICROBLAZE_PGALLOC_H */ diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 954b69af451f..2def331f9e2c 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -59,9 +59,7 @@ extern int mem_init_done; #else /* CONFIG_MMU */ -#include - -#define __PAGETABLE_PMD_FOLDED 1 +#include #ifdef __KERNEL__ #ifndef __ASSEMBLY__ @@ -138,13 +136,8 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } * */ -/* PMD_SHIFT determines the size of the area mapped by the PTE pages */ -#define PMD_SHIFT (PAGE_SHIFT + PTE_SHIFT) -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) - /* PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT PMD_SHIFT +#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -165,9 +158,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define pte_ERROR(e) \ printk(KERN_ERR "%s:%d: bad pte "PTE_FMT".\n", \ __FILE__, __LINE__, pte_val(e)) -#define pmd_ERROR(e) \ - printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \ - __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \ __FILE__, __LINE__, pgd_val(e)) @@ -313,18 +303,6 @@ extern unsigned long empty_zero_page[1024]; __pte(((pte_basic_t)(pfn) << PFN_SHIFT_OFFSET) | pgprot_val(prot)) #ifndef __ASSEMBLY__ -/* - * The "pgd_xxx()" functions here are trivial for a folded two-level - * setup: the pgd is never bad, and a pmd always exists (as it's folded - * into the pgd entry) - */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -#define pgd_clear(xp) do { } while (0) -#define pgd_page(pgd) \ - ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) - /* * The following only work if pte_present() is true. * Undefined behaviour if not.. @@ -479,12 +457,6 @@ static inline void ptep_mkdirty(struct mm_struct *mm, #define pgd_index(address) ((address) >> PGDIR_SHIFT) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -/* Find an entry in the second-level page table.. */ -static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address) -{ - return (pmd_t *) dir; -} - /* Find an entry in the third-level page table.. */ #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index cdd4feb279c5..c9125c328949 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -160,6 +160,9 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, int err = 0, sig = ksig->sig; unsigned long address = 0; #ifdef CONFIG_MMU + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; #endif @@ -195,9 +198,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, address = ((unsigned long)frame->tramp); #ifdef CONFIG_MMU - pmdp = pmd_offset(pud_offset( - pgd_offset(current->mm, address), - address), address); + pgdp = pgd_offset(current->mm, address); + p4dp = p4d_offset(pgdp, address); + pudp = pud_offset(p4dp, address); + pmdp = pmd_offset(pudp, address); preempt_disable(); ptep = pte_offset_map(pmdp, address); diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index a015a951c8b7..050fc621c920 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -53,8 +53,11 @@ EXPORT_SYMBOL(kmap_prot); static inline pte_t *virt_to_kpte(unsigned long vaddr) { - return pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), - vaddr), vaddr); + pgd_t *pgd = pgd_offset_k(vaddr); + p4d_t *p4d = p4d_offset(pgd, vaddr); + pud_t *pud = pud_offset(p4d, vaddr); + + return pte_offset_kernel(pmd_offset(pud, vaddr), vaddr); } static void __init highmem_init(void) diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 010bb9cee2e4..68c26cacd930 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -134,11 +134,16 @@ EXPORT_SYMBOL(iounmap); int map_page(unsigned long va, phys_addr_t pa, int flags) { + p4d_t *p4d; + pud_t *pud; pmd_t *pd; pte_t *pg; int err = -ENOMEM; + /* Use upper 10 bits of VA to index the first level map */ - pd = pmd_offset(pgd_offset_k(va), va); + p4d = p4d_offset(pgd_offset_k(va), va); + pud = pud_offset(p4d, va); + pd = pmd_offset(pud, va); /* Use middle 10 bits of VA to index the second-level map */ pg = pte_alloc_kernel(pd, va); /* from powerpc - pgtable.c */ /* pg = pte_alloc_kernel(&init_mm, pd, va); */ @@ -188,13 +193,17 @@ void __init mapin_ram(void) static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) { pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; pte_t *pte; int retval = 0; pgd = pgd_offset(mm, addr & PAGE_MASK); if (pgd) { - pmd = pmd_offset(pgd, addr & PAGE_MASK); + p4d = p4d_offset(pgd, addr & PAGE_MASK); + pud = pud_offset(p4d, addr & PAGE_MASK); + pmd = pmd_offset(pud, addr & PAGE_MASK); if (pmd_present(*pmd)) { pte = pte_offset_kernel(pmd, addr & PAGE_MASK); if (pte) { From 7c2763c42326a071220077513a9cae90db46b818 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:54:08 -0800 Subject: [PATCH 80/86] nds32: use pgtable-nopmd instead of 4level-fixup nds32 has only two-level page tables and can use pgtable-nopmd and folding of the upper layers. Replace usage of include/asm-generic/4level-fixup.h and explicit definition of __PAGETABLE_PMD_FOLDED in nds32 with include/asm-generic/pgtable-nopmd.h and adjust page table manipulation macros and functions accordingly. Link: http://lkml.kernel.org/r/1572938135-31886-8-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nds32/include/asm/page.h | 3 --- arch/nds32/include/asm/pgalloc.h | 3 --- arch/nds32/include/asm/pgtable.h | 12 +----------- arch/nds32/include/asm/tlb.h | 1 - arch/nds32/kernel/pm.c | 4 +++- arch/nds32/mm/fault.c | 16 +++++++++++++--- arch/nds32/mm/init.c | 11 ++++++++--- arch/nds32/mm/mm-nds32.c | 6 +++++- arch/nds32/mm/proc.c | 26 +++++++++++++++++--------- 9 files changed, 47 insertions(+), 35 deletions(-) diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h index 8feb1fa12f01..86b32014c5f9 100644 --- a/arch/nds32/include/asm/page.h +++ b/arch/nds32/include/asm/page.h @@ -41,17 +41,14 @@ void clear_page(void *page); void copy_page(void *to, void *from); typedef unsigned long pte_t; -typedef unsigned long pmd_t; typedef unsigned long pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) -#define pmd_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) -#define __pmd(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h index 37125e6884d7..85c117347c86 100644 --- a/arch/nds32/include/asm/pgalloc.h +++ b/arch/nds32/include/asm/pgalloc.h @@ -15,9 +15,6 @@ /* * Since we have only two-level page tables, these are trivial */ -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(mm, pmd) do { } while (0) -#define pgd_populate(mm, pmd, pte) BUG() #define pmd_pgtable(pmd) pmd_page(pmd) extern pgd_t *pgd_alloc(struct mm_struct *mm); diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 6fbf251cfc26..0214e4150539 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -4,8 +4,7 @@ #ifndef _ASMNDS32_PGTABLE_H #define _ASMNDS32_PGTABLE_H -#define __PAGETABLE_PMD_FOLDED 1 -#include +#include #include #include @@ -18,26 +17,20 @@ #ifdef CONFIG_ANDES_PAGE_SIZE_4KB #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 -#define PMD_SHIFT 22 -#define PTRS_PER_PMD 1 #define PTRS_PER_PTE 1024 #endif #ifdef CONFIG_ANDES_PAGE_SIZE_8KB #define PGDIR_SHIFT 24 #define PTRS_PER_PGD 256 -#define PMD_SHIFT 24 -#define PTRS_PER_PMD 1 #define PTRS_PER_PTE 2048 #endif #ifndef __ASSEMBLY__ extern void __pte_error(const char *file, int line, unsigned long val); -extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) -#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) #endif /* !__ASSEMBLY__ */ @@ -368,9 +361,6 @@ static inline pmd_t __mk_pmd(pte_t * ptep, unsigned long prot) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) -/* Find an entry in the second-level page table.. */ -#define pmd_offset(dir, addr) ((pmd_t *)(dir)) - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const unsigned long mask = 0xfff; diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h index a8aff1c8b4f4..672603804a3b 100644 --- a/arch/nds32/include/asm/tlb.h +++ b/arch/nds32/include/asm/tlb.h @@ -7,6 +7,5 @@ #include #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) -#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tln)->mm, pmd) #endif diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c index ffa8040d8be7..e25700e125d8 100644 --- a/arch/nds32/kernel/pm.c +++ b/arch/nds32/kernel/pm.c @@ -14,6 +14,7 @@ unsigned int *phy_addr_sp_tmp; static void nds32_suspend2ram(void) { pgd_t *pgdv; + p4d_t *p4dv; pud_t *pudv; pmd_t *pmdv; pte_t *ptev; @@ -21,7 +22,8 @@ static void nds32_suspend2ram(void) pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) & L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume); - pudv = pud_offset(pgdv, (unsigned int)cpu_resume); + p4dv = p4d_offset(pgdv, (unsigned int)cpu_resume); + pudv = pud_offset(p4dv, (unsigned int)cpu_resume); pmdv = pmd_offset(pudv, (unsigned int)cpu_resume); ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume); diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 064ae5d2159d..906dfb25353c 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -31,6 +31,8 @@ void show_pte(struct mm_struct *mm, unsigned long addr) pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd)); do { + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; if (pgd_none(*pgd)) @@ -41,7 +43,9 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; } - pmd = pmd_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_offset(p4d, addr); + pmd = pmd_offset(pud, addr); #if PTRS_PER_PMD != 1 pr_alert(", *pmd=%08lx", pmd_val(*pmd)); #endif @@ -359,6 +363,7 @@ vmalloc_fault: unsigned int index = pgd_index(addr); pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; @@ -369,8 +374,13 @@ vmalloc_fault: if (!pgd_present(*pgd_k)) goto no_context; - pud = pud_offset(pgd, addr); - pud_k = pud_offset(pgd_k, addr); + p4d = p4d_offset(pgd, addr); + p4d_k = p4d_offset(pgd_k, addr); + if (!p4d_present(*p4d_k)) + goto no_context; + + pud = pud_offset(p4d, addr); + pud_k = pud_offset(p4d_k, addr); if (!pud_present(*pud_k)) goto no_context; diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 55703b03d172..0be3833f6814 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -54,6 +54,7 @@ static void __init map_ram(void) { unsigned long v, p, e; pgd_t *pge; + p4d_t *p4e; pud_t *pue; pmd_t *pme; pte_t *pte; @@ -69,7 +70,8 @@ static void __init map_ram(void) while (p < e) { int j; - pue = pud_offset(pge, v); + p4e = p4d_offset(pge, v); + pue = pud_offset(p4e, v); pme = pmd_offset(pue, v); if ((u32) pue != (u32) pge || (u32) pme != (u32) pge) { @@ -100,6 +102,7 @@ static void __init fixedrange_init(void) { unsigned long vaddr; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; #ifdef CONFIG_HIGHMEM @@ -111,7 +114,8 @@ static void __init fixedrange_init(void) */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1); pgd = swapper_pg_dir + pgd_index(vaddr); - pud = pud_offset(pgd, vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); pmd = pmd_offset(pud, vaddr); fixmap_pmd_p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!fixmap_pmd_p) @@ -126,7 +130,8 @@ static void __init fixedrange_init(void) vaddr = PKMAP_BASE; pgd = swapper_pg_dir + pgd_index(vaddr); - pud = pud_offset(pgd, vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); pmd = pmd_offset(pud, vaddr); pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pte) diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c index 3b43798d754f..8503bee882d1 100644 --- a/arch/nds32/mm/mm-nds32.c +++ b/arch/nds32/mm/mm-nds32.c @@ -74,6 +74,8 @@ void setup_mm_for_reboot(char mode) { unsigned long pmdval; pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; int i; @@ -84,7 +86,9 @@ void setup_mm_for_reboot(char mode) for (i = 0; i < USER_PTRS_PER_PGD; i++) { pmdval = (i << PGDIR_SHIFT); - pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT); + p4d = p4d_offset(pgd, i << PGDIR_SHIFT); + pud = pud_offset(p4d, i << PGDIR_SHIFT); + pmd = pmd_offset(pud + i, i << PGDIR_SHIFT); set_pmd(pmd, __pmd(pmdval)); } } diff --git a/arch/nds32/mm/proc.c b/arch/nds32/mm/proc.c index ba80992d13a2..837ae7728830 100644 --- a/arch/nds32/mm/proc.c +++ b/arch/nds32/mm/proc.c @@ -16,10 +16,14 @@ extern struct cache_info L1_cache_info[2]; int va_kernel_present(unsigned long addr) { + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; pte_t *ptep, pte; - pmd = pmd_offset(pgd_offset_k(addr), addr); + p4d = p4d_offset(pgd_offset_k(addr), addr); + pud = pud_offset(p4d, addr); + pmd = pmd_offset(pud, addr); if (!pmd_none(*pmd)) { ptep = pte_offset_map(pmd, addr); pte = *ptep; @@ -32,20 +36,24 @@ int va_kernel_present(unsigned long addr) pte_t va_present(struct mm_struct * mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *ptep, pte; pgd = pgd_offset(mm, addr); if (!pgd_none(*pgd)) { - pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) { - ptep = pte_offset_map(pmd, addr); - pte = *ptep; - if (pte_present(pte)) - return pte; + p4d = p4d_offset(pgd, addr); + if (!p4d_none(*p4d)) { + pud = pud_offset(p4d, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) { + ptep = pte_offset_map(pmd, addr); + pte = *ptep; + if (pte_present(pte)) + return pte; + } } } } From d96885e277b5edcd1e474e8b1579005163f23dbe Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:54:12 -0800 Subject: [PATCH 81/86] parisc: use pgtable-nopXd instead of 4level-fixup parisc has two or three levels of page tables and can use appropriate pgtable-nopXd and folding of the upper layers. Replace usage of include/asm-generic/4level-fixup.h and explicit definitions of __PAGETABLE_PxD_FOLDED in parisc with include/asm-generic/pgtable-nopmd.h for two-level configurations and with include/asm-generic/pgtable-nopud.h for three-lelve configurations and adjust page table manipulation macros and functions accordingly. Link: http://lkml.kernel.org/r/1572938135-31886-9-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Helge Deller Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/include/asm/page.h | 30 +++++++++++------- arch/parisc/include/asm/pgalloc.h | 41 +++++++++--------------- arch/parisc/include/asm/pgtable.h | 52 +++++++++++++++---------------- arch/parisc/include/asm/tlb.h | 2 ++ arch/parisc/kernel/cache.c | 13 +++++--- arch/parisc/kernel/pci-dma.c | 9 ++++-- arch/parisc/mm/fixmap.c | 10 ++++-- 7 files changed, 81 insertions(+), 76 deletions(-) diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 93caf17ac5e2..796ae29e9b9a 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -42,48 +42,54 @@ typedef struct { unsigned long pte; } pte_t; /* either 32 or 64bit */ /* NOTE: even on 64 bits, these entries are __u32 because we allocate * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */ -typedef struct { __u32 pmd; } pmd_t; typedef struct { __u32 pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; -#define pte_val(x) ((x).pte) -/* These do not work lvalues, so make sure we don't use them as such. */ +#if CONFIG_PGTABLE_LEVELS == 3 +typedef struct { __u32 pmd; } pmd_t; +#define __pmd(x) ((pmd_t) { (x) } ) +/* pXd_val() do not work as lvalues, so make sure we don't use them as such. */ #define pmd_val(x) ((x).pmd + 0) +#endif + +#define pte_val(x) ((x).pte) #define pgd_val(x) ((x).pgd + 0) #define pgprot_val(x) ((x).pgprot) #define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) -#define __pmd_val_set(x,n) (x).pmd = (n) -#define __pgd_val_set(x,n) (x).pgd = (n) - #else /* * .. while these make it easier on the compiler */ typedef unsigned long pte_t; + +#if CONFIG_PGTABLE_LEVELS == 3 typedef __u32 pmd_t; +#define pmd_val(x) (x) +#define __pmd(x) (x) +#endif + typedef __u32 pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) -#define pmd_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) -#define __pmd(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) -#define __pmd_val_set(x,n) (x) = (n) -#define __pgd_val_set(x,n) (x) = (n) - #endif /* STRICT_MM_TYPECHECKS */ +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#if CONFIG_PGTABLE_LEVELS == 3 +#define set_pud(pudptr, pudval) (*(pudptr) = (pudval)) +#endif + typedef struct page *pgtable_t; typedef struct __physmem_range { diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index d98647c29b74..9ac74da256b8 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -34,13 +34,13 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) /* Populate first pmd with allocated memory. We mark it * with PxD_FLAG_ATTACHED as a signal to the system that this * pmd entry may not be cleared. */ - __pgd_val_set(*actual_pgd, (PxD_FLAG_PRESENT | - PxD_FLAG_VALID | - PxD_FLAG_ATTACHED) - + (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)); + set_pgd(actual_pgd, __pgd((PxD_FLAG_PRESENT | + PxD_FLAG_VALID | + PxD_FLAG_ATTACHED) + + (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT))); /* The first pmd entry also is marked with PxD_FLAG_ATTACHED as * a signal that this pmd may not be freed */ - __pgd_val_set(*pgd, PxD_FLAG_ATTACHED); + set_pgd(pgd, __pgd(PxD_FLAG_ATTACHED)); #endif } spin_lock_init(pgd_spinlock(actual_pgd)); @@ -59,10 +59,10 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) /* Three Level Page Table Support for pmd's */ -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - __pgd_val_set(*pgd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) + - (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)); + set_pud(pud, __pud((PxD_FLAG_PRESENT | PxD_FLAG_VALID) + + (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT))); } static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) @@ -88,19 +88,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) free_pages((unsigned long)pmd, PMD_ORDER); } -#else - -/* Two Level Page Table Support for pmd's */ - -/* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. - */ - -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(mm, x) do { } while (0) -#define pgd_populate(mm, pmd, pte) BUG() - #endif static inline void @@ -110,14 +97,14 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) /* preserve the gateway marker if this is the beginning of * the permanent pmd */ if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) - __pmd_val_set(*pmd, (PxD_FLAG_PRESENT | - PxD_FLAG_VALID | - PxD_FLAG_ATTACHED) - + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)); + set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | + PxD_FLAG_VALID | + PxD_FLAG_ATTACHED) + + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT))); else #endif - __pmd_val_set(*pmd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) - + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)); + set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID) + + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT))); } #define pmd_populate(mm, pmd, pte_page) \ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 4ac374b3a99f..f0a365950536 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -3,7 +3,12 @@ #define _PARISC_PGTABLE_H #include -#include + +#if CONFIG_PGTABLE_LEVELS == 3 +#include +#elif CONFIG_PGTABLE_LEVELS == 2 +#include +#endif #include @@ -101,8 +106,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#if CONFIG_PGTABLE_LEVELS == 3 #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, (unsigned long)pmd_val(e)) +#endif #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e)) @@ -132,19 +139,18 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define PTRS_PER_PTE (1UL << BITS_PER_PTE) /* Definitions for 2nd level */ +#if CONFIG_PGTABLE_LEVELS == 3 #define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#if CONFIG_PGTABLE_LEVELS == 3 #define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) +#define PTRS_PER_PMD (1UL << BITS_PER_PMD) #else -#define __PAGETABLE_PMD_FOLDED 1 #define BITS_PER_PMD 0 #endif -#define PTRS_PER_PMD (1UL << BITS_PER_PMD) /* Definitions for 1st level */ -#define PGDIR_SHIFT (PMD_SHIFT + BITS_PER_PMD) +#define PGDIR_SHIFT (PLD_SHIFT + BITS_PER_PTE + BITS_PER_PMD) #if (PGDIR_SHIFT + PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY) > BITS_PER_LONG #define BITS_PER_PGD (BITS_PER_LONG - PGDIR_SHIFT) #else @@ -317,6 +323,8 @@ extern unsigned long *empty_zero_page; #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) #define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) +#define pud_flag(x) (pud_val(x) & PxD_FLAG_MASK) +#define pud_address(x) ((unsigned long)(pud_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) #define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK) #define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) @@ -334,42 +342,32 @@ static inline void pmd_clear(pmd_t *pmd) { if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) /* This is the entry pointing to the permanent pmd * attached to the pgd; cannot clear it */ - __pmd_val_set(*pmd, PxD_FLAG_ATTACHED); + set_pmd(pmd, __pmd(PxD_FLAG_ATTACHED)); else #endif - __pmd_val_set(*pmd, 0); + set_pmd(pmd, __pmd(0)); } #if CONFIG_PGTABLE_LEVELS == 3 -#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd))) -#define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd)) +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_address(pud))) +#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) /* For 64 bit we have three level tables */ -#define pgd_none(x) (!pgd_val(x)) -#define pgd_bad(x) (!(pgd_flag(x) & PxD_FLAG_VALID)) -#define pgd_present(x) (pgd_flag(x) & PxD_FLAG_PRESENT) -static inline void pgd_clear(pgd_t *pgd) { +#define pud_none(x) (!pud_val(x)) +#define pud_bad(x) (!(pud_flag(x) & PxD_FLAG_VALID)) +#define pud_present(x) (pud_flag(x) & PxD_FLAG_PRESENT) +static inline void pud_clear(pud_t *pud) { #if CONFIG_PGTABLE_LEVELS == 3 - if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED) - /* This is the permanent pmd attached to the pgd; cannot + if(pud_flag(*pud) & PxD_FLAG_ATTACHED) + /* This is the permanent pmd attached to the pud; cannot * free it */ return; #endif - __pgd_val_set(*pgd, 0); + set_pud(pud, __pud(0)); } -#else -/* - * The "pgd_xxx()" functions here are trivial for a folded two-level - * setup: the pgd is never bad, and a pmd always exists (as it's folded - * into the pgd entry) - */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -static inline void pgd_clear(pgd_t * pgdp) { } #endif /* @@ -452,7 +450,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #if CONFIG_PGTABLE_LEVELS == 3 #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) #define pmd_offset(dir,address) \ -((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(address)) +((pmd_t *) pud_page_vaddr(*(dir)) + pmd_index(address)) #else #define pmd_offset(dir,addr) ((pmd_t *) dir) #endif diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h index 8c0446b04c9e..44235f367674 100644 --- a/arch/parisc/include/asm/tlb.h +++ b/arch/parisc/include/asm/tlb.h @@ -4,7 +4,9 @@ #include +#if CONFIG_PGTABLE_LEVELS == 3 #define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) +#endif #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #endif diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 2407b0b789d3..1eedfecc5137 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -534,11 +534,14 @@ static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr) pte_t *ptep = NULL; if (!pgd_none(*pgd)) { - pud_t *pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd_t *pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) - ptep = pte_offset_map(pmd, addr); + p4d_t *p4d = p4d_offset(pgd, addr); + if (!p4d_none(*p4d)) { + pud_t *pud = pud_offset(p4d, addr); + if (!pud_none(*pud)) { + pmd_t *pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) + ptep = pte_offset_map(pmd, addr); + } } } return ptep; diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index a60d47fd4d55..0f1b460ee715 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -133,9 +133,14 @@ static inline int map_uncached_pages(unsigned long vaddr, unsigned long size, dir = pgd_offset_k(vaddr); do { + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; - - pmd = pmd_alloc(NULL, dir, vaddr); + + p4d = p4d_offset(dir, vaddr); + pud = pud_offset(p4d, vaddr); + pmd = pmd_alloc(NULL, pud, vaddr); + if (!pmd) return -ENOMEM; if (map_pmd_uncached(pmd, vaddr, end - vaddr, &paddr)) diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c index 474cd241c150..e2d8b0a857ee 100644 --- a/arch/parisc/mm/fixmap.c +++ b/arch/parisc/mm/fixmap.c @@ -14,11 +14,13 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys) { unsigned long vaddr = __fix_to_virt(idx); pgd_t *pgd = pgd_offset_k(vaddr); - pmd_t *pmd = pmd_offset(pgd, vaddr); + p4d_t *p4d = p4d_offset(pgd, vaddr); + pud_t *pud = pud_offset(p4d, vaddr); + pmd_t *pmd = pmd_offset(pud, vaddr); pte_t *pte; if (pmd_none(*pmd)) - pmd = pmd_alloc(NULL, pgd, vaddr); + pmd = pmd_alloc(NULL, pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); if (pte_none(*pte)) @@ -32,7 +34,9 @@ void notrace clear_fixmap(enum fixed_addresses idx) { unsigned long vaddr = __fix_to_virt(idx); pgd_t *pgd = pgd_offset_k(vaddr); - pmd_t *pmd = pmd_offset(pgd, vaddr); + p4d_t *p4d = p4d_offset(pgd, vaddr); + pud_t *pud = pud_offset(p4d, vaddr); + pmd_t *pmd = pmd_offset(pud, vaddr); pte_t *pte = pte_offset_kernel(pmd, vaddr); if (WARN_ON(pte_none(*pte))) From 2fa245c1f8c9f68e26174fade45ccae5b060751d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 4 Dec 2019 16:54:16 -0800 Subject: [PATCH 82/86] parisc/hugetlb: use pgtable-nopXd instead of 4level-fixup Link: http://lkml.kernel.org/r/1572938135-31886-10-git-send-email-rppt@kernel.org Signed-off-by: Helge Deller Signed-off-by: Mike Rapoport Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/mm/hugetlbpage.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index d578809e55cf..0e1e212f1c96 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -49,6 +49,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte = NULL; @@ -61,7 +62,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, addr &= HPAGE_MASK; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) @@ -74,6 +76,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte = NULL; @@ -82,11 +85,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, pgd = pgd_offset(mm, addr); if (!pgd_none(*pgd)) { - pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) - pte = pte_offset_map(pmd, addr); + p4d = p4d_offset(pgd, addr); + if (!p4d_none(*p4d)) { + pud = pud_offset(p4d, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) + pte = pte_offset_map(pmd, addr); + } } } return pte; From 7235db268a2777bc380b99b7db49ff7b19c8fb76 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:54:20 -0800 Subject: [PATCH 83/86] sparc32: use pgtable-nopud instead of 4level-fixup 32-bit version of sparc has three-level page tables and can use pgtable-nopud and folding of the upper layers. Replace usage of include/asm-generic/4level-fixup.h with include/asm-generic/pgtable-nopud.h and adjust page table manipulation macros and functions accordingly. Link: http://lkml.kernel.org/r/1572938135-31886-11-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David S. Miller Tested-by: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgalloc_32.h | 6 ++-- arch/sparc/include/asm/pgtable_32.h | 28 ++++++++-------- arch/sparc/mm/fault_32.c | 11 +++++-- arch/sparc/mm/highmem.c | 6 +++- arch/sparc/mm/io-unit.c | 6 +++- arch/sparc/mm/iommu.c | 6 +++- arch/sparc/mm/srmmu.c | 51 ++++++++++++++++++++++------- 7 files changed, 81 insertions(+), 33 deletions(-) diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h index 10538a4d1a1e..eae0c92ec422 100644 --- a/arch/sparc/include/asm/pgalloc_32.h +++ b/arch/sparc/include/asm/pgalloc_32.h @@ -26,14 +26,14 @@ static inline void free_pgd_fast(pgd_t *pgd) #define pgd_free(mm, pgd) free_pgd_fast(pgd) #define pgd_alloc(mm) get_pgd_fast() -static inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp) +static inline void pud_set(pud_t * pudp, pmd_t * pmdp) { unsigned long pa = __nocache_pa(pmdp); - set_pte((pte_t *)pgdp, __pte((SRMMU_ET_PTD | (pa >> 4)))); + set_pte((pte_t *)pudp, __pte((SRMMU_ET_PTD | (pa >> 4)))); } -#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, PMD) +#define pud_populate(MM, PGD, PMD) pud_set(PGD, PMD) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 31da44826645..6d6f44c0cad9 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -12,7 +12,7 @@ #include #ifndef __ASSEMBLY__ -#include +#include #include #include @@ -132,12 +132,12 @@ static inline struct page *pmd_page(pmd_t pmd) return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4)); } -static inline unsigned long pgd_page_vaddr(pgd_t pgd) +static inline unsigned long pud_page_vaddr(pud_t pud) { - if (srmmu_device_memory(pgd_val(pgd))) { + if (srmmu_device_memory(pud_val(pud))) { return ~0; } else { - unsigned long v = pgd_val(pgd) & SRMMU_PTD_PMASK; + unsigned long v = pud_val(pud) & SRMMU_PTD_PMASK; return (unsigned long)__nocache_va(v << 4); } } @@ -184,24 +184,24 @@ static inline void pmd_clear(pmd_t *pmdp) set_pte((pte_t *)&pmdp->pmdv[i], __pte(0)); } -static inline int pgd_none(pgd_t pgd) +static inline int pud_none(pud_t pud) { - return !(pgd_val(pgd) & 0xFFFFFFF); + return !(pud_val(pud) & 0xFFFFFFF); } -static inline int pgd_bad(pgd_t pgd) +static inline int pud_bad(pud_t pud) { - return (pgd_val(pgd) & SRMMU_ET_MASK) != SRMMU_ET_PTD; + return (pud_val(pud) & SRMMU_ET_MASK) != SRMMU_ET_PTD; } -static inline int pgd_present(pgd_t pgd) +static inline int pud_present(pud_t pud) { - return ((pgd_val(pgd) & SRMMU_ET_MASK) == SRMMU_ET_PTD); + return ((pud_val(pud) & SRMMU_ET_MASK) == SRMMU_ET_PTD); } -static inline void pgd_clear(pgd_t *pgdp) +static inline void pud_clear(pud_t *pudp) { - set_pte((pte_t *)pgdp, __pte(0)); + set_pte((pte_t *)pudp, __pte(0)); } /* @@ -319,9 +319,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pgd_offset_k(address) pgd_offset(&init_mm, address) /* Find an entry in the second-level page table.. */ -static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address) +static inline pmd_t *pmd_offset(pud_t * dir, unsigned long address) { - return (pmd_t *) pgd_page_vaddr(*dir) + + return (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); } diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 8d69de111470..89976c9b936c 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -351,6 +351,8 @@ vmalloc_fault: */ int offset = pgd_index(address); pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; + pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pgd = tsk->active_mm->pgd + offset; @@ -363,8 +365,13 @@ vmalloc_fault: return; } - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); + p4d = p4d_offset(pgd, address); + pud = pud_offset(p4d, address); + pmd = pmd_offset(pud, address); + + p4d_k = p4d_offset(pgd_k, address); + pud_k = pud_offset(p4d_k, address); + pmd_k = pmd_offset(pud_k, address); if (pmd_present(*pmd) || !pmd_present(*pmd_k)) goto bad_area_nosemaphore; diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 86bc2a58d26c..d4a80adea7e5 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -39,10 +39,14 @@ static pte_t *kmap_pte; void __init kmap_init(void) { unsigned long address; + p4d_t *p4d; + pud_t *pud; pmd_t *dir; address = __fix_to_virt(FIX_KMAP_BEGIN); - dir = pmd_offset(pgd_offset_k(address), address); + p4d = p4d_offset(pgd_offset_k(address), address); + pud = pud_offset(p4d, address); + dir = pmd_offset(pud, address); /* cache the first kmap pte */ kmap_pte = pte_offset_kernel(dir, address); diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index f770ee7229d8..33a0facd9eb5 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -239,12 +239,16 @@ static void *iounit_alloc(struct device *dev, size_t len, page = va; { pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; long i; pgdp = pgd_offset(&init_mm, addr); - pmdp = pmd_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); + pmdp = pmd_offset(pudp, addr); ptep = pte_offset_map(pmdp, addr); set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot)); diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 71ac353032b6..4d3c6991f0ae 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -343,6 +343,8 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len, page = va; { pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -354,7 +356,9 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len, __flush_page_to_ram(page); pgdp = pgd_offset(&init_mm, addr); - pmdp = pmd_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); + pmdp = pmd_offset(pudp, addr); ptep = pte_offset_map(pmdp, addr); set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot)); diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index cc3ad64479ac..f56c3c9a9793 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -296,6 +296,8 @@ static void __init srmmu_nocache_init(void) void *srmmu_nocache_bitmap; unsigned int bitmap_bits; pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; pte_t *pte; unsigned long paddr, vaddr; @@ -329,6 +331,8 @@ static void __init srmmu_nocache_init(void) while (vaddr < srmmu_nocache_end) { pgd = pgd_offset_k(vaddr); + p4d = p4d_offset(__nocache_fix(pgd), vaddr); + pud = pud_offset(__nocache_fix(p4d), vaddr); pmd = pmd_offset(__nocache_fix(pgd), vaddr); pte = pte_offset_kernel(__nocache_fix(pmd), vaddr); @@ -516,13 +520,17 @@ static inline void srmmu_mapioaddr(unsigned long physaddr, unsigned long virt_addr, int bus_type) { pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; unsigned long tmp; physaddr &= PAGE_MASK; pgdp = pgd_offset_k(virt_addr); - pmdp = pmd_offset(pgdp, virt_addr); + p4dp = p4d_offset(pgdp, virt_addr); + pudp = pud_offset(p4dp, virt_addr); + pmdp = pmd_offset(pudp, virt_addr); ptep = pte_offset_kernel(pmdp, virt_addr); tmp = (physaddr >> 4) | SRMMU_ET_PTE; @@ -551,11 +559,16 @@ void srmmu_mapiorange(unsigned int bus, unsigned long xpa, static inline void srmmu_unmapioaddr(unsigned long virt_addr) { pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; + pgdp = pgd_offset_k(virt_addr); - pmdp = pmd_offset(pgdp, virt_addr); + p4dp = p4d_offset(pgdp, virt_addr); + pudp = pud_offset(p4dp, virt_addr); + pmdp = pmd_offset(pudp, virt_addr); ptep = pte_offset_kernel(pmdp, virt_addr); /* No need to flush uncacheable page. */ @@ -693,20 +706,24 @@ static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned long end) { pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; while (start < end) { pgdp = pgd_offset_k(start); - if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) { + p4dp = p4d_offset(pgdp, start); + pudp = pud_offset(p4dp, start); + if (pud_none(*(pud_t *)__nocache_fix(pudp))) { pmdp = __srmmu_get_nocache( SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); if (pmdp == NULL) early_pgtable_allocfail("pmd"); memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE); - pgd_set(__nocache_fix(pgdp), pmdp); + pud_set(__nocache_fix(pudp), pmdp); } - pmdp = pmd_offset(__nocache_fix(pgdp), start); + pmdp = pmd_offset(__nocache_fix(pudp), start); if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE); if (ptep == NULL) @@ -724,19 +741,23 @@ static void __init srmmu_allocate_ptable_skeleton(unsigned long start, unsigned long end) { pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; while (start < end) { pgdp = pgd_offset_k(start); - if (pgd_none(*pgdp)) { + p4dp = p4d_offset(pgdp, start); + pudp = pud_offset(p4dp, start); + if (pud_none(*pudp)) { pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); if (pmdp == NULL) early_pgtable_allocfail("pmd"); memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE); - pgd_set(pgdp, pmdp); + pud_set((pud_t *)pgdp, pmdp); } - pmdp = pmd_offset(pgdp, start); + pmdp = pmd_offset(pudp, start); if (srmmu_pmd_none(*pmdp)) { ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE); @@ -779,6 +800,8 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start, unsigned long probed; unsigned long addr; pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; int what; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */ @@ -810,18 +833,20 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start, } pgdp = pgd_offset_k(start); + p4dp = p4d_offset(pgdp, start); + pudp = pud_offset(p4dp, start); if (what == 2) { *(pgd_t *)__nocache_fix(pgdp) = __pgd(probed); start += SRMMU_PGDIR_SIZE; continue; } - if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) { + if (pud_none(*(pud_t *)__nocache_fix(pudp))) { pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); if (pmdp == NULL) early_pgtable_allocfail("pmd"); memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE); - pgd_set(__nocache_fix(pgdp), pmdp); + pud_set(__nocache_fix(pudp), pmdp); } pmdp = pmd_offset(__nocache_fix(pgdp), start); if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { @@ -906,6 +931,8 @@ void __init srmmu_paging_init(void) phandle cpunode; char node_str[128]; pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; pmd_t *pmd; pte_t *pte; unsigned long pages_avail; @@ -967,7 +994,9 @@ void __init srmmu_paging_init(void) srmmu_allocate_ptable_skeleton(PKMAP_BASE, PKMAP_END); pgd = pgd_offset_k(PKMAP_BASE); - pmd = pmd_offset(pgd, PKMAP_BASE); + p4d = p4d_offset(pgd, PKMAP_BASE); + pud = pud_offset(p4d, PKMAP_BASE); + pmd = pmd_offset(pud, PKMAP_BASE); pte = pte_offset_kernel(pmd, PKMAP_BASE); pkmap_page_table = pte; From 4e65e76f1e588e6f5d263652179d51b31a2be855 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:54:24 -0800 Subject: [PATCH 84/86] um: remove unused pxx_offset_proc() and addr_pte() functions The pxx_offset_proc() and addr_pte() functions are never used. Remove them. Link: http://lkml.kernel.org/r/1572938135-31886-12-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Richard Weinberger Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/tlb.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index b7eaf655635c..8425a22142b7 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -490,35 +490,6 @@ kill: force_sig(SIGKILL); } -pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) -{ - return pgd_offset(mm, address); -} - -pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address) -{ - return pud_offset(pgd, address); -} - -pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address) -{ - return pmd_offset(pud, address); -} - -pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address) -{ - return pte_offset_kernel(pmd, address); -} - -pte_t *addr_pte(struct task_struct *task, unsigned long addr) -{ - pgd_t *pgd = pgd_offset(task->mm, addr); - pud_t *pud = pud_offset(pgd, addr); - pmd_t *pmd = pmd_offset(pud, addr); - - return pte_offset_map(pmd, addr); -} - void flush_tlb_all(void) { /* From e19f97ed67d8f9b60e4ce14a7551d3dd45825570 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:54:28 -0800 Subject: [PATCH 85/86] um: add support for folded p4d page tables The UML port uses 4 and 5 level fixups to support higher level page table directories in the generic VM code. Implement primitives necessary for the 4th level folding, add walks of p4d level where appropriate and drop usage of __ARCH_USE_5LEVEL_HACK. Link: http://lkml.kernel.org/r/1572938135-31886-13-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/asm/pgtable-2level.h | 1 - arch/um/include/asm/pgtable-3level.h | 1 - arch/um/include/asm/pgtable.h | 3 ++ arch/um/kernel/mem.c | 8 +++- arch/um/kernel/skas/mmu.c | 12 +++++- arch/um/kernel/skas/uaccess.c | 7 +++- arch/um/kernel/tlb.c | 56 +++++++++++++++++++++++++--- arch/um/kernel/trap.c | 4 +- 8 files changed, 78 insertions(+), 14 deletions(-) diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index 32b3d26a7109..32106d31e4ab 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -8,7 +8,6 @@ #ifndef __UM_PGTABLE_2LEVEL_H #define __UM_PGTABLE_2LEVEL_H -#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index 9812269fefc9..8a3b689e0f86 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -7,7 +7,6 @@ #ifndef __UM_PGTABLE_3LEVEL_H #define __UM_PGTABLE_3LEVEL_H -#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 36a44d58f373..2daa58df2190 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -106,6 +106,9 @@ extern unsigned long end_iomem; #define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE) #define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE) +#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE) +#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE) + #define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) #define pte_page(x) pfn_to_page(pte_pfn(x)) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 417ff647fb37..30885d0b94ac 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -96,6 +96,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; int i, j; @@ -107,7 +108,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end, pgd = pgd_base + i; for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { - pud = pud_offset(pgd, vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); if (pud_none(*pud)) one_md_table_init(pud); pmd = pmd_offset(pud, vaddr); @@ -124,6 +126,7 @@ static void __init fixaddr_user_init( void) #ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA long size = FIXADDR_USER_END - FIXADDR_USER_START; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -144,7 +147,8 @@ static void __init fixaddr_user_init( void) for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE, p += PAGE_SIZE) { pgd = swapper_pg_dir + pgd_index(vaddr); - pud = pud_offset(pgd, vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); pte_set_val(*pte, p, PAGE_READONLY); diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index b5e3d91fc9c2..3f0d9a573fd6 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -19,15 +19,21 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, unsigned long kernel) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; pgd = pgd_offset(mm, proc); - pud = pud_alloc(mm, pgd, proc); - if (!pud) + + p4d = p4d_alloc(mm, pgd, proc); + if (!p4d) goto out; + pud = pud_alloc(mm, p4d, proc); + if (!pud) + goto out_pud; + pmd = pmd_alloc(mm, pud, proc); if (!pmd) goto out_pmd; @@ -44,6 +50,8 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, pmd_free(mm, pmd); out_pmd: pud_free(mm, pud); + out_pud: + p4d_free(mm, p4d); out: return -ENOMEM; } diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c index 3236052f20e6..d617f8dc9c19 100644 --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -17,6 +17,7 @@ pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -27,7 +28,11 @@ pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr) if (!pgd_present(*pgd)) return NULL; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return NULL; + + pud = pud_offset(p4d, addr); if (!pud_present(*pud)) return NULL; diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 8425a22142b7..80a358c6d652 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -277,7 +277,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr, return ret; } -static inline int update_pud_range(pgd_t *pgd, unsigned long addr, +static inline int update_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, struct host_vm_change *hvc) { @@ -285,7 +285,7 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr, unsigned long next; int ret = 0; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (!pud_present(*pud)) { @@ -299,6 +299,28 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr, return ret; } +static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, + struct host_vm_change *hvc) +{ + p4d_t *p4d; + unsigned long next; + int ret = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (!p4d_present(*p4d)) { + if (hvc->force || p4d_newpage(*p4d)) { + ret = add_munmap(addr, next - addr, hvc); + p4d_mkuptodate(*p4d); + } + } else + ret = update_pud_range(p4d, addr, next, hvc); + } while (p4d++, addr = next, ((addr < end) && !ret)); + return ret; +} + void fix_range_common(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force) { @@ -316,8 +338,8 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, ret = add_munmap(addr, next - addr, &hvc); pgd_mkuptodate(*pgd); } - } - else ret = update_pud_range(pgd, addr, next, &hvc); + } else + ret = update_p4d_range(pgd, addr, next, &hvc); } while (pgd++, addr = next, ((addr < end_addr) && !ret)); if (!ret) @@ -338,6 +360,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) { struct mm_struct *mm; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -364,7 +387,23 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) continue; } - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) { + last = ADD_ROUND(addr, P4D_SIZE); + if (last > end) + last = end; + if (p4d_newpage(*p4d)) { + updated = 1; + err = add_munmap(addr, last - addr, &hvc); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pud = pud_offset(p4d, addr); if (!pud_present(*pud)) { last = ADD_ROUND(addr, PUD_SIZE); if (last > end) @@ -424,6 +463,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -437,7 +477,11 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) if (!pgd_present(*pgd)) goto kill; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto kill; + + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto kill; diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index e62296c66c95..818553064f04 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -28,6 +28,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -104,7 +105,8 @@ good_area: } pgd = pgd_offset(mm, address); - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + pud = pud_offset(p4d, address); pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); } while (!pte_present(*pte)); From f949286c668aed5aa24acdb5838be9cfd9513bd3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:54:32 -0800 Subject: [PATCH 86/86] mm: remove __ARCH_HAS_4LEVEL_HACK and include/asm-generic/4level-fixup.h There are no architectures that use include/asm-generic/4level-fixup.h therefore it can be removed along with __ARCH_HAS_4LEVEL_HACK define. Link: http://lkml.kernel.org/r/1572938135-31886-14-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/4level-fixup.h | 39 ------------------------------ include/linux/mm.h | 12 ++++----- mm/memory.c | 8 ------ 3 files changed, 6 insertions(+), 53 deletions(-) delete mode 100644 include/asm-generic/4level-fixup.h diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h deleted file mode 100644 index c86cf7cb4bba..000000000000 --- a/include/asm-generic/4level-fixup.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _4LEVEL_FIXUP_H -#define _4LEVEL_FIXUP_H - -#define __ARCH_HAS_4LEVEL_HACK -#define __PAGETABLE_PUD_FOLDED 1 - -#define PUD_SHIFT PGDIR_SHIFT -#define PUD_SIZE PGDIR_SIZE -#define PUD_MASK PGDIR_MASK -#define PTRS_PER_PUD 1 - -#define pud_t pgd_t - -#define pmd_alloc(mm, pud, address) \ - ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \ - NULL: pmd_offset(pud, address)) - -#define pud_offset(pgd, start) (pgd) -#define pud_none(pud) 0 -#define pud_bad(pud) 0 -#define pud_present(pud) 1 -#define pud_ERROR(pud) do { } while (0) -#define pud_clear(pud) pgd_clear(pud) -#define pud_val(pud) pgd_val(pud) -#define pud_populate(mm, pud, pmd) pgd_populate(mm, pud, pmd) -#define pud_page(pud) pgd_page(pud) -#define pud_page_vaddr(pud) pgd_page_vaddr(pud) - -#undef pud_free_tlb -#define pud_free_tlb(tlb, x, addr) do { } while (0) -#define pud_free(mm, x) do { } while (0) - -#undef pud_addr_end -#define pud_addr_end(addr, end) (end) - -#include - -#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b0ef04b6d15..c97ea3b694e6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1838,12 +1838,12 @@ static inline void mm_dec_nr_ptes(struct mm_struct *mm) {} int __pte_alloc(struct mm_struct *mm, pmd_t *pmd); int __pte_alloc_kernel(pmd_t *pmd); -/* - * The following ifdef needed to get the 4level-fixup.h header to work. - * Remove it when 4level-fixup.h has been removed. - */ -#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) +#if defined(CONFIG_MMU) +/* + * The following ifdef needed to get the 5level-fixup.h header to work. + * Remove it when 5level-fixup.h has been removed. + */ #ifndef __ARCH_HAS_5LEVEL_HACK static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) @@ -1865,7 +1865,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? NULL: pmd_offset(pud, address); } -#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ +#endif /* CONFIG_MMU */ #if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS diff --git a/mm/memory.c b/mm/memory.c index e455160e0f75..606da187d1de 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4197,19 +4197,11 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) smp_wmb(); /* See comment in __pte_alloc */ ptl = pud_lock(mm, pud); -#ifndef __ARCH_HAS_4LEVEL_HACK if (!pud_present(*pud)) { mm_inc_nr_pmds(mm); pud_populate(mm, pud, new); } else /* Another has populated it */ pmd_free(mm, new); -#else - if (!pgd_present(*pud)) { - mm_inc_nr_pmds(mm); - pgd_populate(mm, pud, new); - } else /* Another has populated it */ - pmd_free(mm, new); -#endif /* __ARCH_HAS_4LEVEL_HACK */ spin_unlock(ptl); return 0; }