From 8ee912dab95f1483156b6e994004bfcc3158d798 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 3 Oct 2017 16:14:15 -0700 Subject: [PATCH 01/51] alpha: fix build failures The build of alpha allmodconfig is giving error: arch/alpha/include/asm/mmu_context.h: In function 'ev5_switch_mm': arch/alpha/include/asm/mmu_context.h:160:2: error: implicit declaration of function 'task_thread_info'; did you mean 'init_thread_info'? [-Werror=implicit-function-declaration] The file 'mmu_context.h' needed an extra header file. Link: http://lkml.kernel.org/r/1505668810-7497-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mmu_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/alpha/include/asm/mmu_context.h b/arch/alpha/include/asm/mmu_context.h index 384bd47b5187..45c020a0fe76 100644 --- a/arch/alpha/include/asm/mmu_context.h +++ b/arch/alpha/include/asm/mmu_context.h @@ -8,6 +8,7 @@ */ #include +#include #include #include From 630cc2b30a42c70628368a412beb4a5e5dd71abe Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Oct 2017 16:14:18 -0700 Subject: [PATCH 02/51] kernel/params.c: align add_sysfs_param documentation with code This parameter is named kp, so the documentation should use that. Fixes: 9b473de87209 ("param: Fix duplicate module prefixes") Link: http://lkml.kernel.org/r/20170919142656.64aea59e@endymion Signed-off-by: Jean Delvare Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/params.c b/kernel/params.c index 60b2d8101355..1cd8f1a895a8 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -600,7 +600,7 @@ EXPORT_SYMBOL(kernel_param_unlock); /* * add_sysfs_param - add a parameter to sysfs * @mk: struct module_kobject - * @kparam: the actual parameter definition to add to sysfs + * @kp: the actual parameter definition to add to sysfs * @name: name of parameter * * Create a kobject if for a (per-module) parameter if mp NULL, and From e00e5a26e3d37b47ce8ca59611db1e6135790ef8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Oct 2017 16:14:21 -0700 Subject: [PATCH 03/51] scripts/spelling.txt: add more spelling mistakes to spelling.txt Here are some of the more spelling mistakes and typos that I've found while fixing up spelling mistakes in kernel error message text over the past eight weeks. [akpm@linux-foundation.org: s/|/||/, per Joe] Link: http://lkml.kernel.org/r/20170919090818.5989-1-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Kees Cook Cc: Masahiro Yamada Cc: Stephen Boyd Cc: Joe Perches Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/spelling.txt | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 400ef35169c5..aa0cc49ad1ad 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -53,6 +53,7 @@ acumulator||accumulator adapater||adapter addional||additional additionaly||additionally +additonal||additional addres||address adddress||address addreses||addresses @@ -67,6 +68,8 @@ adviced||advised afecting||affecting againt||against agaist||against +aggreataon||aggregation +aggreation||aggregation albumns||albums alegorical||allegorical algined||aligned @@ -80,6 +83,8 @@ aligment||alignment alignement||alignment allign||align alligned||aligned +alllocate||allocate +alloated||allocated allocatote||allocate allocatrd||allocated allocte||allocate @@ -171,6 +176,7 @@ availale||available availavility||availability availble||available availiable||available +availible||available avalable||available avaliable||available aysnc||async @@ -203,6 +209,7 @@ broadcat||broadcast cacluated||calculated caculation||calculation calender||calendar +calescing||coalescing calle||called callibration||calibration calucate||calculate @@ -210,6 +217,7 @@ calulate||calculate cancelation||cancellation cancle||cancel capabilites||capabilities +capabilty||capability capabitilies||capabilities capatibilities||capabilities capapbilities||capabilities @@ -302,6 +310,7 @@ containts||contains contaisn||contains contant||contact contence||contents +continious||continuous continous||continuous continously||continuously continueing||continuing @@ -393,6 +402,7 @@ differrence||difference diffrent||different diffrentiate||differentiate difinition||definition +dimesions||dimensions diplay||display direectly||directly disassocation||disassociation @@ -449,6 +459,7 @@ equiped||equipped equivelant||equivalent equivilant||equivalent eror||error +errorr||error estbalishment||establishment etsablishment||establishment etsbalishment||establishment @@ -481,6 +492,7 @@ failied||failed faillure||failure failue||failure failuer||failure +failng||failing faireness||fairness falied||failed faliure||failure @@ -493,6 +505,7 @@ fetaure||feature fetaures||features fileystem||filesystem fimware||firmware +firware||firmware finanize||finalize findn||find finilizes||finalizes @@ -502,6 +515,7 @@ folloing||following followign||following followings||following follwing||following +fonud||found forseeable||foreseeable forse||force fortan||fortran @@ -532,6 +546,7 @@ grabing||grabbing grahical||graphical grahpical||graphical grapic||graphic +grranted||granted guage||gauge guarenteed||guaranteed guarentee||guarantee @@ -543,6 +558,7 @@ happend||happened harware||hardware heirarchically||hierarchically helpfull||helpful +hybernate||hibernate hierachy||hierarchy hierarchie||hierarchy howver||however @@ -565,16 +581,19 @@ implemenation||implementation implementaiton||implementation implementated||implemented implemention||implementation +implementd||implemented implemetation||implementation implemntation||implementation implentation||implementation implmentation||implementation implmenting||implementing +incative||inactive incomming||incoming incompatabilities||incompatibilities incompatable||incompatible inconsistant||inconsistent increas||increase +incremeted||incremented incrment||increment indendation||indentation indended||intended @@ -619,6 +638,7 @@ interger||integer intermittant||intermittent internel||internal interoprability||interoperability +interuupt||interrupt interrface||interface interrrupt||interrupt interrup||interrupt @@ -638,8 +658,10 @@ intrrupt||interrupt intterrupt||interrupt intuative||intuitive invaid||invalid +invald||invalid invalde||invalid invalide||invalid +invalidiate||invalidate invalud||invalid invididual||individual invokation||invocation @@ -713,6 +735,7 @@ misformed||malformed mispelled||misspelled mispelt||misspelt mising||missing +mismactch||mismatch missmanaged||mismanaged missmatch||mismatch miximum||maximum @@ -731,6 +754,7 @@ multidimensionnal||multidimensional multple||multiple mumber||number muticast||multicast +mutilcast||multicast mutiple||multiple mutli||multi nams||names @@ -834,6 +858,7 @@ posible||possible positon||position possibilites||possibilities powerfull||powerful +preample||preamble preapre||prepare preceeded||preceded preceeding||preceding @@ -1059,6 +1084,7 @@ sturcture||structure subdirectoires||subdirectories suble||subtle substract||subtract +submition||submission succesfully||successfully succesful||successful successed||succeeded @@ -1078,6 +1104,7 @@ suppoted||supported suppported||supported suppport||support supress||suppress +surpressed||suppressed surpresses||suppresses susbsystem||subsystem suspeneded||suspended @@ -1091,6 +1118,7 @@ swithced||switched swithcing||switching swithed||switched swithing||switching +swtich||switch symetric||symmetric synax||syntax synchonized||synchronized @@ -1111,7 +1139,9 @@ therfore||therefore thier||their threds||threads threshhold||threshold +thresold||threshold throught||through +troughput||throughput thses||these tiggered||triggered tipically||typically @@ -1120,6 +1150,7 @@ tmis||this torerable||tolerable tramsmitted||transmitted tramsmit||transmit +tranasction||transaction tranfer||transfer transciever||transceiver transferd||transferred @@ -1133,6 +1164,7 @@ trasmission||transmission treshold||threshold trigerring||triggering trun||turn +tunning||tuning ture||true tyep||type udpate||update @@ -1199,6 +1231,7 @@ visiters||visitors vitual||virtual wakeus||wakeups wating||waiting +wiat||wait wether||whether whataver||whatever whcih||which From fa87b91c94a8fd2c8502b6761be2d08a8e9bcf55 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 3 Oct 2017 16:14:24 -0700 Subject: [PATCH 04/51] include/linux/mm.h: fix typo in VM_MPX definition There's a typo in recent change of VM_MPX definition. We want it to be VM_HIGH_ARCH_4, not VM_HIGH_ARCH_BIT_4. This bug does cause visible regressions. In arch_vma_name the vmflags are tested against VM_MPX. With the incorrect value of VM_MPX, a number of vmas (such as the stack) test positive and end up being marked as "[mpx]" in /proc/N/maps instead of their correct names. This confuses tools like rr which expect to be able to find familiar vmas. Fixes: df3735c5b40f ("x86,mpx: make mpx depend on x86-64 to free up VMA flag") Link: http://lkml.kernel.org/r/20170918140253.36856-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reviewed-by: Rik van Riel Cc: Dave Hansen Cc: Kyle Huey Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f8c10d336e42..065d99deb847 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -240,7 +240,7 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86_INTEL_MPX) /* MPX specific bounds table or bounds directory */ -# define VM_MPX VM_HIGH_ARCH_BIT_4 +# define VM_MPX VM_HIGH_ARCH_4 #else # define VM_MPX VM_NONE #endif From 4b22927f0cbd58303aac689e378d20bf56267a39 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 3 Oct 2017 16:14:27 -0700 Subject: [PATCH 05/51] ksm: fix unlocked iteration over vmas in cmp_and_merge_page() In this place mm is unlocked, so vmas or list may change. Down read mmap_sem to protect them from modifications. Link: http://lkml.kernel.org/r/150512788393.10691.8868381099691121308.stgit@localhost.localdomain Fixes: e86c59b1b12d ("mm/ksm: improve deduplication of zero pages with colouring") Signed-off-by: Kirill Tkhai Acked-by: Michal Hocko Reviewed-by: Andrea Arcangeli Cc: Minchan Kim Cc: zhong jiang Cc: Ingo Molnar Cc: Claudio Imbrenda Cc: "Kirill A. Shutemov" Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/ksm.c b/mm/ksm.c index 15dd7415f7b3..6cb60f46cce5 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1990,6 +1990,7 @@ static void stable_tree_append(struct rmap_item *rmap_item, */ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) { + struct mm_struct *mm = rmap_item->mm; struct rmap_item *tree_rmap_item; struct page *tree_page = NULL; struct stable_node *stable_node; @@ -2062,9 +2063,11 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) if (ksm_use_zero_pages && (checksum == zero_checksum)) { struct vm_area_struct *vma; - vma = find_mergeable_vma(rmap_item->mm, rmap_item->address); + down_read(&mm->mmap_sem); + vma = find_mergeable_vma(mm, rmap_item->address); err = try_to_merge_one_page(vma, page, ZERO_PAGE(rmap_item->address)); + up_read(&mm->mmap_sem); /* * In case of failure, the page was not really empty, so we * need to continue. Otherwise we're done. From 19bfbe22f59a207417b2679e7e83c180419c9ec5 Mon Sep 17 00:00:00 2001 From: Alexandru Moise <00moses.alexander00@gmail.com> Date: Tue, 3 Oct 2017 16:14:31 -0700 Subject: [PATCH 06/51] mm, hugetlb, soft_offline: save compound page order before page migration This fixes a bug in madvise() where if you'd try to soft offline a hugepage via madvise(), while walking the address range you'd end up, using the wrong page offset due to attempting to get the compound order of a former but presently not compound page, due to dissolving the huge page (since commit c3114a84f7f9: "mm: hugetlb: soft-offline: dissolve source hugepage after successful migration"). As a result I ended up with all my free pages except one being offlined. Link: http://lkml.kernel.org/r/20170912204306.GA12053@gmail.com Fixes: c3114a84f7f9 ("mm: hugetlb: soft-offline: dissolve source hugepage after successful migration") Signed-off-by: Alexandru Moise <00moses.alexander00@gmail.com> Cc: Anshuman Khandual Cc: Michal Hocko Cc: Andrea Arcangeli Cc: Minchan Kim Cc: Hillf Danton Cc: Shaohua Li Cc: Mike Rapoport Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: David Rientjes Cc: Rik van Riel Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 21261ff0466f..25bade36e9ca 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -625,18 +625,26 @@ static int madvise_inject_error(int behavior, { struct page *page; struct zone *zone; + unsigned int order; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - for (; start < end; start += PAGE_SIZE << - compound_order(compound_head(page))) { + + for (; start < end; start += PAGE_SIZE << order) { int ret; ret = get_user_pages_fast(start, 1, 0, &page); if (ret != 1) return ret; + /* + * When soft offlining hugepages, after migrating the page + * we dissolve it, therefore in the second loop "page" will + * no longer be a compound page, and order will be 0. + */ + order = compound_order(compound_head(page)); + if (PageHWPoison(page)) { put_page(page); continue; From b78412b8300a8453b78d2c1b0b925b66493bb011 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2017 16:14:34 -0700 Subject: [PATCH 07/51] sh: sh7722: remove nonexistent GPIO_PTQ7 to fix pinctrl registration Patch series "sh: sh7722/sh7757i/sh7264/sh7269: Fix pinctrl registration", v2. Magnus Damm reported that on sh7722/Migo-R, pinctrl registration fails with: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. Apparently GPIO_PTQ7 was defined in the enum, but never used. If enum values are defined, but never used, pinmux_pins[] contains (zero-filled) holes. Hence such entries are treated as pin zero, which was registered before, and pinctrl registration fails. I can't see how this ever worked, as at the time of commit f5e25ae52fef ("sh-pfc: Add sh7722 pinmux support"), pinmux_gpios[] in drivers/pinctrl/sh-pfc/pfc-sh7722.c already had the hole, and drivers/pinctrl/core.c already had the check. Some scripting revealed a few more broken drivers: - sh7757 has four holes, due to nonexistent GPIO_PT[JLNQ]7_RESV. - sh7264 and sh7269 define GPIO_PH[0-7], but don't use it with PINMUX_GPIO(). Patch 1 fixes the issue on sh7722, and was tested. Patches 3-4 should fix the issue on the other 3 SoCs, but was untested due to lack of hardware. This patch (of 4): On sh7722/Migo-R, pinctrl registration fails with: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. As GPIO_PTQ7 is defined in the enum, but never used, pinmux_pins[] contains a (zero-filled) hole. Hence this entry is treated as pin zero, which was registered before, and pinctrl registration fails. According to the datasheet, port PTQ7 does not exist. Hence remove GPIO_PTQ7 from the enum to fix this. Link: http://lkml.kernel.org/r/1505205657-18012-2-git-send-email-geert+renesas@glider.be Fixes: 8d7b5b0af7e070b9 ("sh: Add sh7722 pinmux code") Signed-off-by: Geert Uytterhoeven Reported-by: Magnus Damm Reviewed-by: Laurent Pinchart Tested-by: Jacopo Mondi Cc: Rich Felker Cc: Yoshihiro Shimoda Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/cpu-sh4/cpu/sh7722.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/cpu-sh4/cpu/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h index 3bb74e534d0f..78961ab78a5a 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7722.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7722.h @@ -67,7 +67,7 @@ enum { GPIO_PTN3, GPIO_PTN2, GPIO_PTN1, GPIO_PTN0, /* PTQ */ - GPIO_PTQ7, GPIO_PTQ6, GPIO_PTQ5, GPIO_PTQ4, + GPIO_PTQ6, GPIO_PTQ5, GPIO_PTQ4, GPIO_PTQ3, GPIO_PTQ2, GPIO_PTQ1, GPIO_PTQ0, /* PTR */ From d8ce38f69843a56da044e56b6c16aecfbc3c6e39 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2017 16:14:37 -0700 Subject: [PATCH 08/51] sh: sh7757: remove nonexistent GPIO_PT[JLNQ]7_RESV to fix pinctrl registration Commit 3810e96056ff ("sh: modify pinmux for SH7757 2nd cut") renamed GPIO_PT[JLNQ]7 to GPIO_PT[JLNQ]7_RESV, and removed the existing users from the pinmux_pins[] array. However, pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. Hence entries were not really removed, but replaced by (zero-filled) holes. Such entries are treated as pin zero, which was registered before, thus leading to pinctrl registration failures, as seen on sh7722: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 Remove GPIO_PT[JLNQ]7_RESV from the enum to fix this. Link: http://lkml.kernel.org/r/1505205657-18012-3-git-send-email-geert+renesas@glider.be Fixes: 3810e96056ffddf6 ("sh: modify pinmux for SH7757 2nd cut") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Cc: Jacopo Mondi Cc: Magnus Damm Cc: Rich Felker Cc: Yoshihiro Shimoda Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/cpu-sh4/cpu/sh7757.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sh/include/cpu-sh4/cpu/sh7757.h b/arch/sh/include/cpu-sh4/cpu/sh7757.h index 5340f3bc1863..b40fb541e72a 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7757.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7757.h @@ -40,7 +40,7 @@ enum { /* PTJ */ GPIO_PTJ0, GPIO_PTJ1, GPIO_PTJ2, GPIO_PTJ3, - GPIO_PTJ4, GPIO_PTJ5, GPIO_PTJ6, GPIO_PTJ7_RESV, + GPIO_PTJ4, GPIO_PTJ5, GPIO_PTJ6, /* PTK */ GPIO_PTK0, GPIO_PTK1, GPIO_PTK2, GPIO_PTK3, @@ -48,7 +48,7 @@ enum { /* PTL */ GPIO_PTL0, GPIO_PTL1, GPIO_PTL2, GPIO_PTL3, - GPIO_PTL4, GPIO_PTL5, GPIO_PTL6, GPIO_PTL7_RESV, + GPIO_PTL4, GPIO_PTL5, GPIO_PTL6, /* PTM */ GPIO_PTM0, GPIO_PTM1, GPIO_PTM2, GPIO_PTM3, @@ -56,7 +56,7 @@ enum { /* PTN */ GPIO_PTN0, GPIO_PTN1, GPIO_PTN2, GPIO_PTN3, - GPIO_PTN4, GPIO_PTN5, GPIO_PTN6, GPIO_PTN7_RESV, + GPIO_PTN4, GPIO_PTN5, GPIO_PTN6, /* PTO */ GPIO_PTO0, GPIO_PTO1, GPIO_PTO2, GPIO_PTO3, @@ -68,7 +68,7 @@ enum { /* PTQ */ GPIO_PTQ0, GPIO_PTQ1, GPIO_PTQ2, GPIO_PTQ3, - GPIO_PTQ4, GPIO_PTQ5, GPIO_PTQ6, GPIO_PTQ7_RESV, + GPIO_PTQ4, GPIO_PTQ5, GPIO_PTQ6, /* PTR */ GPIO_PTR0, GPIO_PTR1, GPIO_PTR2, GPIO_PTR3, From eae3df7e82318d798f45dedf111e241805ec7a4a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2017 16:14:41 -0700 Subject: [PATCH 09/51] sh: sh7264: remove nonexistent GPIO_PH[0-7] to fix pinctrl registration Pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. If enum values are defined, but never used, pinmux_pins[] contains (zero-filled) holes. Such entries are treated as pin zero, which was registered before, thus leading to pinctrl registration failures, as seen on sh7722: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 Remove GPIO_PH[0-7] from the enum to fix this. Link: http://lkml.kernel.org/r/1505205657-18012-4-git-send-email-geert+renesas@glider.be Fixes: 41797f75486d8ca3 ("sh: Add pinmux for sh7264") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Cc: Jacopo Mondi Cc: Magnus Damm Cc: Rich Felker Cc: Yoshihiro Shimoda Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/cpu-sh2a/cpu/sh7264.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7264.h b/arch/sh/include/cpu-sh2a/cpu/sh7264.h index 4d1ef6d74bd6..2ae0e938b657 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7264.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7264.h @@ -43,9 +43,7 @@ enum { GPIO_PG7, GPIO_PG6, GPIO_PG5, GPIO_PG4, GPIO_PG3, GPIO_PG2, GPIO_PG1, GPIO_PG0, - /* Port H */ - GPIO_PH7, GPIO_PH6, GPIO_PH5, GPIO_PH4, - GPIO_PH3, GPIO_PH2, GPIO_PH1, GPIO_PH0, + /* Port H - Port H does not have a Data Register */ /* Port I - not on device */ From d9d73e81fe82fdf4ee65a48c26531edc04108349 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2017 16:14:44 -0700 Subject: [PATCH 10/51] sh: sh7269: remove nonexistent GPIO_PH[0-7] to fix pinctrl registration Pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. If enum values are defined, but never used, pinmux_pins[] contains (zero-filled) holes. Such entries are treated as pin zero, which was registered before, thus leading to pinctrl registration failures, as seen on sh7722: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 Remove GPIO_PH[0-7] from the enum to fix this. Link: http://lkml.kernel.org/r/1505205657-18012-5-git-send-email-geert+renesas@glider.be Fixes: ef0fa5331a73e479 ("sh: Add pinmux for sh7269") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Cc: Yoshinori Sato Cc: Rich Felker Cc: Magnus Damm Cc: Yoshihiro Shimoda Cc: Jacopo Mondi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/cpu-sh2a/cpu/sh7269.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index 2a0ca8780f0d..13c495a9fc00 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -45,9 +45,7 @@ enum { GPIO_PG7, GPIO_PG6, GPIO_PG5, GPIO_PG4, GPIO_PG3, GPIO_PG2, GPIO_PG1, GPIO_PG0, - /* Port H */ - GPIO_PH7, GPIO_PH6, GPIO_PH5, GPIO_PH4, - GPIO_PH3, GPIO_PH2, GPIO_PH1, GPIO_PH0, + /* Port H - Port H does not have a Data Register */ /* Port I - not on device */ From d5567c9df1ef001b2a7e6684b3b3498371ee4cae Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Tue, 3 Oct 2017 16:14:47 -0700 Subject: [PATCH 11/51] z3fold: fix potential race in z3fold_reclaim_page It is possible that on a (partially) unsuccessful page reclaim, kref_put() called in z3fold_reclaim_page() does not yield page release, but the page is released shortly afterwards by another thread. Then z3fold_reclaim_page() would try to list_add() that (released) page again which is obviously a bug. To avoid that, spin_lock() has to be taken earlier, before the kref_put() call mentioned earlier. Link: http://lkml.kernel.org/r/20170913162937.bfff21c7d12b12a5f47639fd@gmail.com Signed-off-by: Vitaly Wool Cc: Dan Streetman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index 486550df32be..b04fa3ba1bf2 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -875,16 +875,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) goto next; } next: + spin_lock(&pool->lock); if (test_bit(PAGE_HEADLESS, &page->private)) { if (ret == 0) { + spin_unlock(&pool->lock); free_z3fold_page(page); return 0; } } else if (kref_put(&zhdr->refcount, release_z3fold_page)) { atomic64_dec(&pool->pages_nr); + spin_unlock(&pool->lock); return 0; } - spin_lock(&pool->lock); /* * Add to the beginning of LRU. From 4d4bbd8526a8fbeb2c090ea360211fceff952383 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:14:50 -0700 Subject: [PATCH 12/51] mm, oom_reaper: skip mm structs with mmu notifiers Andrea has noticed that the oom_reaper doesn't invalidate the range via mmu notifiers (mmu_notifier_invalidate_range_start/end) and that can corrupt the memory of the kvm guest for example. tlb_flush_mmu_tlbonly already invokes mmu notifiers but that is not sufficient as per Andrea: "mmu_notifier_invalidate_range cannot be used in replacement of mmu_notifier_invalidate_range_start/end. For KVM mmu_notifier_invalidate_range is a noop and rightfully so. A MMU notifier implementation has to implement either ->invalidate_range method or the invalidate_range_start/end methods, not both. And if you implement invalidate_range_start/end like KVM is forced to do, calling mmu_notifier_invalidate_range in common code is a noop for KVM. For those MMU notifiers that can get away only implementing ->invalidate_range, the ->invalidate_range is implicitly called by mmu_notifier_invalidate_range_end(). And only those secondary MMUs that share the same pagetable with the primary MMU (like AMD iommuv2) can get away only implementing ->invalidate_range" As the callback is allowed to sleep and the implementation is out of hand of the MM it is safer to simply bail out if there is an mmu notifier registered. In order to not fail too early make the mm_has_notifiers check under the oom_lock and have a little nap before failing to give the current oom victim some more time to exit. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170913113427.2291-1-mhocko@kernel.org Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Michal Hocko Reported-by: Andrea Arcangeli Reviewed-by: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 5 +++++ mm/oom_kill.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 7b2e31b1745a..6866e8126982 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -400,6 +400,11 @@ extern void mmu_notifier_synchronize(void); #else /* CONFIG_MMU_NOTIFIER */ +static inline int mm_has_notifiers(struct mm_struct *mm) +{ + return 0; +} + static inline void mmu_notifier_release(struct mm_struct *mm) { } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 99736e026712..dee0f75c3013 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include "internal.h" @@ -494,6 +495,21 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) goto unlock_oom; } + /* + * If the mm has notifiers then we would need to invalidate them around + * unmap_page_range and that is risky because notifiers can sleep and + * what they do is basically undeterministic. So let's have a short + * sleep to give the oom victim some more time. + * TODO: we really want to get rid of this ugly hack and make sure that + * notifiers cannot block for unbounded amount of time and add + * mmu_notifier_invalidate_range_{start,end} around unmap_page_range + */ + if (mm_has_notifiers(mm)) { + up_read(&mm->mmap_sem); + schedule_timeout_idle(HZ); + goto unlock_oom; + } + /* * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't * work on the mm anymore. The check for MMF_OOM_SKIP must run From 72f0184c8a00c70179cfed6266e2e06b4d400065 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:14:53 -0700 Subject: [PATCH 13/51] mm, memcg: remove hotplug locking from try_charge The following lockdep splat has been noticed during LTP testing ====================================================== WARNING: possible circular locking dependency detected 4.13.0-rc3-next-20170807 #12 Not tainted ------------------------------------------------------ a.out/4771 is trying to acquire lock: (cpu_hotplug_lock.rw_sem){++++++}, at: [] drain_all_stock.part.35+0x18/0x140 but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x175/0x530 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&mm->mmap_sem){++++++}: lock_acquire+0xc9/0x230 __might_fault+0x70/0xa0 _copy_to_user+0x23/0x70 filldir+0xa7/0x110 xfs_dir2_sf_getdents.isra.10+0x20c/0x2c0 [xfs] xfs_readdir+0x1fa/0x2c0 [xfs] xfs_file_readdir+0x30/0x40 [xfs] iterate_dir+0x17a/0x1a0 SyS_getdents+0xb0/0x160 entry_SYSCALL_64_fastpath+0x1f/0xbe -> #2 (&type->i_mutex_dir_key#3){++++++}: lock_acquire+0xc9/0x230 down_read+0x51/0xb0 lookup_slow+0xde/0x210 walk_component+0x160/0x250 link_path_walk+0x1a6/0x610 path_openat+0xe4/0xd50 do_filp_open+0x91/0x100 file_open_name+0xf5/0x130 filp_open+0x33/0x50 kernel_read_file_from_path+0x39/0x80 _request_firmware+0x39f/0x880 request_firmware_direct+0x37/0x50 request_microcode_fw+0x64/0xe0 reload_store+0xf7/0x180 dev_attr_store+0x18/0x30 sysfs_kf_write+0x44/0x60 kernfs_fop_write+0x113/0x1a0 __vfs_write+0x37/0x170 vfs_write+0xc7/0x1c0 SyS_write+0x58/0xc0 do_syscall_64+0x6c/0x1f0 return_from_SYSCALL_64+0x0/0x7a -> #1 (microcode_mutex){+.+.+.}: lock_acquire+0xc9/0x230 __mutex_lock+0x88/0x960 mutex_lock_nested+0x1b/0x20 microcode_init+0xbb/0x208 do_one_initcall+0x51/0x1a9 kernel_init_freeable+0x208/0x2a7 kernel_init+0xe/0x104 ret_from_fork+0x2a/0x40 -> #0 (cpu_hotplug_lock.rw_sem){++++++}: __lock_acquire+0x153c/0x1550 lock_acquire+0xc9/0x230 cpus_read_lock+0x4b/0x90 drain_all_stock.part.35+0x18/0x140 try_charge+0x3ab/0x6e0 mem_cgroup_try_charge+0x7f/0x2c0 shmem_getpage_gfp+0x25f/0x1050 shmem_fault+0x96/0x200 __do_fault+0x1e/0xa0 __handle_mm_fault+0x9c3/0xe00 handle_mm_fault+0x16e/0x380 __do_page_fault+0x24a/0x530 do_page_fault+0x30/0x80 page_fault+0x28/0x30 other info that might help us debug this: Chain exists of: cpu_hotplug_lock.rw_sem --> &type->i_mutex_dir_key#3 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&type->i_mutex_dir_key#3); lock(&mm->mmap_sem); lock(cpu_hotplug_lock.rw_sem); *** DEADLOCK *** 2 locks held by a.out/4771: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x175/0x530 #1: (percpu_charge_mutex){+.+...}, at: [] try_charge+0x397/0x6e0 The problem is very similar to the one fixed by commit a459eeb7b852 ("mm, page_alloc: do not depend on cpu hotplug locks inside the allocator"). We are taking hotplug locks while we can be sitting on top of basically arbitrary locks. This just calls for problems. We can get rid of {get,put}_online_cpus, fortunately. We do not have to be worried about races with memory hotplug because drain_local_stock, which is called from both the WQ draining and the memory hotplug contexts, is always operating on the local cpu stock with IRQs disabled. The only thing to be careful about is that the target memcg doesn't vanish while we are still in drain_all_stock so take a reference on it. Link: http://lkml.kernel.org/r/20170913090023.28322-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Artem Savkov Tested-by: Artem Savkov Cc: Johannes Weiner Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 15af3da5af02..696c6529e900 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1777,6 +1777,10 @@ static void drain_local_stock(struct work_struct *dummy) struct memcg_stock_pcp *stock; unsigned long flags; + /* + * The only protection from memory hotplug vs. drain_stock races is + * that we always operate on local CPU stock here with IRQ disabled + */ local_irq_save(flags); stock = this_cpu_ptr(&memcg_stock); @@ -1821,27 +1825,33 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) /* If someone's already draining, avoid adding running more workers. */ if (!mutex_trylock(&percpu_charge_mutex)) return; - /* Notify other cpus that system-wide "drain" is running */ - get_online_cpus(); + /* + * Notify other cpus that system-wide "drain" is running + * We do not care about races with the cpu hotplug because cpu down + * as well as workers from this path always operate on the local + * per-cpu data. CPU up doesn't touch memcg_stock at all. + */ curcpu = get_cpu(); for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *memcg; memcg = stock->cached; - if (!memcg || !stock->nr_pages) + if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css)) continue; - if (!mem_cgroup_is_descendant(memcg, root_memcg)) + if (!mem_cgroup_is_descendant(memcg, root_memcg)) { + css_put(&memcg->css); continue; + } if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) { if (cpu == curcpu) drain_local_stock(&stock->work); else schedule_work_on(cpu, &stock->work); } + css_put(&memcg->css); } put_cpu(); - put_online_cpus(); mutex_unlock(&percpu_charge_mutex); } From 3f2eb0287ebd62ec8d6d544f830285302279e6bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 3 Oct 2017 16:14:57 -0700 Subject: [PATCH 14/51] mm/memcg: avoid page count check for zone device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix for 4.14, zone device page always have an elevated refcount of one and thus page count sanity check in uncharge_page() is inappropriate for them. [mhocko@suse.com: nano-optimize VM_BUG_ON in uncharge_page] Link: http://lkml.kernel.org/r/20170914190011.5217-1-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Michal Hocko Reported-by: Evgeny Baskakov Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 696c6529e900..d5f3a62887cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5658,7 +5658,8 @@ static void uncharge_batch(const struct uncharge_gather *ug) static void uncharge_page(struct page *page, struct uncharge_gather *ug) { VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); + VM_BUG_ON_PAGE(page_count(page) && !is_zone_device_page(page) && + !PageHWPoison(page) , page); if (!page->mem_cgroup) return; From a1b2289cef92ef0e9a92afcd2e1ea71d5bcaaf64 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Tue, 3 Oct 2017 16:15:00 -0700 Subject: [PATCH 15/51] android: binder: drop lru lock in isolate callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the global lru lock in isolate callback before calling zap_page_range which calls cond_resched, and re-acquire the global lru lock before returning. Also change return code to LRU_REMOVED_RETRY. Use mmput_async when fail to acquire mmap sem in an atomic context. Fix "BUG: sleeping function called from invalid context" errors when CONFIG_DEBUG_ATOMIC_SLEEP is enabled. Also restore mmput_async, which was initially introduced in commit ec8d7c14ea14 ("mm, oom_reaper: do not mmput synchronously from the oom reaper context"), and was removed in commit 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently"). Link: http://lkml.kernel.org/r/20170914182231.90908-1-sherryy@android.com Fixes: f2517eb76f1f2 ("android: binder: Add global lru shrinker to binder") Signed-off-by: Sherry Yang Signed-off-by: Greg Kroah-Hartman Reported-by: Kyle Yan Acked-by: Arve Hjønnevåg Acked-by: Michal Hocko Cc: Martijn Coenen Cc: Todd Kjos Cc: Riley Andrews Cc: Ingo Molnar Cc: Vlastimil Babka Cc: Hillf Danton Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Oleg Nesterov Cc: Hoeun Ryu Cc: Christopher Lameter Cc: Vegard Nossum Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/android/binder_alloc.c | 18 ++++++++++++------ include/linux/sched/mm.h | 6 ++++++ kernel/fork.c | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 8fe165844e47..064f5e31ec55 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -913,6 +913,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, struct binder_alloc *alloc; uintptr_t page_addr; size_t index; + struct vm_area_struct *vma; alloc = page->alloc; if (!mutex_trylock(&alloc->mutex)) @@ -923,16 +924,22 @@ enum lru_status binder_alloc_free_page(struct list_head *item, index = page - alloc->pages; page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE; - if (alloc->vma) { + vma = alloc->vma; + if (vma) { mm = get_task_mm(alloc->tsk); if (!mm) goto err_get_task_mm_failed; if (!down_write_trylock(&mm->mmap_sem)) goto err_down_write_mmap_sem_failed; + } + list_lru_isolate(lru, item); + spin_unlock(lock); + + if (vma) { trace_binder_unmap_user_start(alloc, index); - zap_page_range(alloc->vma, + zap_page_range(vma, page_addr + alloc->user_buffer_offset, PAGE_SIZE); @@ -950,13 +957,12 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_kernel_end(alloc, index); - list_lru_isolate(lru, item); - + spin_lock(lock); mutex_unlock(&alloc->mutex); - return LRU_REMOVED; + return LRU_REMOVED_RETRY; err_down_write_mmap_sem_failed: - mmput(mm); + mmput_async(mm); err_get_task_mm_failed: err_page_already_freed: mutex_unlock(&alloc->mutex); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 3a19c253bdb1..ae53e413fb13 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -84,6 +84,12 @@ static inline bool mmget_not_zero(struct mm_struct *mm) /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +void mmput_async(struct mm_struct *); +#endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); diff --git a/kernel/fork.c b/kernel/fork.c index 10646182440f..e702cb9ffbd8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -946,6 +946,24 @@ void mmput(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmput); +#ifdef CONFIG_MMU +static void mmput_async_fn(struct work_struct *work) +{ + struct mm_struct *mm = container_of(work, struct mm_struct, + async_put_work); + + __mmput(mm); +} + +void mmput_async(struct mm_struct *mm) +{ + if (atomic_dec_and_test(&mm->mm_users)) { + INIT_WORK(&mm->async_put_work, mmput_async_fn); + schedule_work(&mm->async_put_work); + } +} +#endif + /** * set_mm_exe_file - change a reference to the mm's executable file * From 6818600ff094ca255a7fe31838ad50c29656c3c5 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 3 Oct 2017 16:15:03 -0700 Subject: [PATCH 16/51] mm,compaction: serialize waitqueue_active() checks (for real) Andrea brought to my attention that the L->{L,S} guarantees are completely bogus for this case. I was looking at the diagram, from the offending commit, when that _is_ the race, we had the load reordered already. What we need is at least S->L semantics, thus simply use wq_has_sleeper() to serialize the call for good. Link: http://lkml.kernel.org/r/20170914175313.GB811@linux-80c1.suse Fixes: 46acef048a6 (mm,compaction: serialize waitqueue_active() checks) Signed-off-by: Davidlohr Bueso Reported-by: Andrea Parri Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index fb548e4c7bd4..03d31a875341 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1999,17 +1999,14 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx) if (pgdat->kcompactd_max_order < order) pgdat->kcompactd_max_order = order; - /* - * Pairs with implicit barrier in wait_event_freezable() - * such that wakeups are not missed in the lockless - * waitqueue_active() call. - */ - smp_acquire__after_ctrl_dep(); - if (pgdat->kcompactd_classzone_idx > classzone_idx) pgdat->kcompactd_classzone_idx = classzone_idx; - if (!waitqueue_active(&pgdat->kcompactd_wait)) + /* + * Pairs with implicit barrier in wait_event_freezable() + * such that wakeups are not missed. + */ + if (!wq_has_sleeper(&pgdat->kcompactd_wait)) return; if (!kcompactd_node_suitable(pgdat)) From 3552935742e0d5f0dafd823736f45bdaa7ba672c Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Tue, 3 Oct 2017 16:15:06 -0700 Subject: [PATCH 17/51] z3fold: fix stale list handling Fix the situation when clear_bit() is called for page->private before the page pointer is actually assigned. While at it, remove work_busy() check because it is costly and does not give 100% guarantee anyway. Signed-off-by: Vitaly Wool Cc: Dan Streetman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index b04fa3ba1bf2..b2ba2ba585f3 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -250,6 +250,7 @@ static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) WARN_ON(!list_empty(&zhdr->buddy)); set_bit(PAGE_STALE, &page->private); + clear_bit(NEEDS_COMPACTING, &page->private); spin_lock(&pool->lock); if (!list_empty(&page->lru)) list_del(&page->lru); @@ -303,7 +304,6 @@ static void free_pages_work(struct work_struct *w) list_del(&zhdr->buddy); if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) continue; - clear_bit(NEEDS_COMPACTING, &page->private); spin_unlock(&pool->stale_lock); cancel_work_sync(&zhdr->work); free_z3fold_page(page); @@ -624,10 +624,8 @@ lookup: * stale pages list. cancel_work_sync() can sleep so we must make * sure it won't be called in case we're in atomic context. */ - if (zhdr && (can_sleep || !work_pending(&zhdr->work) || - !unlikely(work_busy(&zhdr->work)))) { + if (zhdr && (can_sleep || !work_pending(&zhdr->work))) { list_del(&zhdr->buddy); - clear_bit(NEEDS_COMPACTING, &page->private); spin_unlock(&pool->stale_lock); if (can_sleep) cancel_work_sync(&zhdr->work); From 57148a64e823bb1f49112fa52a92a7f372cda892 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Oct 2017 16:15:10 -0700 Subject: [PATCH 18/51] mm: meminit: mark init_reserved_page as __meminit The function is called from __meminit context and calls other __meminit functions but isn't it self mark as such today: WARNING: vmlinux.o(.text.unlikely+0x4516): Section mismatch in reference from the function init_reserved_page() to the function .meminit.text:early_pfn_to_nid() The function init_reserved_page() references the function __meminit early_pfn_to_nid(). This is often because init_reserved_page lacks a __meminit annotation or the annotation of early_pfn_to_nid is wrong. On most compilers, we don't notice this because the function gets inlined all the time. Adding __meminit here fixes the harmless warning for the old versions and is generally the correct annotation. Link: http://lkml.kernel.org/r/20170915193149.901180-1-arnd@arndb.de Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd") Signed-off-by: Arnd Bergmann Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c841af88836a..38d165a87860 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1190,7 +1190,7 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone, } #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -static void init_reserved_page(unsigned long pfn) +static void __meminit init_reserved_page(unsigned long pfn) { pg_data_t *pgdat; int nid, zid; From 31d1e130f4a0f8f629a460167569577cac9b17c1 Mon Sep 17 00:00:00 2001 From: Ioan Nicu Date: Tue, 3 Oct 2017 16:15:13 -0700 Subject: [PATCH 19/51] rapidio: remove global irq spinlocks from the subsystem Locking of config and doorbell operations should be done only if the underlying hardware requires it. This patch removes the global spinlocks from the rapidio subsystem and moves them to the mport drivers (fsl_rio and tsi721), only to the necessary places. For example, local config space read and write operations (lcread/lcwrite) are atomic in all existing drivers, so there should be no need for locking, while the cread/cwrite operations which generate maintenance transactions need to be synchronized with a lock. Later, each driver could chose to use a per-port lock instead of a global one, or even more granular locking. Link: http://lkml.kernel.org/r/20170824113023.GD50104@nokia.com Signed-off-by: Ioan Nicu Signed-off-by: Frank Kunz Acked-by: Alexandre Bounine Cc: Matt Porter Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 17 ++++++++++++-- arch/powerpc/sysdev/fsl_rmu.c | 8 +++++++ drivers/rapidio/devices/tsi721.c | 7 ++++++ drivers/rapidio/rio-access.c | 40 ++++---------------------------- 4 files changed, 35 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 9234be1e66f5..5011ffea4e4b 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -71,6 +71,8 @@ #define RIWAR_WRTYP_ALLOC 0x00006000 #define RIWAR_SIZE_MASK 0x0000003F +static DEFINE_SPINLOCK(fsl_rio_config_lock); + #define __fsl_read_rio_config(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ @@ -184,6 +186,7 @@ fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, u8 hopcount, u32 offset, int len, u32 *val) { struct rio_priv *priv = mport->priv; + unsigned long flags; u8 *data; u32 rval, err = 0; @@ -197,6 +200,8 @@ fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len)) return -EINVAL; + spin_lock_irqsave(&fsl_rio_config_lock, flags); + out_be32(&priv->maint_atmu_regs->rowtar, (destid << 22) | (hopcount << 12) | (offset >> 12)); out_be32(&priv->maint_atmu_regs->rowtear, (destid >> 10)); @@ -213,6 +218,7 @@ fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, __fsl_read_rio_config(rval, data, err, "lwz"); break; default: + spin_unlock_irqrestore(&fsl_rio_config_lock, flags); return -EINVAL; } @@ -221,6 +227,7 @@ fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, err, destid, hopcount, offset); } + spin_unlock_irqrestore(&fsl_rio_config_lock, flags); *val = rval; return err; @@ -244,7 +251,10 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, u8 hopcount, u32 offset, int len, u32 val) { struct rio_priv *priv = mport->priv; + unsigned long flags; u8 *data; + int ret = 0; + pr_debug ("fsl_rio_config_write:" " index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n", @@ -255,6 +265,8 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len)) return -EINVAL; + spin_lock_irqsave(&fsl_rio_config_lock, flags); + out_be32(&priv->maint_atmu_regs->rowtar, (destid << 22) | (hopcount << 12) | (offset >> 12)); out_be32(&priv->maint_atmu_regs->rowtear, (destid >> 10)); @@ -271,10 +283,11 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, out_be32((u32 *) data, val); break; default: - return -EINVAL; + ret = -EINVAL; } + spin_unlock_irqrestore(&fsl_rio_config_lock, flags); - return 0; + return ret; } static void fsl_rio_inbound_mem_init(struct rio_priv *priv) diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c index ab7a74c75be8..88b35a3dcdc5 100644 --- a/arch/powerpc/sysdev/fsl_rmu.c +++ b/arch/powerpc/sysdev/fsl_rmu.c @@ -104,6 +104,8 @@ #define DOORBELL_MESSAGE_SIZE 0x08 +static DEFINE_SPINLOCK(fsl_rio_doorbell_lock); + struct rio_msg_regs { u32 omr; u32 osr; @@ -626,9 +628,13 @@ err_out: int fsl_rio_doorbell_send(struct rio_mport *mport, int index, u16 destid, u16 data) { + unsigned long flags; + pr_debug("fsl_doorbell_send: index %d destid %4.4x data %4.4x\n", index, destid, data); + spin_lock_irqsave(&fsl_rio_doorbell_lock, flags); + /* In the serial version silicons, such as MPC8548, MPC8641, * below operations is must be. */ @@ -638,6 +644,8 @@ int fsl_rio_doorbell_send(struct rio_mport *mport, out_be32(&dbell->dbell_regs->oddatr, (index << 20) | data); out_be32(&dbell->dbell_regs->odmr, 0x00000001); + spin_unlock_irqrestore(&fsl_rio_doorbell_lock, flags); + return 0; } diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 315a4be8dc1e..9a68914100ad 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -51,6 +51,8 @@ module_param(mbox_sel, byte, S_IRUGO); MODULE_PARM_DESC(mbox_sel, "RIO Messaging MBOX Selection Mask (default: 0x0f = all)"); +static DEFINE_SPINLOCK(tsi721_maint_lock); + static void tsi721_omsg_handler(struct tsi721_device *priv, int ch); static void tsi721_imsg_handler(struct tsi721_device *priv, int ch); @@ -124,12 +126,15 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size, void __iomem *regs = priv->regs + TSI721_DMAC_BASE(priv->mdma.ch_id); struct tsi721_dma_desc *bd_ptr; u32 rd_count, swr_ptr, ch_stat; + unsigned long flags; int i, err = 0; u32 op = do_wr ? MAINT_WR : MAINT_RD; if (offset > (RIO_MAINT_SPACE_SZ - len) || (len != sizeof(u32))) return -EINVAL; + spin_lock_irqsave(&tsi721_maint_lock, flags); + bd_ptr = priv->mdma.bd_base; rd_count = ioread32(regs + TSI721_DMAC_DRDCNT); @@ -197,7 +202,9 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size, */ swr_ptr = ioread32(regs + TSI721_DMAC_DSWP); iowrite32(swr_ptr, regs + TSI721_DMAC_DSRP); + err_out: + spin_unlock_irqrestore(&tsi721_maint_lock, flags); return err; } diff --git a/drivers/rapidio/rio-access.c b/drivers/rapidio/rio-access.c index a3824baca2e5..3ee9af83b638 100644 --- a/drivers/rapidio/rio-access.c +++ b/drivers/rapidio/rio-access.c @@ -13,17 +13,9 @@ #include #include -/* - * These interrupt-safe spinlocks protect all accesses to RIO - * configuration space and doorbell access. - */ -static DEFINE_SPINLOCK(rio_config_lock); -static DEFINE_SPINLOCK(rio_doorbell_lock); - /* * Wrappers for all RIO configuration access functions. They just check - * alignment, do locking and call the low-level functions pointed to - * by rio_mport->ops. + * alignment and call the low-level functions pointed to by rio_mport->ops. */ #define RIO_8_BAD 0 @@ -44,13 +36,10 @@ int __rio_local_read_config_##size \ (struct rio_mport *mport, u32 offset, type *value) \ { \ int res; \ - unsigned long flags; \ u32 data = 0; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ - spin_lock_irqsave(&rio_config_lock, flags); \ res = mport->ops->lcread(mport, mport->id, offset, len, &data); \ *value = (type)data; \ - spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ } @@ -67,13 +56,8 @@ int __rio_local_read_config_##size \ int __rio_local_write_config_##size \ (struct rio_mport *mport, u32 offset, type value) \ { \ - int res; \ - unsigned long flags; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ - spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->lcwrite(mport, mport->id, offset, len, value);\ - spin_unlock_irqrestore(&rio_config_lock, flags); \ - return res; \ + return mport->ops->lcwrite(mport, mport->id, offset, len, value);\ } RIO_LOP_READ(8, u8, 1) @@ -104,13 +88,10 @@ int rio_mport_read_config_##size \ (struct rio_mport *mport, u16 destid, u8 hopcount, u32 offset, type *value) \ { \ int res; \ - unsigned long flags; \ u32 data = 0; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ - spin_lock_irqsave(&rio_config_lock, flags); \ res = mport->ops->cread(mport, mport->id, destid, hopcount, offset, len, &data); \ *value = (type)data; \ - spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ } @@ -127,13 +108,9 @@ int rio_mport_read_config_##size \ int rio_mport_write_config_##size \ (struct rio_mport *mport, u16 destid, u8 hopcount, u32 offset, type value) \ { \ - int res; \ - unsigned long flags; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ - spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->cwrite(mport, mport->id, destid, hopcount, offset, len, value); \ - spin_unlock_irqrestore(&rio_config_lock, flags); \ - return res; \ + return mport->ops->cwrite(mport, mport->id, destid, hopcount, \ + offset, len, value); \ } RIO_OP_READ(8, u8, 1) @@ -162,14 +139,7 @@ EXPORT_SYMBOL_GPL(rio_mport_write_config_32); */ int rio_mport_send_doorbell(struct rio_mport *mport, u16 destid, u16 data) { - int res; - unsigned long flags; - - spin_lock_irqsave(&rio_doorbell_lock, flags); - res = mport->ops->dsend(mport, mport->id, destid, data); - spin_unlock_irqrestore(&rio_doorbell_lock, flags); - - return res; + return mport->ops->dsend(mport, mport->id, destid, data); } EXPORT_SYMBOL_GPL(rio_mport_send_doorbell); From a872eb2131e91ce7c89a8888974a5e22a272b12f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 3 Oct 2017 16:15:16 -0700 Subject: [PATCH 20/51] mm: fix RODATA_TEST failure "rodata_test: test data was not read only" On powerpc, RODATA_TEST fails with message the following messages: Freeing unused kernel memory: 528K rodata_test: test data was not read only This is because GCC allocates it to .data section: c0695034 g O .data 00000004 rodata_test_data Since commit 056b9d8a7692 ("mm: remove rodata_test_data export, add pr_fmt"), rodata_test_data is used only inside rodata_test.c By declaring it static, it gets properly allocated into .rodata section instead of .data: c04df710 l O .rodata 00000004 rodata_test_data Fixes: 056b9d8a7692 ("mm: remove rodata_test_data export, add pr_fmt") Link: http://lkml.kernel.org/r/20170921093729.1080368AC1@po15668-vm-win7.idsi0.si.c-s.fr Signed-off-by: Christophe Leroy Cc: Kees Cook Cc: Jinbum Park Cc: Segher Boessenkool Cc: David Laight Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rodata_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/rodata_test.c b/mm/rodata_test.c index 6bb4deb12e78..d908c8769b48 100644 --- a/mm/rodata_test.c +++ b/mm/rodata_test.c @@ -14,7 +14,7 @@ #include #include -const int rodata_test_data = 0xC3; +static const int rodata_test_data = 0xC3; void rodata_test(void) { From ae94264ed4b0cf7cd887947650db4c69acb62072 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 3 Oct 2017 16:15:19 -0700 Subject: [PATCH 21/51] zram: fix null dereference of handle In testing I found handle passed to zs_map_object in __zram_bvec_read is NULL so eh kernel goes oops in pin_object(). The reason is there is no routine to check the slot's freeing after getting the slot's lock. This patch fixes it. [minchan@kernel.org: v2] Link: http://lkml.kernel.org/r/1505887347-10881-1-git-send-email-minchan@kernel.org Link: http://lkml.kernel.org/r/1505788488-26723-1-git-send-email-minchan@kernel.org Fixes: 1f7319c74275 ("zram: partial IO refactoring") Signed-off-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 36 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 2981c27d3aae..f149d3e61234 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -766,27 +766,6 @@ static void zram_slot_unlock(struct zram *zram, u32 index) bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); } -static bool zram_same_page_read(struct zram *zram, u32 index, - struct page *page, - unsigned int offset, unsigned int len) -{ - zram_slot_lock(zram, index); - if (unlikely(!zram_get_handle(zram, index) || - zram_test_flag(zram, index, ZRAM_SAME))) { - void *mem; - - zram_slot_unlock(zram, index); - mem = kmap_atomic(page); - zram_fill_page(mem + offset, len, - zram_get_element(zram, index)); - kunmap_atomic(mem); - return true; - } - zram_slot_unlock(zram, index); - - return false; -} - static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -884,11 +863,20 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, zram_slot_unlock(zram, index); } - if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE)) - return 0; - zram_slot_lock(zram, index); handle = zram_get_handle(zram, index); + if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { + unsigned long value; + void *mem; + + value = handle ? zram_get_element(zram, index) : 0; + mem = kmap_atomic(page); + zram_fill_page(mem, PAGE_SIZE, value); + kunmap_atomic(mem); + zram_slot_unlock(zram, index); + return 0; + } + size = zram_get_obj_size(zram, index); src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); From 5bdfca6435b8294490ffb5b7c8b7d8eac3814b06 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 3 Oct 2017 16:15:23 -0700 Subject: [PATCH 22/51] m32r: define CPU_BIG_ENDIAN The build of m32r allmodconfig is giving lots of build warnings about: include/linux/byteorder/big_endian.h:7:2: warning: #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN [-Wcpp] #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN Define CPU_BIG_ENDIAN like the way CPU_LITTLE_ENDIAN is defined. Link: http://lkml.kernel.org/r/1505678083-10320-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 87cde1e4b38c..0777f3a8a1f3 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -194,6 +194,10 @@ config TIMER_DIVIDE int "Timer divider (integer)" default "128" +config CPU_BIG_ENDIAN + bool "Generate big endian code" + default n + config CPU_LITTLE_ENDIAN bool "Generate little endian code" default n From f4e222c56c83b2aed7cc2b329fca7435508eefa1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 3 Oct 2017 16:15:25 -0700 Subject: [PATCH 23/51] mm: have filemap_check_and_advance_wb_err clear AS_EIO/AS_ENOSPC Eryu noticed that he could sometimes get a leftover error reported when it shouldn't be on fsync with ext2 and non-journalled ext4. The problem is that writeback_single_inode still uses filemap_fdatawait. That picks up a previously set AS_EIO flag, which would ordinarily have been cleared before. Since we're mostly using this function as a replacement for filemap_check_errors, have filemap_check_and_advance_wb_err clear AS_EIO and AS_ENOSPC when reporting an error. That should allow the new function to better emulate the behavior of the old with respect to these flags. Link: http://lkml.kernel.org/r/20170922133331.28812-1-jlayton@kernel.org Signed-off-by: Jeff Layton Reported-by: Eryu Guan Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index db250d0e0565..594d73fef8b4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -620,6 +620,14 @@ int file_check_and_advance_wb_err(struct file *file) trace_file_check_and_advance_wb_err(file, old); spin_unlock(&file->f_lock); } + + /* + * We're mostly using this function as a drop in replacement for + * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect + * that the legacy code would have had on these flags. + */ + clear_bit(AS_EIO, &mapping->flags); + clear_bit(AS_ENOSPC, &mapping->flags); return err; } EXPORT_SYMBOL(file_check_and_advance_wb_err); From 24c92eb7dce0a299b8e1a8c5fa585844a53bf7f0 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 3 Oct 2017 16:15:29 -0700 Subject: [PATCH 24/51] mm: avoid marking swap cached page as lazyfree MADV_FREE clears pte dirty bit and then marks the page lazyfree (clear SwapBacked). There is no lock to prevent the page is added to swap cache between these two steps by page reclaim. Page reclaim could add the page to swap cache and unmap the page. After page reclaim, the page is added back to lru. At that time, we probably start draining per-cpu pagevec and mark the page lazyfree. So the page could be in a state with SwapBacked cleared and PG_swapcache set. Next time there is a refault in the virtual address, do_swap_page can find the page from swap cache but the page has PageSwapCache false because SwapBacked isn't set, so do_swap_page will bail out and do nothing. The task will keep running into fault handler. Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages") Link: http://lkml.kernel.org/r/6537ef3814398c0073630b03f176263bc81f0902.1506446061.git.shli@fb.com Signed-off-by: Shaohua Li Reported-by: Artem Savkov Tested-by: Artem Savkov Reviewed-by: Rik van Riel Acked-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Minchan Kim Cc: Hillf Danton Cc: Hugh Dickins Cc: Mel Gorman Cc: [4.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/swap.c b/mm/swap.c index 9295ae960d66..a77d68f2c1b6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -575,7 +575,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, void *arg) { if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && - !PageUnevictable(page)) { + !PageSwapCache(page) && !PageUnevictable(page)) { bool active = PageActive(page); del_page_from_lru_list(page, lruvec, @@ -665,7 +665,7 @@ void deactivate_file_page(struct page *page) void mark_page_lazyfree(struct page *page) { if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && - !PageUnevictable(page)) { + !PageSwapCache(page) && !PageUnevictable(page)) { struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs); get_page(page); From 9625456cc76391b7f3f2809579126542a8ed4d39 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 3 Oct 2017 16:15:32 -0700 Subject: [PATCH 25/51] mm: fix data corruption caused by lazyfree page MADV_FREE clears pte dirty bit and then marks the page lazyfree (clear SwapBacked). There is no lock to prevent the page is added to swap cache between these two steps by page reclaim. If page reclaim finds such page, it will simply add the page to swap cache without pageout the page to swap because the page is marked as clean. Next time, page fault will read data from the swap slot which doesn't have the original data, so we have a data corruption. To fix issue, we mark the page dirty and pageout the page. However, we shouldn't dirty all pages which is clean and in swap cache. swapin page is swap cache and clean too. So we only dirty page which is added into swap cache in page reclaim, which shouldn't be swapin page. As Minchan suggested, simply dirty the page in add_to_swap can do the job. Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages") Link: http://lkml.kernel.org/r/08c84256b007bf3f63c91d94383bd9eb6fee2daa.1506446061.git.shli@fb.com Signed-off-by: Shaohua Li Reported-by: Artem Savkov Acked-by: Michal Hocko Acked-by: Minchan Kim Cc: Johannes Weiner Cc: Hillf Danton Cc: Hugh Dickins Cc: Rik van Riel Cc: Mel Gorman Cc: [4.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_state.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/swap_state.c b/mm/swap_state.c index 71ce2d1ccbf7..ed91091d1e68 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -242,6 +242,17 @@ int add_to_swap(struct page *page) * clear SWAP_HAS_CACHE flag. */ goto fail; + /* + * Normally the page will be dirtied in unmap because its pte should be + * dirty. A special case is MADV_FREE page. The page'e pte could have + * dirty bit cleared but the page's SwapBacked bit is still set because + * clearing the dirty bit and SwapBacked bit has no lock protected. For + * such page, unmap will not set dirty bit for it, so page reclaim will + * not write the page out. This can cause data corruption when the page + * is swap in later. Always setting the dirty bit for the page solves + * the problem. + */ + set_page_dirty(page); return 1; From 7d790d2da386a52cfebcf0c898ba927bece9d4ab Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Tue, 3 Oct 2017 16:15:35 -0700 Subject: [PATCH 26/51] mm/device-public-memory: fix edge case in _vm_normal_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With device public pages at the end of my memory space, I'm getting output from _vm_normal_page(): BUG: Bad page map in process migrate_pages pte:c0800001ffff0d06 pmd:f95d3000 addr:00007fff89330000 vm_flags:00100073 anon_vma:c0000000fa899320 mapping: (null) index:7fff8933 file: (null) fault: (null) mmap: (null) readpage: (null) CPU: 0 PID: 13963 Comm: migrate_pages Tainted: P B OE 4.14.0-rc1-wip #155 Call Trace: dump_stack+0xb0/0xf4 (unreliable) print_bad_pte+0x28c/0x340 _vm_normal_page+0xc0/0x140 zap_pte_range+0x664/0xc10 unmap_page_range+0x318/0x670 unmap_vmas+0x74/0xe0 exit_mmap+0xe8/0x1f0 mmput+0xac/0x1f0 do_exit+0x348/0xcd0 do_group_exit+0x5c/0xf0 SyS_exit_group+0x1c/0x20 system_call+0x58/0x6c The pfn causing this is the very last one. Correct the bounds check accordingly. Fixes: df6ad69838fc ("mm/device-public-memory: device memory cache coherent with CPU") Link: http://lkml.kernel.org/r/1506092178-20351-1-git-send-email-arbab@linux.vnet.ibm.com Signed-off-by: Reza Arbab Reviewed-by: Jérôme Glisse Reviewed-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index ec4e15494901..a728bed16c20 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -845,7 +845,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, * vm_normal_page() so that we do not have to special case all * call site of vm_normal_page(). */ - if (likely(pfn < highest_memmap_pfn)) { + if (likely(pfn <= highest_memmap_pfn)) { struct page *page = pfn_to_page(pfn); if (is_device_public_page(page)) { From 384632e67e0829deb8015ee6ad916b180049d252 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 3 Oct 2017 16:15:38 -0700 Subject: [PATCH 27/51] userfaultfd: non-cooperative: fix fork use after free When reading the event from the uffd, we put it on a temporary fork_event list to detect if we can still access it after releasing and retaking the event_wqh.lock. If fork aborts and removes the event from the fork_event all is fine as long as we're still in the userfault read context and fork_event head is still alive. We've to put the event allocated in the fork kernel stack, back from fork_event list-head to the event_wqh head, before returning from userfaultfd_ctx_read, because the fork_event head lifetime is limited to the userfaultfd_ctx_read stack lifetime. Forgetting to move the event back to its event_wqh place then results in __remove_wait_queue(&ctx->event_wqh, &ewq->wq); in userfaultfd_event_wait_completion to remove it from a head that has been already freed from the reader stack. This could only happen if resolve_userfault_fork failed (for example if there are no file descriptors available to allocate the fork uffd). If it succeeded it was put back correctly. Furthermore, after find_userfault_evt receives a fork event, the forked userfault context in fork_nctx and uwq->msg.arg.reserved.reserved1 can be released by the fork thread as soon as the event_wqh.lock is released. Taking a reference on the fork_nctx before dropping the lock prevents an use after free in resolve_userfault_fork(). If the fork side aborted and it already released everything, we still try to succeed resolve_userfault_fork(), if possible. Fixes: 893e26e61d04eac9 ("userfaultfd: non-cooperative: Add fork() event") Link: http://lkml.kernel.org/r/20170920180413.26713-1-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Mark Rutland Tested-by: Mark Rutland Cc: Pavel Emelyanov Cc: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 66 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ef4b48d1ea42..1c713fd5b3e6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -588,6 +588,12 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; if (ACCESS_ONCE(ctx->released) || fatal_signal_pending(current)) { + /* + * &ewq->wq may be queued in fork_event, but + * __remove_wait_queue ignores the head + * parameter. It would be a problem if it + * didn't. + */ __remove_wait_queue(&ctx->event_wqh, &ewq->wq); if (ewq->msg.event == UFFD_EVENT_FORK) { struct userfaultfd_ctx *new; @@ -1061,6 +1067,12 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, (unsigned long) uwq->msg.arg.reserved.reserved1; list_move(&uwq->wq.entry, &fork_event); + /* + * fork_nctx can be freed as soon as + * we drop the lock, unless we take a + * reference on it. + */ + userfaultfd_ctx_get(fork_nctx); spin_unlock(&ctx->event_wqh.lock); ret = 0; break; @@ -1091,19 +1103,53 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (!ret && msg->event == UFFD_EVENT_FORK) { ret = resolve_userfault_fork(ctx, fork_nctx, msg); + spin_lock(&ctx->event_wqh.lock); + if (!list_empty(&fork_event)) { + /* + * The fork thread didn't abort, so we can + * drop the temporary refcount. + */ + userfaultfd_ctx_put(fork_nctx); - if (!ret) { - spin_lock(&ctx->event_wqh.lock); - if (!list_empty(&fork_event)) { - uwq = list_first_entry(&fork_event, - typeof(*uwq), - wq.entry); - list_del(&uwq->wq.entry); - __add_wait_queue(&ctx->event_wqh, &uwq->wq); + uwq = list_first_entry(&fork_event, + typeof(*uwq), + wq.entry); + /* + * If fork_event list wasn't empty and in turn + * the event wasn't already released by fork + * (the event is allocated on fork kernel + * stack), put the event back to its place in + * the event_wq. fork_event head will be freed + * as soon as we return so the event cannot + * stay queued there no matter the current + * "ret" value. + */ + list_del(&uwq->wq.entry); + __add_wait_queue(&ctx->event_wqh, &uwq->wq); + + /* + * Leave the event in the waitqueue and report + * error to userland if we failed to resolve + * the userfault fork. + */ + if (likely(!ret)) userfaultfd_event_complete(ctx, uwq); - } - spin_unlock(&ctx->event_wqh.lock); + } else { + /* + * Here the fork thread aborted and the + * refcount from the fork thread on fork_nctx + * has already been released. We still hold + * the reference we took before releasing the + * lock above. If resolve_userfault_fork + * failed we've to drop it because the + * fork_nctx has to be freed in such case. If + * it succeeded we'll hold it because the new + * uffd references it. + */ + if (ret) + userfaultfd_ctx_put(fork_nctx); } + spin_unlock(&ctx->event_wqh.lock); } return ret; From c2315c187fa0d3ab363fdebe22718170b40473e3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:42 -0700 Subject: [PATCH 28/51] exec: load_script: kill the onstack interp[BINPRM_BUF_SIZE] array Patch series "exec: binfmt_misc: fix use-after-free, kill iname[BINPRM_BUF_SIZE]". It looks like this code was always wrong, then commit 948b701a607f ("binfmt_misc: add persistent opened binary handler for containers") added more problems. This patch (of 6): load_script() can simply use i_name instead, it points into bprm->buf[] and nobody can change this memory until we call prepare_binprm(). The only complication is that we need to also change the signature of bprm_change_interp() but this change looks good too. While at it, do whitespace/style cleanups. NOTE: the real motivation for this change is that people want to increase BINPRM_BUF_SIZE, we need to change load_misc_binary() too but this looks more complicated because afaics it is very buggy. Link: http://lkml.kernel.org/r/20170918163446.GA26793@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Travis Gummels Cc: Ben Woodard Cc: Jim Foraker Cc: Cc: Al Viro Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_script.c | 17 +++++++++-------- fs/exec.c | 2 +- include/linux/binfmts.h | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index afdf4e3cafc2..7cde3f46ad26 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -19,7 +19,6 @@ static int load_script(struct linux_binprm *bprm) const char *i_arg, *i_name; char *cp; struct file *file; - char interp[BINPRM_BUF_SIZE]; int retval; if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) @@ -55,7 +54,7 @@ static int load_script(struct linux_binprm *bprm) break; } for (cp = bprm->buf+2; (*cp == ' ') || (*cp == '\t'); cp++); - if (*cp == '\0') + if (*cp == '\0') return -ENOEXEC; /* No interpreter name found */ i_name = cp; i_arg = NULL; @@ -65,7 +64,6 @@ static int load_script(struct linux_binprm *bprm) *cp++ = '\0'; if (*cp) i_arg = cp; - strcpy (interp, i_name); /* * OK, we've parsed out the interpreter name and * (optional) argument. @@ -80,24 +78,27 @@ static int load_script(struct linux_binprm *bprm) if (retval) return retval; retval = copy_strings_kernel(1, &bprm->interp, bprm); - if (retval < 0) return retval; + if (retval < 0) + return retval; bprm->argc++; if (i_arg) { retval = copy_strings_kernel(1, &i_arg, bprm); - if (retval < 0) return retval; + if (retval < 0) + return retval; bprm->argc++; } retval = copy_strings_kernel(1, &i_name, bprm); - if (retval) return retval; + if (retval) + return retval; bprm->argc++; - retval = bprm_change_interp(interp, bprm); + retval = bprm_change_interp(i_name, bprm); if (retval < 0) return retval; /* * OK, now restart the process with the interpreter's dentry. */ - file = open_exec(interp); + file = open_exec(i_name); if (IS_ERR(file)) return PTR_ERR(file); diff --git a/fs/exec.c b/fs/exec.c index ac34d9724684..5470d3c1892a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1410,7 +1410,7 @@ static void free_bprm(struct linux_binprm *bprm) kfree(bprm); } -int bprm_change_interp(char *interp, struct linux_binprm *bprm) +int bprm_change_interp(const char *interp, struct linux_binprm *bprm) { /* If a binfmt changed the interp, free it first. */ if (bprm->interp != bprm->filename) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index fb44d6180ca0..18d05b5491f3 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -131,7 +131,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, int executable_stack); extern int transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *sp_location); -extern int bprm_change_interp(char *interp, struct linux_binprm *bprm); +extern int bprm_change_interp(const char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); From baba1b29731c79d605100087b8f02f9e1cf5a344 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:45 -0700 Subject: [PATCH 29/51] exec: binfmt_misc: don't nullify Node->dentry in kill_node() kill_node() nullifies/checks Node->dentry to avoid double free. This complicates the next changes and this is very confusing: - we do not need to check dentry != NULL under entries_lock, kill_node() is always called under inode_lock(d_inode(root)) and we rely on this inode_lock() anyway, without this lock the MISC_FMT_OPEN_FILE cleanup could race with itself. - if kill_inode() was already called and ->dentry == NULL we should not even try to close e->interp_file. We can change bm_entry_write() to simply check !list_empty(list) before kill_node. Again, we rely on inode_lock(), in particular it saves us from the race with bm_status_write(), another caller of kill_node(). Link: http://lkml.kernel.org/r/20170922143641.GA17210@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Cc: Travis Gummels Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index ce7181ea60fa..6451e7520e05 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -603,11 +603,7 @@ static void kill_node(Node *e) struct dentry *dentry; write_lock(&entries_lock); - dentry = e->dentry; - if (dentry) { - list_del_init(&e->list); - e->dentry = NULL; - } + list_del_init(&e->list); write_unlock(&entries_lock); if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) { @@ -615,12 +611,11 @@ static void kill_node(Node *e) e->interp_file = NULL; } - if (dentry) { - drop_nlink(d_inode(dentry)); - d_drop(dentry); - dput(dentry); - simple_release_fs(&bm_mnt, &entry_count); - } + dentry = e->dentry; + drop_nlink(d_inode(dentry)); + d_drop(dentry); + dput(dentry); + simple_release_fs(&bm_mnt, &entry_count); } /* / */ @@ -665,7 +660,8 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, root = file_inode(file)->i_sb->s_root; inode_lock(d_inode(root)); - kill_node(e); + if (!list_empty(&e->list)) + kill_node(e); inode_unlock(d_inode(root)); break; @@ -794,7 +790,7 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer, inode_lock(d_inode(root)); while (!list_empty(&entries)) - kill_node(list_entry(entries.next, Node, list)); + kill_node(list_first_entry(&entries, Node, list)); inode_unlock(d_inode(root)); break; From 83f918274e4b841d6fb817861ea0c896fba0c179 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:48 -0700 Subject: [PATCH 30/51] exec: binfmt_misc: shift filp_close(interp_file) from kill_node() to bm_evict_inode() To ensure that load_misc_binary() can't use the partially destroyed Node, see also the next patch. The current logic looks wrong in any case, once we close interp_file it doesn't make any sense to delay kfree(inode->i_private), this Node is no longer valid. Even if the MISC_FMT_OPEN_FILE/interp_file checks were not racy (they are), load_misc_binary() should not try to reopen ->interpreter if MISC_FMT_OPEN_FILE is set but ->interp_file is NULL. And I can't understand why do we use filp_close(), not fput(). Link: http://lkml.kernel.org/r/20170922143644.GA17216@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Cc: Travis Gummels Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 6451e7520e05..203598ccb40a 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -594,8 +594,13 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) static void bm_evict_inode(struct inode *inode) { + Node *e = inode->i_private; + + if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) + filp_close(e->interp_file, NULL); + clear_inode(inode); - kfree(inode->i_private); + kfree(e); } static void kill_node(Node *e) @@ -606,11 +611,6 @@ static void kill_node(Node *e) list_del_init(&e->list); write_unlock(&entries_lock); - if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) { - filp_close(e->interp_file, NULL); - e->interp_file = NULL; - } - dentry = e->dentry; drop_nlink(d_inode(dentry)); d_drop(dentry); From eb23aa0317eb1f08e8d9d36b8753d42f03b32764 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:51 -0700 Subject: [PATCH 31/51] exec: binfmt_misc: remove the confusing e->interp_file != NULL checks If MISC_FMT_OPEN_FILE flag is set e->interp_file must be valid or we have a bug which should not be silently ignored. Link: http://lkml.kernel.org/r/20170922143647.GA17222@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Cc: Travis Gummels Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 203598ccb40a..babfe7bd56f0 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -205,7 +205,7 @@ static int load_misc_binary(struct linux_binprm *bprm) if (retval < 0) goto error; - if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) { + if (fmt->flags & MISC_FMT_OPEN_FILE) { interp_file = filp_clone_open(fmt->interp_file); if (!IS_ERR(interp_file)) deny_write_access(interp_file); @@ -596,7 +596,7 @@ static void bm_evict_inode(struct inode *inode) { Node *e = inode->i_private; - if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) + if (e->flags & MISC_FMT_OPEN_FILE) filp_close(e->interp_file, NULL); clear_inode(inode); From 43a4f2619038002f48c78698c42c05692d4b4eb2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:55 -0700 Subject: [PATCH 32/51] exec: binfmt_misc: fix race between load_misc_binary() and kill_node() load_misc_binary() makes a local copy of fmt->interpreter under entries_lock to avoid the race with kill_node() but this is not enough; the whole Node can be freed after we drop entries_lock, not only the ->interpreter string. Add dget/dput(fmt->dentry) to ensure bm_evict_inode() can't destroy/free this Node. Link: http://lkml.kernel.org/r/20170922143650.GA17227@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Travis Gummels Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index babfe7bd56f0..f5f8c2541790 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -138,20 +138,23 @@ static int load_misc_binary(struct linux_binprm *bprm) retval = -ENOEXEC; if (!enabled) - goto ret; + return retval; /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); - if (fmt) + if (fmt) { + dget(fmt->dentry); strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE); + } read_unlock(&entries_lock); if (!fmt) - goto ret; + return retval; /* Need to be able to load the file after exec */ + retval = -ENOENT; if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) - return -ENOENT; + goto ret; if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { retval = remove_arg_zero(bprm); @@ -238,6 +241,7 @@ static int load_misc_binary(struct linux_binprm *bprm) goto error; ret: + dput(fmt->dentry); return retval; error: if (fd_binary > 0) From 50097f74934e3ec8fb1e6f3087568b958972817d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:58 -0700 Subject: [PATCH 33/51] exec: binfmt_misc: kill the onstack iname[BINPRM_BUF_SIZE] array After the previous change "fmt" can't go away, we can kill iname/iname_addr and use fmt->interpreter. Link: http://lkml.kernel.org/r/20170922143653.GA17232@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Cc: Travis Gummels Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index f5f8c2541790..2a46762def31 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -54,7 +54,7 @@ typedef struct { int size; /* size of magic/mask */ char *magic; /* magic or filename extension */ char *mask; /* mask, NULL for exact match */ - char *interpreter; /* filename of interpreter */ + const char *interpreter; /* filename of interpreter */ char *name; struct dentry *dentry; struct file *interp_file; @@ -131,8 +131,6 @@ static int load_misc_binary(struct linux_binprm *bprm) { Node *fmt; struct file *interp_file = NULL; - char iname[BINPRM_BUF_SIZE]; - const char *iname_addr = iname; int retval; int fd_binary = -1; @@ -143,10 +141,8 @@ static int load_misc_binary(struct linux_binprm *bprm) /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); - if (fmt) { + if (fmt) dget(fmt->dentry); - strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE); - } read_unlock(&entries_lock); if (!fmt) return retval; @@ -198,13 +194,13 @@ static int load_misc_binary(struct linux_binprm *bprm) bprm->argc++; /* add the interp as argv[0] */ - retval = copy_strings_kernel(1, &iname_addr, bprm); + retval = copy_strings_kernel(1, &fmt->interpreter, bprm); if (retval < 0) goto error; bprm->argc++; /* Update interp in case binfmt_script needs it. */ - retval = bprm_change_interp(iname, bprm); + retval = bprm_change_interp(fmt->interpreter, bprm); if (retval < 0) goto error; @@ -213,7 +209,7 @@ static int load_misc_binary(struct linux_binprm *bprm) if (!IS_ERR(interp_file)) deny_write_access(interp_file); } else { - interp_file = open_exec(iname); + interp_file = open_exec(fmt->interpreter); } retval = PTR_ERR(interp_file); if (IS_ERR(interp_file)) From 8cb5d7482810b7eb1bb05bf4f71bc93ce35e5896 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Oct 2017 16:16:01 -0700 Subject: [PATCH 34/51] lib/lz4: make arrays static const, reduces object code size Don't populate the read-only arrays dec32table and dec64table on the stack, instead make them both static const. Makes the object code smaller by over 10K bytes: Before: text data bss dec hex filename 31500 0 0 31500 7b0c lib/lz4/lz4_decompress.o After: text data bss dec hex filename 20237 176 0 20413 4fbd lib/lz4/lz4_decompress.o (gcc version 7.2.0 x86_64) Link: http://lkml.kernel.org/r/20170921221939.20820-1-colin.king@canonical.com Signed-off-by: Colin Ian King Cc: Christophe JAILLET Cc: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Cc: Arnd Bergmann Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/lz4/lz4_decompress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index bd3574312b82..141734d255e4 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -85,8 +85,8 @@ static FORCE_INLINE int LZ4_decompress_generic( const BYTE * const lowLimit = lowPrefix - dictSize; const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize; - const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; - const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; + static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; + static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; const int safeDecode = (endOnInput == endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB))); From 7240767450d6d8380fb153e2998a1bb4ede7b029 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Oct 2017 16:16:04 -0700 Subject: [PATCH 35/51] include/linux/bitfield.h: remove 32bit from FIELD_GET comment block I do not see anything that restricts this macro to 32 bit width. Link: http://lkml.kernel.org/r/1505921975-23379-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Jakub Kicinski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 8b9d6fff002d..f2deb71958b2 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -92,7 +92,7 @@ /** * FIELD_GET() - extract a bitfield element * @_mask: shifted mask defining the field's length and position - * @_reg: 32bit value of entire bitfield + * @_reg: value of entire bitfield * * FIELD_GET() extracts the field specified by @_mask from the * bitfield passed in as @_reg by masking and shifting it down. From 3181c38e4df257852a0c0a53552fd5c869402886 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 3 Oct 2017 16:16:07 -0700 Subject: [PATCH 36/51] kernel/sysctl.c: remove duplicate UINT_MAX check on do_proc_douintvec_conv() do_proc_douintvec_conv() has two UINT_MAX checks, we can remove one. This has no functional changes other than fixing a compiler warning: kernel/sysctl.c:2190]: (warning) Identical condition '*lvalp>UINT_MAX', second condition is always false Fixes: 4f2fec00afa60 ("sysctl: simplify unsigned int support") Link: http://lkml.kernel.org/r/20170919072918.12066-1-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Reported-by: David Binderman Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 423554ad3610..4da9e622471f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2186,8 +2186,6 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, int write, void *data) { if (write) { - if (*lvalp > UINT_MAX) - return -EINVAL; if (*lvalp > UINT_MAX) return -EINVAL; *valp = *lvalp; From f80c7dab95a1f0f968acbafe4426ee9525b6f6ab Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 3 Oct 2017 16:16:10 -0700 Subject: [PATCH 37/51] mm: memcontrol: use vmalloc fallback for large kmem memcg arrays For quick per-memcg indexing, slab caches and list_lru structures maintain linear arrays of descriptors. As the number of concurrent memory cgroups in the system goes up, this requires large contiguous allocations (8k cgroups = order-5, 16k cgroups = order-6 etc.) for every existing slab cache and list_lru, which can easily fail on loaded systems. E.g.: mkdir: page allocation failure: order:5, mode:0x14040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null) CPU: 1 PID: 6399 Comm: mkdir Not tainted 4.13.0-mm1-00065-g720bbe532b7c-dirty #481 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-20170228_101828-anatol 04/01/2014 Call Trace: ? __alloc_pages_direct_compact+0x4c/0x110 __alloc_pages_nodemask+0xf50/0x1430 alloc_pages_current+0x60/0xc0 kmalloc_order_trace+0x29/0x1b0 __kmalloc+0x1f4/0x320 memcg_update_all_list_lrus+0xca/0x2e0 mem_cgroup_css_alloc+0x612/0x670 cgroup_apply_control_enable+0x19e/0x360 cgroup_mkdir+0x322/0x490 kernfs_iop_mkdir+0x55/0x80 vfs_mkdir+0xd0/0x120 SyS_mkdirat+0x6c/0xe0 SyS_mkdir+0x14/0x20 entry_SYSCALL_64_fastpath+0x18/0xad Mem-Info: active_anon:2965 inactive_anon:19 isolated_anon:0 active_file:100270 inactive_file:98846 isolated_file:0 unevictable:0 dirty:0 writeback:0 unstable:0 slab_reclaimable:7328 slab_unreclaimable:16402 mapped:771 shmem:52 pagetables:278 bounce:0 free:13718 free_pcp:0 free_cma:0 This output is from an artificial reproducer, but we have repeatedly observed order-7 failures in production in the Facebook fleet. These systems become useless as they cannot run more jobs, even though there is plenty of memory to allocate 128 individual pages. Use kvmalloc and kvzalloc to fall back to vmalloc space if these arrays prove too large for allocating them physically contiguous. Link: http://lkml.kernel.org/r/20170918184919.20644-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Josef Bacik Acked-by: Michal Hocko Acked-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/list_lru.c | 12 ++++++------ mm/slab_common.c | 22 +++++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index 7a40fa2be858..f141f0c80ff3 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -325,12 +325,12 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru) { int size = memcg_nr_cache_ids; - nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL); + nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL); if (!nlru->memcg_lrus) return -ENOMEM; if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) { - kfree(nlru->memcg_lrus); + kvfree(nlru->memcg_lrus); return -ENOMEM; } @@ -340,7 +340,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru) static void memcg_destroy_list_lru_node(struct list_lru_node *nlru) { __memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids); - kfree(nlru->memcg_lrus); + kvfree(nlru->memcg_lrus); } static int memcg_update_list_lru_node(struct list_lru_node *nlru, @@ -351,12 +351,12 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, BUG_ON(old_size > new_size); old = nlru->memcg_lrus; - new = kmalloc(new_size * sizeof(void *), GFP_KERNEL); + new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL); if (!new) return -ENOMEM; if (__memcg_init_list_lru_node(new, old_size, new_size)) { - kfree(new); + kvfree(new); return -ENOMEM; } @@ -373,7 +373,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, nlru->memcg_lrus = new; spin_unlock_irq(&nlru->lock); - kfree(old); + kvfree(old); return 0; } diff --git a/mm/slab_common.c b/mm/slab_common.c index 904a83be82de..80164599ca5d 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -165,9 +165,9 @@ static int init_memcg_params(struct kmem_cache *s, if (!memcg_nr_cache_ids) return 0; - arr = kzalloc(sizeof(struct memcg_cache_array) + - memcg_nr_cache_ids * sizeof(void *), - GFP_KERNEL); + arr = kvzalloc(sizeof(struct memcg_cache_array) + + memcg_nr_cache_ids * sizeof(void *), + GFP_KERNEL); if (!arr) return -ENOMEM; @@ -178,15 +178,23 @@ static int init_memcg_params(struct kmem_cache *s, static void destroy_memcg_params(struct kmem_cache *s) { if (is_root_cache(s)) - kfree(rcu_access_pointer(s->memcg_params.memcg_caches)); + kvfree(rcu_access_pointer(s->memcg_params.memcg_caches)); +} + +static void free_memcg_params(struct rcu_head *rcu) +{ + struct memcg_cache_array *old; + + old = container_of(rcu, struct memcg_cache_array, rcu); + kvfree(old); } static int update_memcg_params(struct kmem_cache *s, int new_array_size) { struct memcg_cache_array *old, *new; - new = kzalloc(sizeof(struct memcg_cache_array) + - new_array_size * sizeof(void *), GFP_KERNEL); + new = kvzalloc(sizeof(struct memcg_cache_array) + + new_array_size * sizeof(void *), GFP_KERNEL); if (!new) return -ENOMEM; @@ -198,7 +206,7 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size) rcu_assign_pointer(s->memcg_params.memcg_caches, new); if (old) - kfree_rcu(old, rcu); + call_rcu(&old->rcu, free_memcg_params); return 0; } From a70e43a59de9316e6fbad3b65557d0a24c099aca Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 3 Oct 2017 16:16:13 -0700 Subject: [PATCH 38/51] lib/idr.c: fix comment for idr_replace() idr_replace() returns the old value on success, not 0. Link: http://lkml.kernel.org/r/20170918162642.37511-1-ebiggers3@gmail.com Signed-off-by: Eric Biggers Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index f9adf4805fd7..edd9b2be1651 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -146,8 +146,8 @@ EXPORT_SYMBOL(idr_get_next_ext); * idr_alloc() and idr_remove() (as long as the ID being removed is not * the one being replaced!). * - * Returns: 0 on success. %-ENOENT indicates that @id was not found. - * %-EINVAL indicates that @id or @ptr were not valid. + * Returns: the old value on success. %-ENOENT indicates that @id was not + * found. %-EINVAL indicates that @id or @ptr were not valid. */ void *idr_replace(struct idr *idr, void *ptr, int id) { From f64ac5e6e30668216cf489d73ba8a96e372d78c6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:16:16 -0700 Subject: [PATCH 39/51] mm, memory_hotplug: add scheduling point to __add_pages Patch series "mm, memory_hotplug: fix few soft lockups in memory hotadd". Johannes has noticed few soft lockups when adding a large nvdimm device. All of them were caused by a long loop without any explicit cond_resched which is a problem for !PREEMPT kernels. The fix is quite straightforward. Just make sure that cond_resched gets called from time to time. This patch (of 3): __add_pages gets a pfn range to add and there is no upper bound for a single call. This is usually a memory block aligned size for the regular memory hotplug - smaller sizes are usual for memory balloning drivers, or the whole NUMA node for physical memory online. There is no explicit scheduling point in that code path though. This can lead to long latencies while __add_pages is executed and we have even seen a soft lockup report during nvdimm initialization with !PREEMPT kernel NMI watchdog: BUG: soft lockup - CPU#11 stuck for 23s! [kworker/u641:3:832] [...] Workqueue: events_unbound async_run_entry_fn task: ffff881809270f40 ti: ffff881809274000 task.ti: ffff881809274000 RIP: _raw_spin_unlock_irqrestore+0x11/0x20 RSP: 0018:ffff881809277b10 EFLAGS: 00000286 [...] Call Trace: sparse_add_one_section+0x13d/0x18e __add_pages+0x10a/0x1d0 arch_add_memory+0x4a/0xc0 devm_memremap_pages+0x29d/0x430 pmem_attach_disk+0x2fd/0x3f0 [nd_pmem] nvdimm_bus_probe+0x64/0x110 [libnvdimm] driver_probe_device+0x1f7/0x420 bus_for_each_drv+0x52/0x80 __device_attach+0xb0/0x130 bus_probe_device+0x87/0xa0 device_add+0x3fc/0x5f0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x43/0x150 process_one_work+0x14e/0x410 worker_thread+0x116/0x490 kthread+0xc7/0xe0 ret_from_fork+0x3f/0x70 DWARF2 unwinder stuck at ret_from_fork+0x3f/0x70 Fix this by adding cond_resched once per each memory section in the given pfn range. Each section is constant amount of work which itself is not too expensive but many of them will just add up. Link: http://lkml.kernel.org/r/20170918121410.24466-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e882cb6da994..23d5bd968950 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -328,6 +328,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, if (err && (err != -EEXIST)) break; err = 0; + cond_resched(); } vmemmap_populate_print_last(); out: From 9b6e63cbf85b89b2dbffa4955dbf2df8250e5375 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:16:19 -0700 Subject: [PATCH 40/51] mm, page_alloc: add scheduling point to memmap_init_zone memmap_init_zone gets a pfn range to initialize and it can be really large resulting in a soft lockup on non-preemptible kernels NMI watchdog: BUG: soft lockup - CPU#31 stuck for 23s! [kworker/u642:5:1720] [...] task: ffff88ecd7e902c0 ti: ffff88eca4e50000 task.ti: ffff88eca4e50000 RIP: move_pfn_range_to_zone+0x185/0x1d0 [...] Call Trace: devm_memremap_pages+0x2c7/0x430 pmem_attach_disk+0x2fd/0x3f0 [nd_pmem] nvdimm_bus_probe+0x64/0x110 [libnvdimm] driver_probe_device+0x1f7/0x420 bus_for_each_drv+0x52/0x80 __device_attach+0xb0/0x130 bus_probe_device+0x87/0xa0 device_add+0x3fc/0x5f0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x43/0x150 process_one_work+0x14e/0x410 worker_thread+0x116/0x490 kthread+0xc7/0xe0 ret_from_fork+0x3f/0x70 Fix this by adding a scheduling point once per page block. Link: http://lkml.kernel.org/r/20170918121410.24466-3-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 38d165a87860..77e4d3c5c57b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5367,6 +5367,7 @@ not_early: __init_single_page(page, pfn, zone, nid); set_pageblock_migratetype(page, MIGRATE_MOVABLE); + cond_resched(); } else { __init_single_pfn(pfn, zone, nid); } From 1fdcce6e16c54facc4f0688630d3b9ecfcaa411f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:16:23 -0700 Subject: [PATCH 41/51] memremap: add scheduling point to devm_memremap_pages devm_memremap_pages is initializing struct pages in for_each_device_pfn and that can take quite some time. We have even seen a soft lockup triggering on a non preemptive kernel NMI watchdog: BUG: soft lockup - CPU#61 stuck for 22s! [kworker/u641:11:1808] [...] RIP: 0010:[] [] devm_memremap_pages+0x327/0x430 [...] Call Trace: pmem_attach_disk+0x2fd/0x3f0 [nd_pmem] nvdimm_bus_probe+0x64/0x110 [libnvdimm] driver_probe_device+0x1f7/0x420 bus_for_each_drv+0x52/0x80 __device_attach+0xb0/0x130 bus_probe_device+0x87/0xa0 device_add+0x3fc/0x5f0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x43/0x150 process_one_work+0x14e/0x410 worker_thread+0x116/0x490 kthread+0xc7/0xe0 ret_from_fork+0x3f/0x70 fix this by adding cond_resched every 1024 pages. Link: http://lkml.kernel.org/r/20170918121410.24466-4-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 6bcbfbf1a8fd..403ab9cdb949 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -350,7 +350,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, pgprot_t pgprot = PAGE_KERNEL; struct dev_pagemap *pgmap; struct page_map *page_map; - int error, nid, is_ram; + int error, nid, is_ram, i = 0; align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) @@ -448,6 +448,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, list_del(&page->lru); page->pgmap = pgmap; percpu_ref_get(ref); + if (!(++i % 1024)) + cond_resched(); } devres_add(dev, page_map); return __va(res->start); From c9653850c90d6050197bc76ba672e00a7771aea5 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 3 Oct 2017 16:16:26 -0700 Subject: [PATCH 42/51] kernel/kcmp.c: drop branch leftover typo The else branch been left over and escaped the source code refresh. Not a problem but better clean it up. Fixes: 0791e3644e5e ("kcmp: add KCMP_EPOLL_TFD mode to compare epoll target files") Link: http://lkml.kernel.org/r/20170917165838.GA1887@uranus.lan Reported-by: Eugene Syromiatnikov Signed-off-by: Cyrill Gorcunov Acked-by: Andrei Vagin Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcmp.c b/kernel/kcmp.c index ea34ed8bb952..055bb2962a0b 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -131,7 +131,7 @@ static int kcmp_epoll_target(struct task_struct *task1, if (filp_epoll) { filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); fput(filp_epoll); - } else + } if (IS_ERR(filp_tgt)) return PTR_ERR(filp_tgt); From 1dd2bfc86818ddbc95f98e312e7704350223fd7d Mon Sep 17 00:00:00 2001 From: YASUAKI ISHIMATSU Date: Tue, 3 Oct 2017 16:16:29 -0700 Subject: [PATCH 43/51] mm/memory_hotplug: change pfn_to_section_nr/section_nr_to_pfn macro to inline function pfn_to_section_nr() and section_nr_to_pfn() are defined as macro. pfn_to_section_nr() has no issue even if it is defined as macro. But section_nr_to_pfn() has overflow issue if sec is defined as int. section_nr_to_pfn() just shifts sec by PFN_SECTION_SHIFT. If sec is defined as unsigned long, section_nr_to_pfn() returns pfn as 64 bit value. But if sec is defined as int, section_nr_to_pfn() returns pfn as 32 bit value. __remove_section() calculates start_pfn using section_nr_to_pfn() and scn_nr defined as int. So if hot-removed memory address is over 16TB, overflow issue occurs and section_nr_to_pfn() does not calculate correct pfn. To make callers use proper arg, the patch changes the macros to inline functions. Fixes: 815121d2b5cd ("memory_hotplug: clear zone when removing the memory") Link: http://lkml.kernel.org/r/e643a387-e573-6bbf-d418-c60c8ee3d15e@gmail.com Signed-off-by: Yasuaki Ishimatsu Acked-by: Michal Hocko Cc: Xishi Qiu Cc: Reza Arbab Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 ++++++++-- mm/memory_hotplug.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 356a814e7c8e..c8f89417740b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1094,8 +1094,14 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif -#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) -#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) +static inline unsigned long pfn_to_section_nr(unsigned long pfn) +{ + return pfn >> PFN_SECTION_SHIFT; +} +static inline unsigned long section_nr_to_pfn(unsigned long sec) +{ + return sec << PFN_SECTION_SHIFT; +} #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 23d5bd968950..efd1ad37bb57 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -551,7 +551,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, return ret; scn_nr = __section_nr(ms); - start_pfn = section_nr_to_pfn(scn_nr); + start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn); sparse_remove_one_section(zone, ms, map_offset); From d09b0137d204bebeaafed672bc5a244e9ac92edb Mon Sep 17 00:00:00 2001 From: YASUAKI ISHIMATSU Date: Tue, 3 Oct 2017 16:16:32 -0700 Subject: [PATCH 44/51] mm/memory_hotplug: define find_{smallest|biggest}_section_pfn as unsigned long find_{smallest|biggest}_section_pfn()s find the smallest/biggest section and return the pfn of the section. But the functions are defined as int. So the functions always return 0x00000000 - 0xffffffff. It means if memory address is over 16TB, the functions does not work correctly. To handle 64 bit value, the patch defines find_{smallest|biggest}_section_pfn() as unsigned long. Fixes: 815121d2b5cd ("memory_hotplug: clear zone when removing the memory") Link: http://lkml.kernel.org/r/d9d5593a-d0a4-c4be-ab08-493df59a85c6@gmail.com Signed-off-by: Yasuaki Ishimatsu Acked-by: Michal Hocko Cc: Xishi Qiu Cc: Reza Arbab Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index efd1ad37bb57..d4b5f29906b9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(__add_pages); #ifdef CONFIG_MEMORY_HOTREMOVE /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ -static int find_smallest_section_pfn(int nid, struct zone *zone, +static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { @@ -363,7 +363,7 @@ static int find_smallest_section_pfn(int nid, struct zone *zone, } /* find the biggest valid pfn in the range [start_pfn, end_pfn). */ -static int find_biggest_section_pfn(int nid, struct zone *zone, +static unsigned long find_biggest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { From 90ceb2a3ad868f800eb1c9f4ede650daddd94b77 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Oct 2017 16:16:35 -0700 Subject: [PATCH 45/51] kernel/params.c: fix the maximum length in param_get_string The length parameter of strlcpy() is supposed to reflect the size of the target buffer, not of the source string. Harmless in this case as the buffer is PAGE_SIZE long and the source string is always much shorter than this, but conceptually wrong, so let's fix it. Link: http://lkml.kernel.org/r/20170928162515.24846b4f@endymion Signed-off-by: Jean Delvare Acked-by: Ingo Molnar Cc: Baoquan He Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/params.c b/kernel/params.c index 1cd8f1a895a8..8283ba045f4f 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -507,7 +507,7 @@ EXPORT_SYMBOL(param_set_copystring); int param_get_string(char *buffer, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; - return strlcpy(buffer, kps->string, kps->maxlen); + return strlcpy(buffer, kps->string, PAGE_SIZE); } EXPORT_SYMBOL(param_get_string); From 96802e6b1dbf29d3012b39503c5dd6d9d8e82955 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Oct 2017 16:16:38 -0700 Subject: [PATCH 46/51] kernel/params.c: fix an overflow in param_attr_show Function param_attr_show could overflow the buffer it is operating on. The buffer size is PAGE_SIZE, and the string returned by attribute->param->ops->get is generated by scnprintf(buffer, PAGE_SIZE, ...) so it could be PAGE_SIZE - 1 long, with the terminating '\0' at the very end of the buffer. Calling strcat(..., "\n") on this isn't safe, as the '\0' will be replaced by '\n' (OK) and then another '\0' will be added past the end of the buffer (not OK.) Simply add the trailing '\n' when writing the attribute contents to the buffer originally. This is safe, and also faster. Credits to Teradata for discovering this issue. Link: http://lkml.kernel.org/r/20170928162602.60c379c7@endymion Signed-off-by: Jean Delvare Acked-by: Ingo Molnar Cc: Baoquan He Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index 8283ba045f4f..0cca488263dd 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -224,7 +224,7 @@ char *parse_args(const char *doing, } \ int param_get_##name(char *buffer, const struct kernel_param *kp) \ { \ - return scnprintf(buffer, PAGE_SIZE, format, \ + return scnprintf(buffer, PAGE_SIZE, format "\n", \ *((type *)kp->arg)); \ } \ const struct kernel_param_ops param_ops_##name = { \ @@ -270,7 +270,7 @@ EXPORT_SYMBOL(param_set_charp); int param_get_charp(char *buffer, const struct kernel_param *kp) { - return scnprintf(buffer, PAGE_SIZE, "%s", *((char **)kp->arg)); + return scnprintf(buffer, PAGE_SIZE, "%s\n", *((char **)kp->arg)); } EXPORT_SYMBOL(param_get_charp); @@ -301,7 +301,7 @@ EXPORT_SYMBOL(param_set_bool); int param_get_bool(char *buffer, const struct kernel_param *kp) { /* Y and N chosen as being relatively non-coder friendly */ - return sprintf(buffer, "%c", *(bool *)kp->arg ? 'Y' : 'N'); + return sprintf(buffer, "%c\n", *(bool *)kp->arg ? 'Y' : 'N'); } EXPORT_SYMBOL(param_get_bool); @@ -360,7 +360,7 @@ EXPORT_SYMBOL(param_set_invbool); int param_get_invbool(char *buffer, const struct kernel_param *kp) { - return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y'); + return sprintf(buffer, "%c\n", (*(bool *)kp->arg) ? 'N' : 'Y'); } EXPORT_SYMBOL(param_get_invbool); @@ -460,8 +460,9 @@ static int param_array_get(char *buffer, const struct kernel_param *kp) struct kernel_param p = *kp; for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) { + /* Replace \n with comma */ if (i) - buffer[off++] = ','; + buffer[off - 1] = ','; p.arg = arr->elem + arr->elemsize * i; check_kparam_locked(p.mod); ret = arr->ops->get(buffer + off, &p); @@ -507,7 +508,7 @@ EXPORT_SYMBOL(param_set_copystring); int param_get_string(char *buffer, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; - return strlcpy(buffer, kps->string, PAGE_SIZE); + return scnprintf(buffer, PAGE_SIZE, "%s\n", kps->string); } EXPORT_SYMBOL(param_get_string); @@ -549,10 +550,6 @@ static ssize_t param_attr_show(struct module_attribute *mattr, kernel_param_lock(mk->mod); count = attribute->param->ops->get(buf, attribute->param); kernel_param_unlock(mk->mod); - if (count > 0) { - strcat(buf, "\n"); - ++count; - } return count; } From e0596c80f442d1e1221c17dbb71b2aed43909221 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Oct 2017 16:16:41 -0700 Subject: [PATCH 47/51] kernel/params.c: improve STANDARD_PARAM_DEF readability Align the parameters passed to STANDARD_PARAM_DEF for clarity. Link: http://lkml.kernel.org/r/20170928162728.756143cc@endymion Signed-off-by: Jean Delvare Suggested-by: Ingo Molnar Acked-by: Ingo Molnar Cc: Baoquan He Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index 0cca488263dd..cc9108c2a1fd 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -236,14 +236,14 @@ char *parse_args(const char *doing, EXPORT_SYMBOL(param_ops_##name) -STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", kstrtou8); -STANDARD_PARAM_DEF(short, short, "%hi", kstrtos16); -STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", kstrtou16); -STANDARD_PARAM_DEF(int, int, "%i", kstrtoint); -STANDARD_PARAM_DEF(uint, unsigned int, "%u", kstrtouint); -STANDARD_PARAM_DEF(long, long, "%li", kstrtol); -STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul); -STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull); +STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", kstrtou8); +STANDARD_PARAM_DEF(short, short, "%hi", kstrtos16); +STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", kstrtou16); +STANDARD_PARAM_DEF(int, int, "%i", kstrtoint); +STANDARD_PARAM_DEF(uint, unsigned int, "%u", kstrtouint); +STANDARD_PARAM_DEF(long, long, "%li", kstrtol); +STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul); +STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull); int param_set_charp(const char *val, const struct kernel_param *kp) { From 656d61ce9666209c4c4a13c71902d3ee70d1ff6f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 3 Oct 2017 16:16:45 -0700 Subject: [PATCH 48/51] lib/ratelimit.c: use deferred printk() version printk_ratelimit() invokes ___ratelimit() which may invoke a normal printk() (pr_warn() in this particular case) to warn about suppressed output. Given that printk_ratelimit() may be called from anywhere, that pr_warn() is dangerous - it may end up deadlocking the system. Fix ___ratelimit() by using deferred printk(). Sasha reported the following lockdep error: : Unregister pv shared memory for cpu 8 : select_fallback_rq: 3 callbacks suppressed : process 8583 (trinity-c78) no longer affine to cpu8 : : ====================================================== : WARNING: possible circular locking dependency detected : 4.14.0-rc2-next-20170927+ #252 Not tainted : ------------------------------------------------------ : migration/8/62 is trying to acquire lock: : (&port_lock_key){-.-.}, at: serial8250_console_write() : : but task is already holding lock: : (&rq->lock){-.-.}, at: sched_cpu_dying() : : which lock already depends on the new lock. : : : the existing dependency chain (in reverse order) is: : : -> #3 (&rq->lock){-.-.}: : __lock_acquire() : lock_acquire() : _raw_spin_lock() : task_fork_fair() : sched_fork() : copy_process.part.31() : _do_fork() : kernel_thread() : rest_init() : start_kernel() : x86_64_start_reservations() : x86_64_start_kernel() : verify_cpu() : : -> #2 (&p->pi_lock){-.-.}: : __lock_acquire() : lock_acquire() : _raw_spin_lock_irqsave() : try_to_wake_up() : default_wake_function() : woken_wake_function() : __wake_up_common() : __wake_up_common_lock() : __wake_up() : tty_wakeup() : tty_port_default_wakeup() : tty_port_tty_wakeup() : uart_write_wakeup() : serial8250_tx_chars() : serial8250_handle_irq.part.25() : serial8250_default_handle_irq() : serial8250_interrupt() : __handle_irq_event_percpu() : handle_irq_event_percpu() : handle_irq_event() : handle_level_irq() : handle_irq() : do_IRQ() : ret_from_intr() : native_safe_halt() : default_idle() : arch_cpu_idle() : default_idle_call() : do_idle() : cpu_startup_entry() : rest_init() : start_kernel() : x86_64_start_reservations() : x86_64_start_kernel() : verify_cpu() : : -> #1 (&tty->write_wait){-.-.}: : __lock_acquire() : lock_acquire() : _raw_spin_lock_irqsave() : __wake_up_common_lock() : __wake_up() : tty_wakeup() : tty_port_default_wakeup() : tty_port_tty_wakeup() : uart_write_wakeup() : serial8250_tx_chars() : serial8250_handle_irq.part.25() : serial8250_default_handle_irq() : serial8250_interrupt() : __handle_irq_event_percpu() : handle_irq_event_percpu() : handle_irq_event() : handle_level_irq() : handle_irq() : do_IRQ() : ret_from_intr() : native_safe_halt() : default_idle() : arch_cpu_idle() : default_idle_call() : do_idle() : cpu_startup_entry() : rest_init() : start_kernel() : x86_64_start_reservations() : x86_64_start_kernel() : verify_cpu() : : -> #0 (&port_lock_key){-.-.}: : check_prev_add() : __lock_acquire() : lock_acquire() : _raw_spin_lock_irqsave() : serial8250_console_write() : univ8250_console_write() : console_unlock() : vprintk_emit() : vprintk_default() : vprintk_func() : printk() : ___ratelimit() : __printk_ratelimit() : select_fallback_rq() : sched_cpu_dying() : cpuhp_invoke_callback() : take_cpu_down() : multi_cpu_stop() : cpu_stopper_thread() : smpboot_thread_fn() : kthread() : ret_from_fork() : : other info that might help us debug this: : : Chain exists of: : &port_lock_key --> &p->pi_lock --> &rq->lock : : Possible unsafe locking scenario: : : CPU0 CPU1 : ---- ---- : lock(&rq->lock); : lock(&p->pi_lock); : lock(&rq->lock); : lock(&port_lock_key); : : *** DEADLOCK *** : : 4 locks held by migration/8/62: : #0: (&p->pi_lock){-.-.}, at: sched_cpu_dying() : #1: (&rq->lock){-.-.}, at: sched_cpu_dying() : #2: (printk_ratelimit_state.lock){....}, at: ___ratelimit() : #3: (console_lock){+.+.}, at: vprintk_emit() : : stack backtrace: : CPU: 8 PID: 62 Comm: migration/8 Not tainted 4.14.0-rc2-next-20170927+ #252 : Call Trace: : dump_stack() : print_circular_bug() : check_prev_add() : ? add_lock_to_list.isra.26() : ? check_usage() : ? kvm_clock_read() : ? kvm_sched_clock_read() : ? sched_clock() : ? check_preemption_disabled() : __lock_acquire() : ? __lock_acquire() : ? add_lock_to_list.isra.26() : ? debug_check_no_locks_freed() : ? memcpy() : lock_acquire() : ? serial8250_console_write() : _raw_spin_lock_irqsave() : ? serial8250_console_write() : serial8250_console_write() : ? serial8250_start_tx() : ? lock_acquire() : ? memcpy() : univ8250_console_write() : console_unlock() : ? __down_trylock_console_sem() : vprintk_emit() : vprintk_default() : vprintk_func() : printk() : ? show_regs_print_info() : ? lock_acquire() : ___ratelimit() : __printk_ratelimit() : select_fallback_rq() : sched_cpu_dying() : ? sched_cpu_starting() : ? rcutree_dying_cpu() : ? sched_cpu_starting() : cpuhp_invoke_callback() : ? cpu_disable_common() : take_cpu_down() : ? trace_hardirqs_off_caller() : ? cpuhp_invoke_callback() : multi_cpu_stop() : ? __this_cpu_preempt_check() : ? cpu_stop_queue_work() : cpu_stopper_thread() : ? cpu_stop_create() : smpboot_thread_fn() : ? sort_range() : ? schedule() : ? __kthread_parkme() : kthread() : ? sort_range() : ? kthread_create_on_node() : ret_from_fork() : process 9121 (trinity-c78) no longer affine to cpu8 : smpboot: CPU 8 is now offline Link: http://lkml.kernel.org/r/20170928120405.18273-1-sergey.senozhatsky@gmail.com Fixes: 6b1d174b0c27b ("ratelimit: extend to print suppressed messages on release") Signed-off-by: Sergey Senozhatsky Reported-by: Sasha Levin Reviewed-by: Petr Mladek Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Steven Rostedt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ratelimit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 08f8043cac61..d01f47135239 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -48,7 +48,9 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) if (time_is_before_jiffies(rs->begin + rs->interval)) { if (rs->missed) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { - pr_warn("%s: %d callbacks suppressed\n", func, rs->missed); + printk_deferred(KERN_WARNING + "%s: %d callbacks suppressed\n", + func, rs->missed); rs->missed = 0; } } From d22e3d69ee1a3f83ff7cc943af63de48b6156dcf Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 3 Oct 2017 16:16:49 -0700 Subject: [PATCH 49/51] m32r: fix build failure The allmodconfig build of m32r is failing with the error: lib/mpi/mpih-div.o: In function 'mpihelp_divrem': mpih-div.c:(.text+0x40): undefined reference to 'abort' mpih-div.c:(.text+0x40): relocation truncated to fit: R_M32R_26_PCREL_RELA against undefined symbol 'abort' The function 'abort' was never defined for the m32r architecture. Create 'abort' as is done in other arch like 'arm' and 'unicore32'. Link: http://lkml.kernel.org/r/1506727220-6108-1-git-send-email-sudip.mukherjee@codethink.co.uk Signed-off-by: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/traps.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 647dd94a0c39..72b96f282689 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -114,6 +114,15 @@ static void set_eit_vector_entries(void) _flush_cache_copyback_all(); } +void abort(void) +{ + BUG(); + + /* if that doesn't kill us, halt */ + panic("Oops failed to kill thread"); +} +EXPORT_SYMBOL(abort); + void __init trap_init(void) { set_eit_vector_entries(); From a08ffbef4ab799351d0610bbdbeaa1ee746b9065 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 3 Oct 2017 16:16:51 -0700 Subject: [PATCH 50/51] checkpatch: fix ignoring cover-letter logic Currently running checkpatch on a directory with a cover-letter.patch file reports the following error: ----------------------------------------- patches/smp-v2/v2-0000-cover-letter.patch ----------------------------------------- ERROR: Does not appear to be a unified-diff format patch The logic to suppress the unified-diff check for cover letters is there but is checking $file instead of $filename. Fix the variable to use the correct one. Link: http://lkml.kernel.org/r/20170909090406.31523-1-shorne@gmail.com Signed-off-by: Stafford Horne Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index dd2c262aebbf..8b80bac055e4 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6390,7 +6390,7 @@ sub process { exit(0); } - if (!$is_patch && $file !~ /cover-letter\.patch$/) { + if (!$is_patch && $filename !~ /cover-letter\.patch$/) { ERROR("NOT_UNIFIED_DIFF", "Does not appear to be a unified-diff format patch\n"); } From 32e57c29e3c038ac802b7cc214a8795a4234055f Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 3 Oct 2017 16:16:54 -0700 Subject: [PATCH 51/51] include/linux/fs.h: fix comment about struct address_space Before commit 9c5d760b8d22 ("mm: split gfp_mask and mapping flags into separate fields") the private_* fields of struct adrress_space were grouped together and using "ditto" in comments describing the last fields was correct. With introduction of gpf_mask between private_lock and private_list "ditto" references the wrong description. Fix it by using the elaborate description. Link: http://lkml.kernel.org/r/1507009987-8746-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 339e73742e73..13dab191a23e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -403,7 +403,7 @@ struct address_space { unsigned long flags; /* error bits */ spinlock_t private_lock; /* for use by the address_space */ gfp_t gfp_mask; /* implicit gfp mask for allocations */ - struct list_head private_list; /* ditto */ + struct list_head private_list; /* for use by the address_space */ void *private_data; /* ditto */ errseq_t wb_err; } __attribute__((aligned(sizeof(long)))) __randomize_layout;