From f51667685749edadb7cad45a51003e8ebf2e8426 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 17 Dec 2017 22:00:59 -0500 Subject: [PATCH 01/17] ext4: fix up remaining files with SPDX cleanups A number of ext4 source files were skipped due because their copyright permission statements didn't match the expected text used by the automated conversion utilities. I've added SPDX tags for the rest. While looking at some of these files, I've noticed that we have quite a bit of variation on the licenses that were used --- in particular some of the Red Hat licenses on the jbd2 files use a GPL2+ license, and we have some files that have a LGPL-2.1 license (which was quite surprising). I've not attempted to do any license changes. Even if it is perfectly legal to relicense to GPL 2.0-only for consistency's sake, that should be done with ext4 developer community discussion. Signed-off-by: Theodore Ts'o --- fs/ext4/acl.h | 2 +- fs/ext4/ext4.h | 2 +- fs/ext4/ext4_extents.h | 14 +------------- fs/ext4/ext4_jbd2.h | 5 +---- fs/ext4/extents.c | 14 +------------- fs/ext4/extents_status.h | 2 +- fs/ext4/fsmap.c | 15 +-------------- fs/ext4/fsmap.h | 15 +-------------- fs/ext4/hash.c | 6 +----- fs/ext4/inline.c | 10 +--------- fs/ext4/mballoc.c | 14 +------------- fs/ext4/mballoc.h | 2 +- fs/ext4/migrate.c | 9 +-------- fs/ext4/move_extent.c | 10 +--------- fs/ext4/super.c | 1 + fs/ext4/truncate.h | 2 +- fs/ext4/xattr.h | 2 +- fs/jbd2/checkpoint.c | 5 +---- fs/jbd2/commit.c | 5 +---- fs/jbd2/journal.c | 5 +---- fs/jbd2/recovery.c | 5 +---- fs/jbd2/revoke.c | 5 +---- fs/jbd2/transaction.c | 5 +---- 23 files changed, 23 insertions(+), 132 deletions(-) diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index a48fc5ae2701..9b63f5416a2f 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* File: fs/ext4/acl.h diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4e091eae38b1..891d7636a9d0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * ext4.h * diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 8ecf84b8f5a1..98fb0c119c68 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -1,19 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- */ #ifndef _EXT4_EXTENTS diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 48143e32411c..15b6dd733780 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * ext4_jbd2.h * @@ -5,10 +6,6 @@ * * Copyright 1998--1999 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Ext4-specific journaling extensions. */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c941251ac0c0..054416e9d827 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas @@ -5,19 +6,6 @@ * Architecture independence: * Copyright (c) 2005, Bull S.A. * Written by Pierre Peiffer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- */ /* diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index ca90fc96f47e..8efdeb903d6b 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * fs/ext4/extents_status.h * diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 7ec340898598..e871c4bf18e9 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2017 Oracle. All Rights Reserved. * * Author: Darrick J. Wong - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "ext4.h" #include diff --git a/fs/ext4/fsmap.h b/fs/ext4/fsmap.h index 9a2cd367cc66..68c8001fee85 100644 --- a/fs/ext4/fsmap.h +++ b/fs/ext4/fsmap.h @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2017 Oracle. All Rights Reserved. * * Author: Darrick J. Wong - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef __EXT4_FSMAP_H__ #define __EXT4_FSMAP_H__ diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 00c6dd29e621..e22dcfab308b 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/hash.c * * Copyright (C) 2002 by Theodore Ts'o - * - * This file is released under the GPL v2. - * - * This file may be redistributed under the terms of the GNU Public - * License. */ #include diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1367553c43bb..8b1f2901a5df 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1,15 +1,7 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (c) 2012 Taobao. * Written by Tao Ma - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d9f8b90a93ed..1a7ea5f9276f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1,19 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- */ diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index dcf52540f379..88c98f17e3d9 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * fs/ext4/mballoc.h * diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index cf5181b62df1..61a9d1927817 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -1,15 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright IBM Corporation, 2007 * Author Aneesh Kumar K.V * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * */ #include diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 9bb36909ec92..b96e4bd3b3ec 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd. * Written by Takashi Sato * Akira Fujita - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7c46693a14d7..e5b9a305ff72 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/super.c * diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h index b64a9fa0ff41..0cb13badf473 100644 --- a/fs/ext4/truncate.h +++ b/fs/ext4/truncate.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/truncate.h * diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index f8cc07588ac9..dd54c4f995c8 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* File: fs/ext4/xattr.h diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 4055f51617ef..c125d662777c 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/checkpoint.c * @@ -5,10 +6,6 @@ * * Copyright 1999 Red Hat Software --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Checkpoint routines for the generic filesystem journaling code. * Part of the ext2fs journaling system. * diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 3c1c31321d9b..8de0e7723316 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/commit.c * @@ -5,10 +6,6 @@ * * Copyright 1998 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Journal commit routines for the generic filesystem journaling code; * part of the ext2fs journaling system. */ diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 67546c7ad473..3fbf48ec2188 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/journal.c * @@ -5,10 +6,6 @@ * * Copyright 1998 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Generic filesystem journal-writing code; part of the ext2fs * journaling system. * diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 02dd3360cb20..f99910b69c78 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/recovery.c * @@ -5,10 +6,6 @@ * * Copyright 1999-2000 Red Hat Software --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Journal recovery routines for the generic filesystem journaling code; * part of the ext2fs journaling system. */ diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index f9aefcda5854..696ef15ec942 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/revoke.c * @@ -5,10 +6,6 @@ * * Copyright 2000 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Journal revoke routines for the generic filesystem journaling code; * part of the ext2fs journaling system. * diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8b08044b3120..881a9e09ca4e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/transaction.c * @@ -5,10 +6,6 @@ * * Copyright 1998 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Generic filesystem transaction handling code; part of the ext2fs * journaling system. * From 3876bbe27d04b848750d5310a37d6b76b593f648 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Sun, 7 Jan 2018 16:22:35 -0500 Subject: [PATCH 02/17] mbcache: initialize entry->e_referenced in mb_cache_entry_create() KMSAN reported use of uninitialized |entry->e_referenced| in a condition in mb_cache_shrink(): ================================================================== BUG: KMSAN: use of uninitialized memory in mb_cache_shrink+0x3b4/0xc50 fs/mbcache.c:287 CPU: 2 PID: 816 Comm: kswapd1 Not tainted 4.11.0-rc5+ #2877 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x172/0x1c0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 mb_cache_shrink+0x3b4/0xc50 fs/mbcache.c:287 mb_cache_scan+0x67/0x80 fs/mbcache.c:321 do_shrink_slab mm/vmscan.c:397 [inline] shrink_slab+0xc3d/0x12d0 mm/vmscan.c:500 shrink_node+0x208f/0x2fd0 mm/vmscan.c:2603 kswapd_shrink_node mm/vmscan.c:3172 [inline] balance_pgdat mm/vmscan.c:3289 [inline] kswapd+0x160f/0x2850 mm/vmscan.c:3478 kthread+0x46c/0x5f0 kernel/kthread.c:230 ret_from_fork+0x29/0x40 arch/x86/entry/entry_64.S:430 chained origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 [inline] kmsan_save_stack mm/kmsan/kmsan.c:317 [inline] kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547 __msan_store_shadow_origin_1+0xac/0x110 mm/kmsan/kmsan_instr.c:257 mb_cache_entry_create+0x3b3/0xc60 fs/mbcache.c:95 ext4_xattr_cache_insert fs/ext4/xattr.c:1647 [inline] ext4_xattr_block_set+0x4c82/0x5530 fs/ext4/xattr.c:1022 ext4_xattr_set_handle+0x1332/0x20a0 fs/ext4/xattr.c:1252 ext4_xattr_set+0x4d2/0x680 fs/ext4/xattr.c:1306 ext4_xattr_trusted_set+0x8d/0xa0 fs/ext4/xattr_trusted.c:36 __vfs_setxattr+0x703/0x790 fs/xattr.c:149 __vfs_setxattr_noperm+0x27a/0x6f0 fs/xattr.c:180 vfs_setxattr fs/xattr.c:223 [inline] setxattr+0x6ae/0x790 fs/xattr.c:449 path_setxattr+0x1eb/0x380 fs/xattr.c:468 SYSC_lsetxattr+0x8d/0xb0 fs/xattr.c:490 SyS_lsetxattr+0x77/0xa0 fs/xattr.c:486 entry_SYSCALL_64_fastpath+0x13/0x94 origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 [inline] kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337 kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766 mb_cache_entry_create+0x283/0xc60 fs/mbcache.c:86 ext4_xattr_cache_insert fs/ext4/xattr.c:1647 [inline] ext4_xattr_block_set+0x4c82/0x5530 fs/ext4/xattr.c:1022 ext4_xattr_set_handle+0x1332/0x20a0 fs/ext4/xattr.c:1252 ext4_xattr_set+0x4d2/0x680 fs/ext4/xattr.c:1306 ext4_xattr_trusted_set+0x8d/0xa0 fs/ext4/xattr_trusted.c:36 __vfs_setxattr+0x703/0x790 fs/xattr.c:149 __vfs_setxattr_noperm+0x27a/0x6f0 fs/xattr.c:180 vfs_setxattr fs/xattr.c:223 [inline] setxattr+0x6ae/0x790 fs/xattr.c:449 path_setxattr+0x1eb/0x380 fs/xattr.c:468 SYSC_lsetxattr+0x8d/0xb0 fs/xattr.c:490 SyS_lsetxattr+0x77/0xa0 fs/xattr.c:486 entry_SYSCALL_64_fastpath+0x13/0x94 ================================================================== Signed-off-by: Alexander Potapenko Signed-off-by: Eric Biggers Cc: stable@vger.kernel.org # v4.6 --- fs/mbcache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/mbcache.c b/fs/mbcache.c index b8b8b9ced9f8..46b23bb432fe 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -94,6 +94,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, entry->e_key = key; entry->e_value = value; entry->e_reusable = reusable; + entry->e_referenced = 0; head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { From bbe45d2460da98785cb9453fb0b42d9b2e79dd99 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 7 Jan 2018 16:35:20 -0500 Subject: [PATCH 03/17] mbcache: revert "fs/mbcache.c: make count_objects() more robust" This reverts commit d5dabd633922ac5ee5bcc67748f7defb8b211469. This patch did absolutely nothing, because ->c_entry_count is unsigned. In addition if there is a bug in how mbcache maintains its entry count, it needs to be fixed, not just hacked around. (There is no obvious bug, though.) Cc: Jan Kara Cc: Jiang Biao Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/mbcache.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/mbcache.c b/fs/mbcache.c index 46b23bb432fe..49c5b25bfa8c 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -270,9 +270,6 @@ static unsigned long mb_cache_count(struct shrinker *shrink, struct mb_cache *cache = container_of(shrink, struct mb_cache, c_shrink); - /* Unlikely, but not impossible */ - if (unlikely(cache->c_entry_count < 0)) - return 0; return cache->c_entry_count; } From c0b24625979284dd212423320fe1c84fe244ed7f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 7 Jan 2018 16:38:43 -0500 Subject: [PATCH 04/17] dax: pass detailed error code from dax_iomap_fault() Ext4 needs to pass through error from its iomap handler to the page fault handler so that it can properly detect ENOSPC and force transaction commit and retry the fault (and block allocation). Add argument to dax_iomap_fault() for passing such error. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/dax.c | 9 ++++++--- fs/ext2/file.c | 2 +- fs/ext4/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/dax.h | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 95981591977a..f3afa1d6156c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1096,7 +1096,7 @@ static bool dax_fault_is_synchronous(unsigned long flags, } static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, - const struct iomap_ops *ops) + int *iomap_errp, const struct iomap_ops *ops) { struct vm_area_struct *vma = vmf->vma; struct address_space *mapping = vma->vm_file->f_mapping; @@ -1149,6 +1149,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, * that we never have to deal with more than a single extent here. */ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); + if (iomap_errp) + *iomap_errp = error; if (error) { vmf_ret = dax_fault_return(error); goto unlock_entry; @@ -1488,6 +1490,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * @vmf: The description of the fault * @pe_size: Size of the page to fault in * @pfnp: PFN to insert for synchronous faults if fsync is required + * @iomap_errp: Storage for detailed error code in case of error * @ops: Iomap ops passed from the file system * * When a page fault occurs, filesystems may call this helper in @@ -1496,11 +1499,11 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * successfully. */ int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, - pfn_t *pfnp, const struct iomap_ops *ops) + pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) { switch (pe_size) { case PE_SIZE_PTE: - return dax_iomap_pte_fault(vmf, pfnp, ops); + return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops); case PE_SIZE_PMD: return dax_iomap_pmd_fault(vmf, pfnp, ops); default: diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 2da67699dc33..09640220fda8 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -100,7 +100,7 @@ static int ext2_dax_fault(struct vm_fault *vmf) } down_read(&ei->dax_sem); - ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &ext2_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops); up_read(&ei->dax_sem); if (vmf->flags & FAULT_FLAG_WRITE) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a0ae27b1bc66..1c7cd882d998 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -314,7 +314,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, } else { down_read(&EXT4_I(inode)->i_mmap_sem); } - result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops); + result = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &ext4_iomap_ops); if (write) { ext4_journal_stop(handle); /* Handling synchronous page fault? */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8601275cc5e6..9ea08326f876 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1048,7 +1048,7 @@ __xfs_filemap_fault( if (IS_DAX(inode)) { pfn_t pfn; - ret = dax_iomap_fault(vmf, pe_size, &pfn, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &xfs_iomap_ops); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); } else { diff --git a/include/linux/dax.h b/include/linux/dax.h index 5258346c558c..0185ecdae135 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -96,7 +96,7 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, - pfn_t *pfnp, const struct iomap_ops *ops); + pfn_t *pfnp, int *errp, const struct iomap_ops *ops); int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); From 22446423108f3687167c9fdc080e6f21dd784d18 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 7 Jan 2018 16:41:01 -0500 Subject: [PATCH 05/17] ext4: fix ENOSPC handling in DAX page fault handler When allocation of underlying block for a page fault fails, we fail the fault with SIGBUS. However we may well hit ENOSPC just due to lots of free blocks being held by the running / committing transaction. So propagate the error from ext4_iomap_begin() and implement do standard allocation retry loop in ext4_dax_huge_fault(). Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1c7cd882d998..fb6f023622fe 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -280,7 +280,8 @@ out: static int ext4_dax_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size) { - int result; + int result, error = 0; + int retries = 0; handle_t *handle = NULL; struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; @@ -304,6 +305,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, sb_start_pagefault(sb); file_update_time(vmf->vma->vm_file); down_read(&EXT4_I(inode)->i_mmap_sem); +retry: handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, EXT4_DATA_TRANS_BLOCKS(sb)); if (IS_ERR(handle)) { @@ -314,9 +316,13 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, } else { down_read(&EXT4_I(inode)->i_mmap_sem); } - result = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &ext4_iomap_ops); + result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops); if (write) { ext4_journal_stop(handle); + + if ((result & VM_FAULT_ERROR) && error == -ENOSPC && + ext4_should_retry_alloc(sb, &retries)) + goto retry; /* Handling synchronous page fault? */ if (result & VM_FAULT_NEEDDSYNC) result = dax_finish_sync_fault(vmf, pe_size, pfn); From e7093f0d6371cefb4934c8f98847a56cb854e4a9 Mon Sep 17 00:00:00 2001 From: Petros Koutoupis Date: Sun, 7 Jan 2018 23:36:19 -0500 Subject: [PATCH 06/17] ext4: fixed alignment and minor code cleanup in ext4.h Signed-off-by: Petros Koutoupis Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 891d7636a9d0..3241475a1733 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -611,10 +611,10 @@ enum { /* * Flags used by ext4_free_blocks */ -#define EXT4_FREE_BLOCKS_METADATA 0x0001 -#define EXT4_FREE_BLOCKS_FORGET 0x0002 -#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 -#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 +#define EXT4_FREE_BLOCKS_METADATA 0x0001 +#define EXT4_FREE_BLOCKS_FORGET 0x0002 +#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 +#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 @@ -1986,10 +1986,10 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) /* Legal values for the dx_root hash_version field: */ -#define DX_HASH_LEGACY 0 -#define DX_HASH_HALF_MD4 1 -#define DX_HASH_TEA 2 -#define DX_HASH_LEGACY_UNSIGNED 3 +#define DX_HASH_LEGACY 0 +#define DX_HASH_HALF_MD4 1 +#define DX_HASH_TEA 2 +#define DX_HASH_LEGACY_UNSIGNED 3 #define DX_HASH_HALF_MD4_UNSIGNED 4 #define DX_HASH_TEA_UNSIGNED 5 @@ -2000,7 +2000,6 @@ static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, struct shash_desc shash; char ctx[4]; } desc; - int err; BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx)); @@ -2008,8 +2007,7 @@ static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, desc.shash.flags = 0; *(u32 *)desc.ctx = crc; - err = crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); + BUG_ON(crypto_shash_update(&desc.shash, address, length)); return *(u32 *)desc.ctx; } From a90ac0f5dc17f23d15f20abdb9d08ef07f155480 Mon Sep 17 00:00:00 2001 From: piaojun Date: Tue, 9 Jan 2018 21:32:41 -0500 Subject: [PATCH 07/17] ext4: no need flush workqueue before destroying it destroy_workqueue() will do flushing work for us. Signed-off-by: Jun Piao Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e5b9a305ff72..2a7faf94bb90 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -871,7 +871,6 @@ static void ext4_put_super(struct super_block *sb) ext4_unregister_li_request(sb); ext4_quota_off_umount(sb); - flush_workqueue(sbi->rsv_conversion_wq); destroy_workqueue(sbi->rsv_conversion_wq); if (sbi->s_journal) { From 9ee93ba3c430d5b5140ab72738dc70c4c54990e0 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Tue, 9 Jan 2018 23:57:52 -0500 Subject: [PATCH 08/17] mbcache: make sure c_entry_count is not decremented past zero Signed-off-by: Jiang Biao Signed-off-by: Theodore Ts'o CC: Eric Biggers CC: Andrew Morton CC: Jan Kara --- fs/mbcache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/mbcache.c b/fs/mbcache.c index 49c5b25bfa8c..bf41e2e72c18 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -239,7 +239,9 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value) spin_lock(&cache->c_list_lock); if (!list_empty(&entry->e_list)) { list_del_init(&entry->e_list); - cache->c_entry_count--; + if (!WARN_ONCE(cache->c_entry_count == 0, + "mbcache: attempt to decrement c_entry_count past zero")) + cache->c_entry_count--; atomic_dec(&entry->e_refcnt); } spin_unlock(&cache->c_list_lock); From abbc3f9395c76d554a9ed27d4b1ebfb5d9b0e4ca Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Wed, 10 Jan 2018 00:13:13 -0500 Subject: [PATCH 09/17] ext4: fix a race in the ext4 shutdown path This patch fixes a race between the shutdown path and bio completion handling. In the ext4 direct io path with async io, after submitting a bio to the block layer, if journal starting fails, ext4_direct_IO_write() would bail out pretending that the IO failed. The caller would have had no way of knowing whether or not the IO was successfully submitted. So instead, we return -EIOCBQUEUED in this case. Now, the caller knows that the IO was submitted. The bio completion handler takes care of the error. Tested: Ran the shutdown xfstest test 461 in loop for over 2 hours across 4 machines resulting in over 400 runs. Verified that the race didn't occur. Usually the race was seen in about 20-30 iterations. Signed-off-by: Harshad Shirwadkar Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 534a9130f625..4c2f8b57bdc7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3767,10 +3767,18 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) /* Credits for sb + inode write */ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ - ret = PTR_ERR(handle); + /* + * We wrote the data but cannot extend + * i_size. Bail out. In async io case, we do + * not return error here because we have + * already submmitted the corresponding + * bio. Returning error here makes the caller + * think that this IO is done and failed + * resulting in race with bio's completion + * handler. + */ + if (!ret) + ret = PTR_ERR(handle); if (inode->i_nlink) ext4_orphan_del(NULL, inode); From f69120ce6c024aa634a8fc25787205e42f0ccbe6 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 10 Jan 2018 00:27:29 -0500 Subject: [PATCH 10/17] jbd2: fix sphinx kernel-doc build warnings Sphinx emits various (26) warnings when building make target 'htmldocs'. Currently struct definitions contain duplicate documentation, some as kernel-docs and some as standard c89 comments. We can reduce duplication while cleaning up the kernel docs. Move all kernel-docs to right above each struct member. Use the set of all existing comments (kernel-doc and c89). Add documentation for missing struct members and function arguments. Signed-off-by: Tobin C. Harding Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/jbd2/transaction.c | 5 +- include/linux/jbd2.h | 431 ++++++++++++++++++++++++++---------------- 2 files changed, 272 insertions(+), 164 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 881a9e09ca4e..ac311037d7a5 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -492,8 +492,10 @@ void jbd2_journal_free_reserved(handle_t *handle) EXPORT_SYMBOL(jbd2_journal_free_reserved); /** - * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle + * int jbd2_journal_start_reserved() - start reserved handle * @handle: handle to start + * @type: for handle statistics + * @line_no: for handle statistics * * Start handle that has been previously reserved with jbd2_journal_reserve(). * This attaches @handle to the running transaction (or creates one if there's @@ -623,6 +625,7 @@ error_out: * int jbd2_journal_restart() - restart a handle . * @handle: handle to restart * @nblocks: nr credits requested + * @gfp_mask: memory allocation flags (for start_this_handle) * * Restart a handle for a multi-transaction filesystem * operation. diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 296d1e0ea87b..b708e5169d1d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -418,26 +418,41 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** - * struct jbd_inode is the structure linking inodes in ordered mode - * present in a transaction so that we can sync them during commit. + * struct jbd_inode - The jbd_inode type is the structure linking inodes in + * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { - /* Which transaction does this inode belong to? Either the running - * transaction or the committing one. [j_list_lock] */ + /** + * @i_transaction: + * + * Which transaction does this inode belong to? Either the running + * transaction or the committing one. [j_list_lock] + */ transaction_t *i_transaction; - /* Pointer to the running transaction modifying inode's data in case - * there is already a committing transaction touching it. [j_list_lock] */ + /** + * @i_next_transaction: + * + * Pointer to the running transaction modifying inode's data in case + * there is already a committing transaction touching it. [j_list_lock] + */ transaction_t *i_next_transaction; - /* List of inodes in the i_transaction [j_list_lock] */ + /** + * @i_list: List of inodes in the i_transaction [j_list_lock] + */ struct list_head i_list; - /* VFS inode this inode belongs to [constant during the lifetime - * of the structure] */ + /** + * @i_vfs_inode: + * + * VFS inode this inode belongs to [constant for lifetime of structure] + */ struct inode *i_vfs_inode; - /* Flags of inode [j_list_lock] */ + /** + * @i_flags: Flags of inode [j_list_lock] + */ unsigned long i_flags; }; @@ -447,12 +462,20 @@ struct jbd2_revoke_table_s; * struct handle_s - The handle_s type is the concrete type associated with * handle_t. * @h_transaction: Which compound transaction is this update a part of? + * @h_journal: Which journal handle belongs to - used iff h_reserved set. + * @h_rsv_handle: Handle reserved for finishing the logical operation. * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. - * @h_ref: Reference count on this handle - * @h_err: Field for caller's use to track errors through large fs operations - * @h_sync: flag for sync-on-close - * @h_jdata: flag to force data journaling - * @h_aborted: flag indicating fatal error on handle + * @h_ref: Reference count on this handle. + * @h_err: Field for caller's use to track errors through large fs operations. + * @h_sync: Flag for sync-on-close. + * @h_jdata: Flag to force data journaling. + * @h_reserved: Flag for handle for reserved credits. + * @h_aborted: Flag indicating fatal error on handle. + * @h_type: For handle statistics. + * @h_line_no: For handle statistics. + * @h_start_jiffies: Handle Start time. + * @h_requested_credits: Holds @h_buffer_credits after handle is started. + * @saved_alloc_context: Saved context while transaction is open. **/ /* Docbook can't yet cope with the bit fields, but will leave the documentation @@ -462,32 +485,23 @@ struct jbd2_revoke_table_s; struct jbd2_journal_handle { union { - /* Which compound transaction is this update a part of? */ transaction_t *h_transaction; /* Which journal handle belongs to - used iff h_reserved set */ journal_t *h_journal; }; - /* Handle reserved for finishing the logical operation */ handle_t *h_rsv_handle; - - /* Number of remaining buffers we are allowed to dirty: */ int h_buffer_credits; - - /* Reference count on this handle */ int h_ref; - - /* Field for caller's use to track errors through large fs */ - /* operations */ int h_err; /* Flags [no locking] */ - unsigned int h_sync: 1; /* sync-on-close */ - unsigned int h_jdata: 1; /* force data journaling */ - unsigned int h_reserved: 1; /* handle with reserved credits */ - unsigned int h_aborted: 1; /* fatal error on handle */ - unsigned int h_type: 8; /* for handle statistics */ - unsigned int h_line_no: 16; /* for handle statistics */ + unsigned int h_sync: 1; + unsigned int h_jdata: 1; + unsigned int h_reserved: 1; + unsigned int h_aborted: 1; + unsigned int h_type: 8; + unsigned int h_line_no: 16; unsigned long h_start_jiffies; unsigned int h_requested_credits; @@ -729,228 +743,253 @@ jbd2_time_diff(unsigned long start, unsigned long end) /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. - * @j_flags: General journaling state flags - * @j_errno: Is there an outstanding uncleared error on the journal (from a - * prior abort)? - * @j_sb_buffer: First part of superblock buffer - * @j_superblock: Second part of superblock buffer - * @j_format_version: Version of the superblock format - * @j_state_lock: Protect the various scalars in the journal - * @j_barrier_count: Number of processes waiting to create a barrier lock - * @j_barrier: The barrier lock itself - * @j_running_transaction: The current running transaction.. - * @j_committing_transaction: the transaction we are pushing to disk - * @j_checkpoint_transactions: a linked circular list of all transactions - * waiting for checkpointing - * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction - * to start committing, or for a barrier lock to be released - * @j_wait_done_commit: Wait queue for waiting for commit to complete - * @j_wait_commit: Wait queue to trigger commit - * @j_wait_updates: Wait queue to wait for updates to complete - * @j_wait_reserved: Wait queue to wait for reserved buffer credits to drop - * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints - * @j_head: Journal head - identifies the first unused block in the journal - * @j_tail: Journal tail - identifies the oldest still-used block in the - * journal. - * @j_free: Journal free - how many free blocks are there in the journal? - * @j_first: The block number of the first usable block - * @j_last: The block number one beyond the last usable block - * @j_dev: Device where we store the journal - * @j_blocksize: blocksize for the location where we store the journal. - * @j_blk_offset: starting block offset for into the device where we store the - * journal - * @j_fs_dev: Device which holds the client fs. For internal journal this will - * be equal to j_dev - * @j_reserved_credits: Number of buffers reserved from the running transaction - * @j_maxlen: Total maximum capacity of the journal region on disk. - * @j_list_lock: Protects the buffer lists and internal buffer state. - * @j_inode: Optional inode where we store the journal. If present, all journal - * block numbers are mapped into this inode via bmap(). - * @j_tail_sequence: Sequence number of the oldest transaction in the log - * @j_transaction_sequence: Sequence number of the next transaction to grant - * @j_commit_sequence: Sequence number of the most recently committed - * transaction - * @j_commit_request: Sequence number of the most recent transaction wanting - * commit - * @j_uuid: Uuid of client object. - * @j_task: Pointer to the current commit thread for this journal - * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a - * single compound commit transaction - * @j_commit_interval: What is the maximum transaction lifetime before we begin - * a commit? - * @j_commit_timer: The timer used to wakeup the commit thread - * @j_revoke_lock: Protect the revoke table - * @j_revoke: The revoke table - maintains the list of revoked blocks in the - * current transaction. - * @j_revoke_table: alternate revoke tables for j_revoke - * @j_wbuf: array of buffer_heads for jbd2_journal_commit_transaction - * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the - * number that will fit in j_blocksize - * @j_last_sync_writer: most recent pid which did a synchronous write - * @j_history_lock: Protect the transactions statistics history - * @j_proc_entry: procfs entry for the jbd statistics directory - * @j_stats: Overall statistics - * @j_private: An opaque pointer to fs-private information. - * @j_trans_commit_map: Lockdep entity to track transaction commit dependencies */ - struct journal_s { - /* General journaling state flags [j_state_lock] */ + /** + * @j_flags: General journaling state flags [j_state_lock] + */ unsigned long j_flags; - /* + /** + * @j_errno: + * * Is there an outstanding uncleared error on the journal (from a prior * abort)? [j_state_lock] */ int j_errno; - /* The superblock buffer */ + /** + * @j_sb_buffer: The first part of the superblock buffer. + */ struct buffer_head *j_sb_buffer; + + /** + * @j_superblock: The second part of the superblock buffer. + */ journal_superblock_t *j_superblock; - /* Version of the superblock format */ + /** + * @j_format_version: Version of the superblock format. + */ int j_format_version; - /* - * Protect the various scalars in the journal + /** + * @j_state_lock: Protect the various scalars in the journal. */ rwlock_t j_state_lock; - /* + /** + * @j_barrier_count: + * * Number of processes waiting to create a barrier lock [j_state_lock] */ int j_barrier_count; - /* The barrier lock itself */ + /** + * @j_barrier: The barrier lock itself. + */ struct mutex j_barrier; - /* + /** + * @j_running_transaction: + * * Transactions: The current running transaction... * [j_state_lock] [caller holding open handle] */ transaction_t *j_running_transaction; - /* + /** + * @j_committing_transaction: + * * the transaction we are pushing to disk * [j_state_lock] [caller holding open handle] */ transaction_t *j_committing_transaction; - /* + /** + * @j_checkpoint_transactions: + * * ... and a linked circular list of all transactions waiting for * checkpointing. [j_list_lock] */ transaction_t *j_checkpoint_transactions; - /* + /** + * @j_wait_transaction_locked: + * * Wait queue for waiting for a locked transaction to start committing, - * or for a barrier lock to be released + * or for a barrier lock to be released. */ wait_queue_head_t j_wait_transaction_locked; - /* Wait queue for waiting for commit to complete */ + /** + * @j_wait_done_commit: Wait queue for waiting for commit to complete. + */ wait_queue_head_t j_wait_done_commit; - /* Wait queue to trigger commit */ + /** + * @j_wait_commit: Wait queue to trigger commit. + */ wait_queue_head_t j_wait_commit; - /* Wait queue to wait for updates to complete */ + /** + * @j_wait_updates: Wait queue to wait for updates to complete. + */ wait_queue_head_t j_wait_updates; - /* Wait queue to wait for reserved buffer credits to drop */ + /** + * @j_wait_reserved: + * + * Wait queue to wait for reserved buffer credits to drop. + */ wait_queue_head_t j_wait_reserved; - /* Semaphore for locking against concurrent checkpoints */ + /** + * @j_checkpoint_mutex: + * + * Semaphore for locking against concurrent checkpoints. + */ struct mutex j_checkpoint_mutex; - /* + /** + * @j_chkpt_bhs: + * * List of buffer heads used by the checkpoint routine. This * was moved from jbd2_log_do_checkpoint() to reduce stack * usage. Access to this array is controlled by the - * j_checkpoint_mutex. [j_checkpoint_mutex] + * @j_checkpoint_mutex. [j_checkpoint_mutex] */ struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; - - /* + + /** + * @j_head: + * * Journal head: identifies the first unused block in the journal. * [j_state_lock] */ unsigned long j_head; - /* + /** + * @j_tail: + * * Journal tail: identifies the oldest still-used block in the journal. * [j_state_lock] */ unsigned long j_tail; - /* + /** + * @j_free: + * * Journal free: how many free blocks are there in the journal? * [j_state_lock] */ unsigned long j_free; - /* - * Journal start and end: the block numbers of the first usable block - * and one beyond the last usable block in the journal. [j_state_lock] + /** + * @j_first: + * + * The block number of the first usable block in the journal + * [j_state_lock]. */ unsigned long j_first; + + /** + * @j_last: + * + * The block number one beyond the last usable block in the journal + * [j_state_lock]. + */ unsigned long j_last; - /* - * Device, blocksize and starting block offset for the location where we - * store the journal. + /** + * @j_dev: Device where we store the journal. */ struct block_device *j_dev; + + /** + * @j_blocksize: Block size for the location where we store the journal. + */ int j_blocksize; + + /** + * @j_blk_offset: + * + * Starting block offset into the device where we store the journal. + */ unsigned long long j_blk_offset; + + /** + * @j_devname: Journal device name. + */ char j_devname[BDEVNAME_SIZE+24]; - /* + /** + * @j_fs_dev: + * * Device which holds the client fs. For internal journal this will be * equal to j_dev. */ struct block_device *j_fs_dev; - /* Total maximum capacity of the journal region on disk. */ + /** + * @j_maxlen: Total maximum capacity of the journal region on disk. + */ unsigned int j_maxlen; - /* Number of buffers reserved from the running transaction */ + /** + * @j_reserved_credits: + * + * Number of buffers reserved from the running transaction. + */ atomic_t j_reserved_credits; - /* - * Protects the buffer lists and internal buffer state. + /** + * @j_list_lock: Protects the buffer lists and internal buffer state. */ spinlock_t j_list_lock; - /* Optional inode where we store the journal. If present, all */ - /* journal block numbers are mapped into this inode via */ - /* bmap(). */ + /** + * @j_inode: + * + * Optional inode where we store the journal. If present, all + * journal block numbers are mapped into this inode via bmap(). + */ struct inode *j_inode; - /* + /** + * @j_tail_sequence: + * * Sequence number of the oldest transaction in the log [j_state_lock] */ tid_t j_tail_sequence; - /* + /** + * @j_transaction_sequence: + * * Sequence number of the next transaction to grant [j_state_lock] */ tid_t j_transaction_sequence; - /* + /** + * @j_commit_sequence: + * * Sequence number of the most recently committed transaction * [j_state_lock]. */ tid_t j_commit_sequence; - /* + /** + * @j_commit_request: + * * Sequence number of the most recent transaction wanting commit * [j_state_lock] */ tid_t j_commit_request; - /* + /** + * @j_uuid: + * * Journal uuid: identifies the object (filesystem, LVM volume etc) * backed by this journal. This will eventually be replaced by an array * of uuids, allowing us to index multiple devices within a single @@ -958,85 +997,151 @@ struct journal_s */ __u8 j_uuid[16]; - /* Pointer to the current commit thread for this journal */ + /** + * @j_task: Pointer to the current commit thread for this journal. + */ struct task_struct *j_task; - /* + /** + * @j_max_transaction_buffers: + * * Maximum number of metadata buffers to allow in a single compound - * commit transaction + * commit transaction. */ int j_max_transaction_buffers; - /* + /** + * @j_commit_interval: + * * What is the maximum transaction lifetime before we begin a commit? */ unsigned long j_commit_interval; - /* The timer used to wakeup the commit thread: */ + /** + * @j_commit_timer: The timer used to wakeup the commit thread. + */ struct timer_list j_commit_timer; - /* - * The revoke table: maintains the list of revoked blocks in the - * current transaction. [j_revoke_lock] + /** + * @j_revoke_lock: Protect the revoke table. */ spinlock_t j_revoke_lock; + + /** + * @j_revoke: + * + * The revoke table - maintains the list of revoked blocks in the + * current transaction. + */ struct jbd2_revoke_table_s *j_revoke; + + /** + * @j_revoke_table: Alternate revoke tables for j_revoke. + */ struct jbd2_revoke_table_s *j_revoke_table[2]; - /* - * array of bhs for jbd2_journal_commit_transaction + /** + * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction. */ struct buffer_head **j_wbuf; + + /** + * @j_wbufsize: + * + * Size of @j_wbuf array. + */ int j_wbufsize; - /* - * this is the pid of hte last person to run a synchronous operation - * through the journal + /** + * @j_last_sync_writer: + * + * The pid of the last person to run a synchronous operation + * through the journal. */ pid_t j_last_sync_writer; - /* - * the average amount of time in nanoseconds it takes to commit a + /** + * @j_average_commit_time: + * + * The average amount of time in nanoseconds it takes to commit a * transaction to disk. [j_state_lock] */ u64 j_average_commit_time; - /* - * minimum and maximum times that we should wait for - * additional filesystem operations to get batched into a - * synchronous handle in microseconds + /** + * @j_min_batch_time: + * + * Minimum time that we should wait for additional filesystem operations + * to get batched into a synchronous handle in microseconds. */ u32 j_min_batch_time; + + /** + * @j_max_batch_time: + * + * Maximum time that we should wait for additional filesystem operations + * to get batched into a synchronous handle in microseconds. + */ u32 j_max_batch_time; - /* This function is called when a transaction is closed */ + /** + * @j_commit_callback: + * + * This function is called when a transaction is closed. + */ void (*j_commit_callback)(journal_t *, transaction_t *); /* * Journal statistics */ + + /** + * @j_history_lock: Protect the transactions statistics history. + */ spinlock_t j_history_lock; + + /** + * @j_proc_entry: procfs entry for the jbd statistics directory. + */ struct proc_dir_entry *j_proc_entry; + + /** + * @j_stats: Overall statistics. + */ struct transaction_stats_s j_stats; - /* Failed journal commit ID */ + /** + * @j_failed_commit: Failed journal commit ID. + */ unsigned int j_failed_commit; - /* + /** + * @j_private: + * * An opaque pointer to fs-private information. ext3 puts its - * superblock pointer here + * superblock pointer here. */ void *j_private; - /* Reference to checksum algorithm driver via cryptoapi */ + /** + * @j_chksum_driver: + * + * Reference to checksum algorithm driver via cryptoapi. + */ struct crypto_shash *j_chksum_driver; - /* Precomputed journal UUID checksum for seeding other checksums */ + /** + * @j_csum_seed: + * + * Precomputed journal UUID checksum for seeding other checksums. + */ __u32 j_csum_seed; #ifdef CONFIG_DEBUG_LOCK_ALLOC - /* + /** + * @j_trans_commit_map: + * * Lockdep entity to track transaction commit dependencies. Handles * hold this "lock" for read, when we wait for commit, we acquire the * "lock" for writing. This matches the properties of jbd2 journalling From 06f29cc81f0350261f59643a505010531130eea0 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Wed, 10 Jan 2018 00:34:19 -0500 Subject: [PATCH 11/17] ext4: save error to disk in __ext4_grp_locked_error() In the function __ext4_grp_locked_error(), __save_error_info() is called to save error info in super block block, but does not sync that information to disk to info the subsequence fsck after reboot. This patch writes the error information to disk. After this patch, I think there is no obvious EXT4 error handle branches which leads to "Remounting filesystem read-only" will leave the disk partition miss the subsequence fsck. Signed-off-by: Zhouyi Zhou Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2a7faf94bb90..be71f24a75a0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -743,6 +743,7 @@ __acquires(bitlock) } ext4_unlock_group(sb, grp); + ext4_commit_super(sb, 1); ext4_handle_error(sb); /* * We only get here in the ERRORS_RO case; relocking the group From 49598e04b5a1c3679cbee0dda8c2a0461e9bb320 Mon Sep 17 00:00:00 2001 From: Jun Piao Date: Thu, 11 Jan 2018 13:17:49 -0500 Subject: [PATCH 12/17] ext4: use 'sbi' instead of 'EXT4_SB(sb)' We could use 'sbi' instead of 'EXT4_SB(sb)' to make code more elegant. Signed-off-by: Jun Piao Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/balloc.c | 4 ++-- fs/ext4/block_validity.c | 6 +++--- fs/ext4/ialloc.c | 4 ++-- fs/ext4/mballoc.c | 14 +++++++------- fs/ext4/resize.c | 2 +- fs/ext4/super.c | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index a943e568292e..f9b3e0a83526 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -355,10 +355,10 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, blk = ext4_inode_table(sb, desc); offset = blk - group_first_block; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, - EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group), + EXT4_B2C(sbi, offset + sbi->s_itb_per_group), EXT4_B2C(sbi, offset)); if (next_zero_bit < - EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group)) + EXT4_B2C(sbi, offset + sbi->s_itb_per_group)) /* bad bitmap for inode tables */ return blk; return 0; diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index bee888e0e2db..913061c0de1b 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -147,11 +147,11 @@ int ext4_setup_system_zone(struct super_block *sb) int ret; if (!test_opt(sb, BLOCK_VALIDITY)) { - if (EXT4_SB(sb)->system_blks.rb_node) + if (sbi->system_blks.rb_node) ext4_release_system_zone(sb); return 0; } - if (EXT4_SB(sb)->system_blks.rb_node) + if (sbi->system_blks.rb_node) return 0; for (i=0; i < ngroups; i++) { @@ -173,7 +173,7 @@ int ext4_setup_system_zone(struct super_block *sb) } if (test_opt(sb, DEBUG)) - debug_print_tree(EXT4_SB(sb)); + debug_print_tree(sbi); return 0; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b32cf263750d..7830d28df331 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -303,7 +303,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) /* Do this BEFORE marking the inode not in use or returning an error */ ext4_clear_inode(inode); - es = EXT4_SB(sb)->s_es; + es = sbi->s_es; if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { ext4_error(sb, "reserved or nonexistent inode %lu", ino); goto error_return; @@ -1157,7 +1157,7 @@ got: ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ ext4_set_inode_state(inode, EXT4_STATE_NEW); - ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; + ei->i_extra_isize = sbi->s_want_extra_isize; ei->i_inline_off = 0; if (ext4_has_feature_inline_data(sb)) ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1a7ea5f9276f..769a62708b1c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -757,10 +757,10 @@ void ext4_mb_generate_buddy(struct super_block *sb, clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); period = get_cycles() - period; - spin_lock(&EXT4_SB(sb)->s_bal_lock); - EXT4_SB(sb)->s_mb_buddies_generated++; - EXT4_SB(sb)->s_mb_generation_time += period; - spin_unlock(&EXT4_SB(sb)->s_bal_lock); + spin_lock(&sbi->s_bal_lock); + sbi->s_mb_buddies_generated++; + sbi->s_mb_generation_time += period; + spin_unlock(&sbi->s_bal_lock); } static void mb_regenerate_buddy(struct ext4_buddy *e4b) @@ -1447,7 +1447,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, ext4_fsblk_t blocknr; blocknr = ext4_group_first_block_no(sb, e4b->bd_group); - blocknr += EXT4_C2B(EXT4_SB(sb), block); + blocknr += EXT4_C2B(sbi, block); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, @@ -4838,9 +4838,9 @@ do_more: if (in_range(ext4_block_bitmap(sb, gdp), block, count) || in_range(ext4_inode_bitmap(sb, gdp), block, count) || in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group) || + sbi->s_itb_per_group) || in_range(block + count - 1, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { + sbi->s_itb_per_group)) { ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 50443bda8e98..b6bec270a8e4 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1477,7 +1477,7 @@ static int ext4_flex_group_add(struct super_block *sb, goto exit_journal; group = flex_gd->groups[0].group; - BUG_ON(group != EXT4_SB(sb)->s_groups_count); + BUG_ON(group != sbi->s_groups_count); err = ext4_add_new_descs(handle, sb, group, resize_inode, flex_gd->count); if (err) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index be71f24a75a0..e82e35ae5cf9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2677,7 +2677,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, * compensate. */ if (sb->s_blocksize == 1024 && nr == 0 && - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) + le32_to_cpu(sbi->s_es->s_first_data_block) == 0) has_super++; return (has_super + ext4_group_first_block_no(sb, bg)); @@ -3122,7 +3122,7 @@ int ext4_register_li_request(struct super_block *sb, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_li_request *elr = NULL; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t ngroups = sbi->s_groups_count; int ret = 0; mutex_lock(&ext4_li_mtx); @@ -4837,7 +4837,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) bool needs_barrier = false; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + if (unlikely(ext4_forced_shutdown(sbi))) return 0; trace_ext4_sync_fs(sb, wait); From 9f0372488cc9243018a812e8cfbf27de650b187b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Thu, 11 Jan 2018 13:43:33 -0500 Subject: [PATCH 13/17] ext4: correct documentation for grpid mount option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The grpid option is currently described as being the same as nogrpid. Signed-off-by: Ernesto A. Fernández Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- Documentation/filesystems/ext4.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 75236c0c2ac2..d081ce0482cc 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -233,7 +233,7 @@ data_err=ignore(*) Just print an error message if an error occurs data_err=abort Abort the journal if an error occurs in a file data buffer in ordered mode. -grpid Give objects the same group ID as their creator. +grpid New objects have the group ID of their parent. bsdgroups nogrpid (*) New objects have the group ID of their creator. From a794df0ecdd557961a0302062f5a7a5f500a8542 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 11 Jan 2018 14:17:30 -0500 Subject: [PATCH 14/17] ext4: fix incorrect indentation of if statement The indentation is incorrect and spaces need replacing with a tab on the if statement. Cleans up smatch warning: fs/ext4/namei.c:3220 ext4_link() warn: inconsistent indenting Signed-off-by: Colin Ian King Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index e750d68fbcb5..c5696a93a3a6 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3221,9 +3221,9 @@ static int ext4_link(struct dentry *old_dentry, if (err) return err; - if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && - (!projid_eq(EXT4_I(dir)->i_projid, - EXT4_I(old_dentry->d_inode)->i_projid))) + if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && + (!projid_eq(EXT4_I(dir)->i_projid, + EXT4_I(old_dentry->d_inode)->i_projid))) return -EXDEV; err = dquot_initialize(dir); From 95c4df029374d8c09ad36c961e7a14a1d3ac6a6f Mon Sep 17 00:00:00 2001 From: Riccardo Schirone Date: Thu, 11 Jan 2018 14:28:13 -0500 Subject: [PATCH 15/17] ext4: release kobject/kset even when init/register fail Even when kobject_init_and_add/kset_register fail, the kobject has been already initialized and the refcount set to 1. Thus it is necessary to release the kobject/kset, to avoid the memory associated with it hanging around forever. Signed-off-by: Riccardo Schirone Signed-off-by: Theodore Ts'o --- fs/ext4/sysfs.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index e21afd52e7d7..4389fe4893c2 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -396,8 +396,11 @@ int ext4_register_sysfs(struct super_block *sb) init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL, "%s", sb->s_id); - if (err) + if (err) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); return err; + } if (ext4_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); @@ -430,15 +433,19 @@ int __init ext4_init_sysfs(void) kobject_set_name(&ext4_kset.kobj, "ext4"); ext4_kset.kobj.parent = fs_kobj; ret = kset_register(&ext4_kset); - if (ret) + if (ret) { + kset_unregister(&ext4_kset); return ret; + } ret = kobject_init_and_add(&ext4_feat, &ext4_feat_ktype, NULL, "features"); - if (ret) + if (ret) { + kobject_put(&ext4_feat); kset_unregister(&ext4_kset); - else + } else { ext4_proc_root = proc_mkdir(proc_dirname, NULL); + } return ret; } From b99fee58a20ab8e0557cce87b6f187e325993142 Mon Sep 17 00:00:00 2001 From: Riccardo Schirone Date: Thu, 11 Jan 2018 15:11:32 -0500 Subject: [PATCH 16/17] ext4: create ext4_feat kobject dynamically kobjects should always be allocated dynamically, because it is unknown to whoever creates them when kobjects can be released. Signed-off-by: Riccardo Schirone Signed-off-by: Theodore Ts'o --- fs/ext4/sysfs.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 4389fe4893c2..192ade7d6fe6 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "ext4.h" @@ -351,11 +352,10 @@ static struct kset ext4_kset = { static struct kobj_type ext4_feat_ktype = { .default_attrs = ext4_feat_attrs, .sysfs_ops = &ext4_attr_ops, + .release = (void (*)(struct kobject *))kfree, }; -static struct kobject ext4_feat = { - .kset = &ext4_kset, -}; +static struct kobject *ext4_feat; #define PROC_FILE_SHOW_DEFN(name) \ static int name##_open(struct inode *inode, struct file *file) \ @@ -438,20 +438,31 @@ int __init ext4_init_sysfs(void) return ret; } - ret = kobject_init_and_add(&ext4_feat, &ext4_feat_ktype, - NULL, "features"); - if (ret) { - kobject_put(&ext4_feat); - kset_unregister(&ext4_kset); - } else { - ext4_proc_root = proc_mkdir(proc_dirname, NULL); + ext4_feat = kzalloc(sizeof(*ext4_feat), GFP_KERNEL); + if (!ext4_feat) { + ret = -ENOMEM; + goto kset_err; } + + ext4_feat->kset = &ext4_kset; + ret = kobject_init_and_add(ext4_feat, &ext4_feat_ktype, + NULL, "features"); + if (ret) + goto feat_err; + + ext4_proc_root = proc_mkdir(proc_dirname, NULL); + return ret; + +feat_err: + kobject_put(ext4_feat); +kset_err: + kset_unregister(&ext4_kset); return ret; } void ext4_exit_sysfs(void) { - kobject_put(&ext4_feat); + kobject_put(ext4_feat); kset_unregister(&ext4_kset); remove_proc_entry(proc_dirname, NULL); ext4_proc_root = NULL; From 5dc397113d195a004899ee672c5a8a19809495ba Mon Sep 17 00:00:00 2001 From: Riccardo Schirone Date: Thu, 11 Jan 2018 15:34:04 -0500 Subject: [PATCH 17/17] ext4: create ext4_kset dynamically ksets contain a kobject and they should always be allocated dynamically, because it is unknown to whoever creates them when ksets can be released. Signed-off-by: Riccardo Schirone Signed-off-by: Theodore Ts'o --- fs/ext4/sysfs.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 192ade7d6fe6..1205261f130c 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -330,6 +330,13 @@ static void ext4_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } +static void ext4_kset_release(struct kobject *kobj) +{ + struct kset *kset = container_of(kobj, struct kset, kobj); + + kfree(kset); +} + static const struct sysfs_ops ext4_attr_ops = { .show = ext4_attr_show, .store = ext4_attr_store, @@ -343,11 +350,10 @@ static struct kobj_type ext4_sb_ktype = { static struct kobj_type ext4_ktype = { .sysfs_ops = &ext4_attr_ops, + .release = ext4_kset_release, }; -static struct kset ext4_kset = { - .kobj = {.ktype = &ext4_ktype}, -}; +static struct kset *ext4_kset; static struct kobj_type ext4_feat_ktype = { .default_attrs = ext4_feat_attrs, @@ -392,7 +398,7 @@ int ext4_register_sysfs(struct super_block *sb) const struct ext4_proc_files *p; int err; - sbi->s_kobj.kset = &ext4_kset; + sbi->s_kobj.kset = ext4_kset; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL, "%s", sb->s_id); @@ -430,13 +436,16 @@ int __init ext4_init_sysfs(void) { int ret; - kobject_set_name(&ext4_kset.kobj, "ext4"); - ext4_kset.kobj.parent = fs_kobj; - ret = kset_register(&ext4_kset); - if (ret) { - kset_unregister(&ext4_kset); - return ret; - } + ext4_kset = kzalloc(sizeof(*ext4_kset), GFP_KERNEL); + if (!ext4_kset) + return -ENOMEM; + + kobject_set_name(&ext4_kset->kobj, "ext4"); + ext4_kset->kobj.parent = fs_kobj; + ext4_kset->kobj.ktype = &ext4_ktype; + ret = kset_register(ext4_kset); + if (ret) + goto kset_err; ext4_feat = kzalloc(sizeof(*ext4_feat), GFP_KERNEL); if (!ext4_feat) { @@ -444,7 +453,7 @@ int __init ext4_init_sysfs(void) goto kset_err; } - ext4_feat->kset = &ext4_kset; + ext4_feat->kset = ext4_kset; ret = kobject_init_and_add(ext4_feat, &ext4_feat_ktype, NULL, "features"); if (ret) @@ -456,14 +465,16 @@ int __init ext4_init_sysfs(void) feat_err: kobject_put(ext4_feat); kset_err: - kset_unregister(&ext4_kset); + kset_unregister(ext4_kset); + ext4_kset = NULL; return ret; } void ext4_exit_sysfs(void) { kobject_put(ext4_feat); - kset_unregister(&ext4_kset); + kset_unregister(ext4_kset); + ext4_kset = NULL; remove_proc_entry(proc_dirname, NULL); ext4_proc_root = NULL; }