From b469d4329cf949043f9b93a6644f2c64015ef8cd Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Wed, 11 Jan 2012 05:51:10 +0000 Subject: [PATCH 001/316] kmemleak: Only scan non-zero-size areas Kmemleak should only track valid scan areas with a non-zero size. Otherwise, such area may reside just at the end of an object and kmemleak would report "Adding scan area to unknown object". Signed-off-by: Tiejun Chen Signed-off-by: Catalin Marinas --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c833addd94d7..f9f7310f0fdb 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1036,7 +1036,7 @@ void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) { pr_debug("%s(0x%p)\n", __func__, ptr); - if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) + if (atomic_read(&kmemleak_enabled) && ptr && size && !IS_ERR(ptr)) add_scan_area((unsigned long)ptr, size, gfp); else if (atomic_read(&kmemleak_early_log)) log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0); From b370d29ea7565a638ccf85389488364b5abb39fa Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 20 Jan 2012 10:42:40 +0000 Subject: [PATCH 002/316] kmemleak: Disable early logging when kmemleak is off by default Commit b6693005 (kmemleak: When the early log buffer is exceeded, report the actual number) deferred the disabling of the early logging to kmemleak_init(). However, when CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, the early logging was no longer disabled causing __init kmemleak functions to be called even after the kernel freed the init memory. This patch disables the early logging during kmemleak_init() if kmemleak is left disabled. Reported-by: Dirk Gouders Tested-by: Dirk Gouders Tested-by: Josh Boyer Signed-off-by: Catalin Marinas --- mm/kmemleak.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index f9f7310f0fdb..45eb6217bf38 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1757,6 +1757,7 @@ void __init kmemleak_init(void) #ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF if (!kmemleak_skip_disable) { + atomic_set(&kmemleak_early_log, 0); kmemleak_disable(); return; } From 9fb83526a898f14adbd3f6f52fa7126f528f15ac Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 25 Jan 2012 15:19:20 +0000 Subject: [PATCH 003/316] ASoC: wm5100: Make sure we switch to button reporting mode When we have identified an accessory make sure we've flagged that we've done so in order to make sure we always report buttons and don't continue to polarity flip. Signed-off-by: Mark Brown --- sound/soc/codecs/wm5100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c index 66f0611e68b6..3f8fd3ca9454 100644 --- a/sound/soc/codecs/wm5100.c +++ b/sound/soc/codecs/wm5100.c @@ -2183,6 +2183,7 @@ static void wm5100_micd_irq(struct snd_soc_codec *codec) if (wm5100->jack_detecting) { dev_dbg(codec->dev, "Microphone detected\n"); wm5100->jack_mic = true; + wm5100->jack_detecting = false; snd_soc_jack_report(wm5100->jack, SND_JACK_HEADSET, SND_JACK_HEADSET | SND_JACK_BTN_0); @@ -2221,6 +2222,7 @@ static void wm5100_micd_irq(struct snd_soc_codec *codec) SND_JACK_BTN_0); } else if (wm5100->jack_detecting) { dev_dbg(codec->dev, "Headphone detected\n"); + wm5100->jack_detecting = false; snd_soc_jack_report(wm5100->jack, SND_JACK_HEADPHONE, SND_JACK_HEADPHONE); From a188fcba73837f83a78dc90a44998a978f50ac83 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 25 Jan 2012 17:57:16 +0000 Subject: [PATCH 004/316] ASoC: wm5100: Fix microphone configuration We need to write the configuration for each microphone to a different register. Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm5100.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c index 3f8fd3ca9454..fb757af19363 100644 --- a/sound/soc/codecs/wm5100.c +++ b/sound/soc/codecs/wm5100.c @@ -2612,6 +2612,13 @@ static const struct regmap_config wm5100_regmap = { .cache_type = REGCACHE_RBTREE, }; +static const unsigned int wm5100_mic_ctrl_reg[] = { + WM5100_IN1L_CONTROL, + WM5100_IN2L_CONTROL, + WM5100_IN3L_CONTROL, + WM5100_IN4L_CONTROL, +}; + static __devinit int wm5100_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { @@ -2744,7 +2751,7 @@ static __devinit int wm5100_i2c_probe(struct i2c_client *i2c, } for (i = 0; i < ARRAY_SIZE(wm5100->pdata.in_mode); i++) { - regmap_update_bits(wm5100->regmap, WM5100_IN1L_CONTROL, + regmap_update_bits(wm5100->regmap, wm5100_mic_ctrl_reg[i], WM5100_IN1_MODE_MASK | WM5100_IN1_DMIC_SUP_MASK, (wm5100->pdata.in_mode[i] << From 1b76d2ee4012f325ae14e0e71dad1a0835195906 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 25 Jan 2012 21:10:07 +0000 Subject: [PATCH 005/316] ASoC: wm8996: Mark register cache as dirty when regulators are disabled Otherwise we won't resync later. Signed-off-by: Mark Brown --- sound/soc/codecs/wm8996.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index 13aa2bdaa7d7..61f7daa4d0e6 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -108,7 +108,7 @@ static int wm8996_regulator_event_##n(struct notifier_block *nb, \ struct wm8996_priv *wm8996 = container_of(nb, struct wm8996_priv, \ disable_nb[n]); \ if (event & REGULATOR_EVENT_DISABLE) { \ - regcache_cache_only(wm8996->regmap, true); \ + regcache_mark_dirty(wm8996->regmap); \ } \ return 0; \ } From 5539a102882d5ddd1bb95ea9f6f43130a789cb7f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 25 Jan 2012 21:10:21 +0000 Subject: [PATCH 006/316] ASoC: wm8962: Mark register cache as dirty when regulators are disabled Otherwise we won't resync later. Signed-off-by: Mark Brown --- sound/soc/codecs/wm8962.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 296de4e30d26..bda3da887d7e 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -96,7 +96,7 @@ static int wm8962_regulator_event_##n(struct notifier_block *nb, \ struct wm8962_priv *wm8962 = container_of(nb, struct wm8962_priv, \ disable_nb[n]); \ if (event & REGULATOR_EVENT_DISABLE) { \ - regcache_cache_only(wm8962->regmap, true); \ + regcache_mark_dirty(wm8962->regmap); \ } \ return 0; \ } From 5c1b136b7bf702e550039cb0039ec9c790c48f99 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 25 Jan 2012 21:10:33 +0000 Subject: [PATCH 007/316] ASoC: wm5100: Mark register cache as dirty when regulators are disabled Otherwise we won't resync later. Signed-off-by: Mark Brown --- sound/soc/codecs/wm5100.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c index fb757af19363..89f2af77b1c3 100644 --- a/sound/soc/codecs/wm5100.c +++ b/sound/soc/codecs/wm5100.c @@ -1405,6 +1405,7 @@ static int wm5100_set_bias_level(struct snd_soc_codec *codec, case SND_SOC_BIAS_OFF: regcache_cache_only(wm5100->regmap, true); + regcache_mark_dirty(wm5100->regmap); if (wm5100->pdata.ldo_ena) gpio_set_value_cansleep(wm5100->pdata.ldo_ena, 0); regulator_bulk_disable(ARRAY_SIZE(wm5100->core_supplies), From 4ca9b72b71f10147bd21969c1805f5b2c4ca7b7b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Jan 2012 11:50:51 +0100 Subject: [PATCH 008/316] sched: Fix rq->nr_uninterruptible update race KOSAKI Motohiro noticed the following race: > CPU0 CPU1 > -------------------------------------------------------- > deactivate_task() > task->state = TASK_UNINTERRUPTIBLE; > activate_task() > rq->nr_uninterruptible--; > > schedule() > deactivate_task() > rq->nr_uninterruptible++; > Kosaki-San's scenario is possible when CPU0 runs __sched_setscheduler() against CPU1's current @task. __sched_setscheduler() does a dequeue/enqueue in order to move the task to its new queue (position) to reflect the newly provided scheduling parameters. However it should be completely invariant to nr_uninterruptible accounting, sched_setscheduler() doesn't affect readyness to run, merely policy on when to run. So convert the inappropriate activate/deactivate_task usage to enqueue/dequeue_task, which avoids the nr_uninterruptible accounting. Also convert the two other sites: __migrate_task() and normalize_task() that still use activate/deactivate_task. These sites aren't really a problem since __migrate_task() will only be called on non-running task (and therefore are immume to the described problem) and normalize_task() isn't ever used on regular systems. Also remove the comments from activate/deactivate_task since they're misleading at best. Reported-by: KOSAKI Motohiro Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1327486224.2614.45.camel@laptop Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index df00cb09263e..e067df1fd01a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -723,9 +723,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) p->sched_class->dequeue_task(rq, p, flags); } -/* - * activate_task - move a task to the runqueue. - */ void activate_task(struct rq *rq, struct task_struct *p, int flags) { if (task_contributes_to_load(p)) @@ -734,9 +731,6 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags) enqueue_task(rq, p, flags); } -/* - * deactivate_task - remove a task from the runqueue. - */ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) { if (task_contributes_to_load(p)) @@ -4134,7 +4128,7 @@ recheck: on_rq = p->on_rq; running = task_current(rq, p); if (on_rq) - deactivate_task(rq, p, 0); + dequeue_task(rq, p, 0); if (running) p->sched_class->put_prev_task(rq, p); @@ -4147,7 +4141,7 @@ recheck: if (running) p->sched_class->set_curr_task(rq); if (on_rq) - activate_task(rq, p, 0); + enqueue_task(rq, p, 0); check_class_changed(rq, p, prev_class, oldprio); task_rq_unlock(rq, p, &flags); @@ -4998,9 +4992,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) * placed properly. */ if (p->on_rq) { - deactivate_task(rq_src, p, 0); + dequeue_task(rq_src, p, 0); set_task_cpu(p, dest_cpu); - activate_task(rq_dest, p, 0); + enqueue_task(rq_dest, p, 0); check_preempt_curr(rq_dest, p, 0); } done: @@ -7032,10 +7026,10 @@ static void normalize_task(struct rq *rq, struct task_struct *p) on_rq = p->on_rq; if (on_rq) - deactivate_task(rq, p, 0); + dequeue_task(rq, p, 0); __setscheduler(rq, p, SCHED_NORMAL, 0); if (on_rq) { - activate_task(rq, p, 0); + enqueue_task(rq, p, 0); resched_task(rq->curr); } From db7e527da41560f597ccdc4417cefa6b7657c0c0 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 11 Jan 2012 08:58:16 +0100 Subject: [PATCH 009/316] sched/s390: Fix compile error in sched/core.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 029632fbb7b7c9d85063cc9eb470de6c54873df3 ("sched: Make separate sched*.c translation units") removed the include of asm/mutex.h from sched.c. This breaks the combination of: CONFIG_MUTEX_SPIN_ON_OWNER=yes CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX=yes like s390 without mutex debugging: CC kernel/sched/core.o kernel/sched/core.c: In function ‘mutex_spin_on_owner’: kernel/sched/core.c:3287: error: implicit declaration of function ‘arch_mutex_cpu_relax’ Lets re-add the include to kernel/sched/core.c Signed-off-by: Christian Borntraeger Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1326268696-30904-1-git-send-email-borntraeger@de.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e067df1fd01a..5255c9d2e053 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -74,6 +74,7 @@ #include #include +#include #ifdef CONFIG_PARAVIRT #include #endif From 71325960d16cd68ea0e22a8da15b2495b0f363f7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 19 Jan 2012 18:28:57 -0800 Subject: [PATCH 010/316] sched/nohz: Fix nohz cpu idle load balancing state with cpu hotplug With the recent nohz scheduler changes, rq's nohz flag 'NOHZ_TICK_STOPPED' and its associated state doesn't get cleared immediately after the cpu exits idle. This gets cleared as part of the next tick seen on that cpu. For the cpu offline support, we need to clear this state manually. Fix it by registering a cpu notifier, which clears the nohz idle load balance state for this rq explicitly during the CPU_DYING notification. There won't be any nohz updates for that cpu, after the CPU_DYING notification. But lets be extra paranoid and skip updating the nohz state in the select_nohz_load_balancer() if the cpu is not in active state anymore. Reported-by: Srivatsa S. Bhat Reviewed-and-tested-by: Srivatsa S. Bhat Tested-by: Sergey Senozhatsky Signed-off-by: Suresh Siddha Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1327026538.16150.40.camel@sbsiddha-desk.sc.intel.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 84adb2d66cbd..7c6414fc669d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4866,6 +4866,15 @@ static void nohz_balancer_kick(int cpu) return; } +static inline void clear_nohz_tick_stopped(int cpu) +{ + if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { + cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); + atomic_dec(&nohz.nr_cpus); + clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); + } +} + static inline void set_cpu_sd_state_busy(void) { struct sched_domain *sd; @@ -4904,6 +4913,12 @@ void select_nohz_load_balancer(int stop_tick) { int cpu = smp_processor_id(); + /* + * If this cpu is going down, then nothing needs to be done. + */ + if (!cpu_active(cpu)) + return; + if (stop_tick) { if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) return; @@ -4914,6 +4929,18 @@ void select_nohz_load_balancer(int stop_tick) } return; } + +static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DYING: + clear_nohz_tick_stopped(smp_processor_id()); + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} #endif static DEFINE_SPINLOCK(balancing); @@ -5070,11 +5097,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) * busy tick after returning from idle, we will update the busy stats. */ set_cpu_sd_state_busy(); - if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { - clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); - cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); - atomic_dec(&nohz.nr_cpus); - } + clear_nohz_tick_stopped(cpu); /* * None are in tickless mode and hence no need for NOHZ idle load @@ -5590,6 +5613,7 @@ __init void init_sched_fair_class(void) #ifdef CONFIG_NO_HZ zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); + cpu_notifier(sched_ilb_notifier, 0); #endif #endif /* SMP */ From 77231abe55433aa17eca712718745275853fa66d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 20 Jan 2012 12:19:43 +0000 Subject: [PATCH 011/316] ASoC: wm_hubs: Enable line out VMID buffer for single ended line outputs For optimal performance the single ended line outputs require that the line output VMID buffer be enabled. Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm_hubs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c index 2a61094075f8..9ccc416d8cbc 100644 --- a/sound/soc/codecs/wm_hubs.c +++ b/sound/soc/codecs/wm_hubs.c @@ -613,6 +613,8 @@ SND_SOC_DAPM_INPUT("IN2RP:VXRP"), SND_SOC_DAPM_SUPPLY("MICBIAS2", WM8993_POWER_MANAGEMENT_1, 5, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("MICBIAS1", WM8993_POWER_MANAGEMENT_1, 4, 0, NULL, 0), +SND_SOC_DAPM_SUPPLY("LINEOUT_VMID_BUF", WM8993_ANTIPOP1, 7, 0, NULL, 0), + SND_SOC_DAPM_MIXER("IN1L PGA", WM8993_POWER_MANAGEMENT_2, 6, 0, in1l_pga, ARRAY_SIZE(in1l_pga)), SND_SOC_DAPM_MIXER("IN1R PGA", WM8993_POWER_MANAGEMENT_2, 4, 0, @@ -834,9 +836,11 @@ static const struct snd_soc_dapm_route lineout1_diff_routes[] = { }; static const struct snd_soc_dapm_route lineout1_se_routes[] = { + { "LINEOUT1N Mixer", NULL, "LINEOUT_VMID_BUF" }, { "LINEOUT1N Mixer", "Left Output Switch", "Left Output PGA" }, { "LINEOUT1N Mixer", "Right Output Switch", "Right Output PGA" }, + { "LINEOUT1P Mixer", NULL, "LINEOUT_VMID_BUF" }, { "LINEOUT1P Mixer", "Left Output Switch", "Left Output PGA" }, { "LINEOUT1N Driver", NULL, "LINEOUT1N Mixer" }, @@ -853,9 +857,11 @@ static const struct snd_soc_dapm_route lineout2_diff_routes[] = { }; static const struct snd_soc_dapm_route lineout2_se_routes[] = { + { "LINEOUT2N Mixer", NULL, "LINEOUT_VMID_BUF" }, { "LINEOUT2N Mixer", "Left Output Switch", "Left Output PGA" }, { "LINEOUT2N Mixer", "Right Output Switch", "Right Output PGA" }, + { "LINEOUT2P Mixer", NULL, "LINEOUT_VMID_BUF" }, { "LINEOUT2P Mixer", "Right Output Switch", "Right Output PGA" }, { "LINEOUT2N Driver", NULL, "LINEOUT2N Mixer" }, From fc395b9291925b1880e0afc61274fe2f6ddc1269 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 26 Jan 2012 15:47:37 +0000 Subject: [PATCH 012/316] x86: Properly parenthesize cmpxchg() macro arguments Quite oddly, all of the arguments passed through from the top level macros to the second level which didn't need parentheses had them, while the only expression (involving a parameter) needing them didn't. Very recently I got bitten by the lack thereof when using something like "array + index" for the first operand, with "array" being an array more narrow than int. Signed-off-by: Jan Beulich Cc: Linus Torvalds Link: http://lkml.kernel.org/r/4F2183A9020000780006F3E6@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cmpxchg.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 0c9fa2745f13..b3b733262909 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -145,13 +145,13 @@ extern void __add_wrong_size(void) #ifdef __HAVE_ARCH_CMPXCHG #define cmpxchg(ptr, old, new) \ - __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + __cmpxchg(ptr, old, new, sizeof(*(ptr))) #define sync_cmpxchg(ptr, old, new) \ - __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + __sync_cmpxchg(ptr, old, new, sizeof(*(ptr))) #define cmpxchg_local(ptr, old, new) \ - __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) + __cmpxchg_local(ptr, old, new, sizeof(*(ptr))) #endif /* From b0f4c4b32c8e3aa0d44fc4dd6c40a9a9a8d66b63 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 26 Jan 2012 08:55:34 -0500 Subject: [PATCH 013/316] bugs, x86: Fix printk levels for panic, softlockups and stack dumps rsyslog will display KERN_EMERG messages on a connected terminal. However, these messages are useless/undecipherable for a general user. For example, after a softlockup we get: Message from syslogd@intel-s3e37-04 at Jan 25 14:18:06 ... kernel:Stack: Message from syslogd@intel-s3e37-04 at Jan 25 14:18:06 ... kernel:Call Trace: Message from syslogd@intel-s3e37-04 at Jan 25 14:18:06 ... kernel:Code: ff ff a8 08 75 25 31 d2 48 8d 86 38 e0 ff ff 48 89 d1 0f 01 c8 0f ae f0 48 8b 86 38 e0 ff ff a8 08 75 08 b1 01 4c 89 e0 0f 01 c9 ea 69 dd ff 4c 29 e8 48 89 c7 e8 0f bc da ff 49 89 c4 49 89 This happens because the printk levels for these messages are incorrect. Only an informational message should be displayed on a terminal. I modified the printk levels for various messages in the kernel and tested the output by using the drivers/misc/lkdtm.c kernel modules (ie, softlockups, panics, hard lockups, etc.) and confirmed that the console output was still the same and that the output to the terminals was correct. For example, in the case of a softlockup we now see the much more informative: Message from syslogd@intel-s3e37-04 at Jan 25 10:18:06 ... BUG: soft lockup - CPU4 stuck for 60s! instead of the above confusing messages. AFAICT, the messages no longer have to be KERN_EMERG. In the most important case of a panic we set console_verbose(). As for the other less severe cases the correct data is output to the console and /var/log/messages. Successfully tested by me using the drivers/misc/lkdtm.c module. Signed-off-by: Prarit Bhargava Cc: dzickus@redhat.com Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1327586134-11926-1-git-send-email-prarit@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 3 ++- arch/x86/kernel/dumpstack_64.c | 6 +++--- arch/x86/mm/fault.c | 4 ++-- kernel/watchdog.c | 2 +- lib/bug.c | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1aae78f775fc..4025fe4f928f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -252,7 +252,8 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) unsigned short ss; unsigned long sp; #endif - printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); + printk(KERN_DEFAULT + "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT printk("PREEMPT "); #endif diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6d728d9284bd..42b2bca0b72c 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -269,11 +269,11 @@ void show_registers(struct pt_regs *regs) unsigned char c; u8 *ip; - printk(KERN_EMERG "Stack:\n"); + printk(KERN_DEFAULT "Stack:\n"); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, - 0, KERN_EMERG); + 0, KERN_DEFAULT); - printk(KERN_EMERG "Code: "); + printk(KERN_DEFAULT "Code: "); ip = (u8 *)regs->ip - code_prologue; if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9d74824a708d..f0b4caf85c1a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -673,7 +673,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, stackend = end_of_stack(tsk); if (tsk != &init_task && *stackend != STACK_END_MAGIC) - printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); + printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); tsk->thread.cr2 = address; tsk->thread.trap_no = 14; @@ -684,7 +684,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, sig = 0; /* Executive summary in case the body of the oops scrolled away */ - printk(KERN_EMERG "CR2: %016lx\n", address); + printk(KERN_DEFAULT "CR2: %016lx\n", address); oops_end(flags, regs, sig); } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 1d7bca7f4f52..d117262deba3 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -296,7 +296,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) if (__this_cpu_read(soft_watchdog_warn) == true) return HRTIMER_RESTART; - printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", + printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); print_modules(); diff --git a/lib/bug.c b/lib/bug.c index 19552096d16b..a28c1415357c 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -169,7 +169,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_WARN; } - printk(KERN_EMERG "------------[ cut here ]------------\n"); + printk(KERN_DEFAULT "------------[ cut here ]------------\n"); if (file) printk(KERN_CRIT "kernel BUG at %s:%u!\n", From d1bb399ad03c11e792f6dea198d3b1e23061f094 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 26 Jan 2012 22:05:58 +0100 Subject: [PATCH 014/316] firewire: ohci: add reset packet quirk for SB Audigy The Audigy's SB1394 controller is actually from Texas Instruments and has the same bus reset packet generation bug, so it needs the same quirk entry. Signed-off-by: Clemens Ladisch Cc: 2.6.36+ Signed-off-by: Stefan Richter --- drivers/firewire/ohci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 6628feaa7622..21250eca28d1 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -263,6 +263,7 @@ static inline struct fw_ohci *fw_ohci(struct fw_card *card) static char ohci_driver_name[] = KBUILD_MODNAME; #define PCI_DEVICE_ID_AGERE_FW643 0x5901 +#define PCI_DEVICE_ID_CREATIVE_SB1394 0x4001 #define PCI_DEVICE_ID_JMICRON_JMB38X_FW 0x2380 #define PCI_DEVICE_ID_TI_TSB12LV22 0x8009 #define PCI_DEVICE_ID_TI_TSB12LV26 0x8020 @@ -289,6 +290,9 @@ static const struct { {PCI_VENDOR_ID_ATT, PCI_DEVICE_ID_AGERE_FW643, 6, QUIRK_NO_MSI}, + {PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_SB1394, PCI_ANY_ID, + QUIRK_RESET_PACKET}, + {PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB38X_FW, PCI_ANY_ID, QUIRK_NO_MSI}, From b5740f4b2cb3503b436925eb2242bc3d75cd3dfe Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 17 Jan 2012 17:40:31 +0900 Subject: [PATCH 015/316] sched: Fix ancient race in do_exit() try_to_wake_up() has a problem which may change status from TASK_DEAD to TASK_RUNNING in race condition with SMI or guest environment of virtual machine. As a result, exited task is scheduled() again and panic occurs. Here is the sequence how it occurs: ----------------------------------+----------------------------- | CPU A | CPU B ----------------------------------+----------------------------- TASK A calls exit().... do_exit() exit_mm() down_read(mm->mmap_sem); rwsem_down_failed_common() set TASK_UNINTERRUPTIBLE set waiter.task <= task A list_add to sem->wait_list : raw_spin_unlock_irq() (I/O interruption occured) __rwsem_do_wake(mmap_sem) list_del(&waiter->list); waiter->task = NULL wake_up_process(task A) try_to_wake_up() (task is still TASK_UNINTERRUPTIBLE) p->on_rq is still 1.) ttwu_do_wakeup() (*A) : (I/O interruption handler finished) if (!waiter.task) schedule() is not called due to waiter.task is NULL. tsk->state = TASK_RUNNING : check_preempt_curr(); : task->state = TASK_DEAD (*B) <--- set TASK_RUNNING (*C) schedule() (exit task is running again) BUG_ON() is called! -------------------------------------------------------- The execution time between (*A) and (*B) is usually very short, because the interruption is disabled, and setting TASK_RUNNING at (*C) must be executed before setting TASK_DEAD. HOWEVER, if SMI is interrupted between (*A) and (*B), (*C) is able to execute AFTER setting TASK_DEAD! Then, exited task is scheduled again, and BUG_ON() is called.... If the system works on guest system of virtual machine, the time between (*A) and (*B) may be also long due to scheduling of hypervisor, and same phenomenon can occur. By this patch, do_exit() waits for releasing task->pi_lock which is used in try_to_wake_up(). It guarantees the task becomes TASK_DEAD after waking up. Signed-off-by: Yasunori Goto Acked-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20120117174031.3118.E1E9C6FF@jp.fujitsu.com Signed-off-by: Ingo Molnar --- kernel/exit.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kernel/exit.c b/kernel/exit.c index 294b1709170d..4b4042f9bc6a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1038,6 +1038,22 @@ void do_exit(long code) if (tsk->nr_dirtied) __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); exit_rcu(); + + /* + * The setting of TASK_RUNNING by try_to_wake_up() may be delayed + * when the following two conditions become true. + * - There is race condition of mmap_sem (It is acquired by + * exit_mm()), and + * - SMI occurs before setting TASK_RUNINNG. + * (or hypervisor of virtual machine switches to other guest) + * As a result, we may become TASK_RUNNING after becoming TASK_DEAD + * + * To avoid it, we have to wait for releasing tsk->pi_lock which + * is held by try_to_wake_up() + */ + smp_mb(); + raw_spin_unlock_wait(&tsk->pi_lock); + /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ From e050e3f0a71bf7dc2c148b35caff0234decc8198 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 26 Jan 2012 17:03:19 +0100 Subject: [PATCH 016/316] perf: Fix broken interrupt rate throttling This patch fixes the sampling interrupt throttling mechanism. It was broken in v3.2. Events were not being unthrottled. The unthrottling mechanism required that events be checked at each timer tick. This patch solves this problem and also separates: - unthrottling - multiplexing - frequency-mode period adjustments Not all of them need to be executed at each timer tick. This third version of the patch is based on my original patch + PeterZ proposal (https://lkml.org/lkml/2012/1/7/87). At each timer tick, for each context: - if the current CPU has throttled events, we unthrottle events - if context has frequency-based events, we adjust sampling periods - if we have reached the jiffies interval, we multiplex (rotate) We decoupled rotation (multiplexing) from frequency-mode sampling period adjustments. They should not necessarily happen at the same rate. Multiplexing is subject to jiffies_interval (currently at 1 but could be higher once the tunable is exposed via sysfs). We have grouped frequency-mode adjustment and unthrottling into the same routine to minimize code duplication. When throttled while in frequency mode, we scan the events only once. We have fixed the threshold enforcement code in __perf_event_overflow(). There was a bug whereby it would allow more than the authorized rate because an increment of hwc->interrupts was not executed at the right place. The patch was tested with low sampling limit (2000) and fixed periods, frequency mode, overcommitted PMU. On a 2.1GHz AMD CPU: $ cat /proc/sys/kernel/perf_event_max_sample_rate 2000 We set a rate of 3000 samples/sec (2.1GHz/3000 = 700000): $ perf record -e cycles,cycles -c 700000 noploop 10 $ perf report -D | tail -21 Aggregated stats: TOTAL events: 80086 MMAP events: 88 COMM events: 2 EXIT events: 4 THROTTLE events: 19996 UNTHROTTLE events: 19996 SAMPLE events: 40000 cycles stats: TOTAL events: 40006 MMAP events: 5 COMM events: 1 EXIT events: 4 THROTTLE events: 9998 UNTHROTTLE events: 9998 SAMPLE events: 20000 cycles stats: TOTAL events: 39996 THROTTLE events: 9998 UNTHROTTLE events: 9998 SAMPLE events: 20000 For 10s, the cap is 2x2000x10 = 40000 samples. We get exactly that: 20000 samples/event. Signed-off-by: Stephane Eranian Cc: # v3.2+ Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120126160319.GA5655@quad Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/events/core.c | 104 +++++++++++++++++++++++-------------- 2 files changed, 67 insertions(+), 38 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 08855613ceb3..abb2776be1ba 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -587,6 +587,7 @@ struct hw_perf_event { u64 sample_period; u64 last_period; local64_t period_left; + u64 interrupts_seq; u64 interrupts; u64 freq_time_stamp; diff --git a/kernel/events/core.c b/kernel/events/core.c index 32b48c889711..ba36013cfb21 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2300,6 +2300,9 @@ do { \ return div64_u64(dividend, divisor); } +static DEFINE_PER_CPU(int, perf_throttled_count); +static DEFINE_PER_CPU(u64, perf_throttled_seq); + static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) { struct hw_perf_event *hwc = &event->hw; @@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) } } -static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) +/* + * combine freq adjustment with unthrottling to avoid two passes over the + * events. At the same time, make sure, having freq events does not change + * the rate of unthrottling as that would introduce bias. + */ +static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, + int needs_unthr) { struct perf_event *event; struct hw_perf_event *hwc; - u64 interrupts, now; + u64 now, period = TICK_NSEC; s64 delta; - if (!ctx->nr_freq) + /* + * only need to iterate over all events iff: + * - context have events in frequency mode (needs freq adjust) + * - there are events to unthrottle on this cpu + */ + if (!(ctx->nr_freq || needs_unthr)) return; + raw_spin_lock(&ctx->lock); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->state != PERF_EVENT_STATE_ACTIVE) continue; @@ -2344,13 +2360,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) hwc = &event->hw; - interrupts = hwc->interrupts; - hwc->interrupts = 0; - - /* - * unthrottle events on the tick - */ - if (interrupts == MAX_INTERRUPTS) { + if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) { + hwc->interrupts = 0; perf_log_throttle(event, 1); event->pmu->start(event, 0); } @@ -2358,14 +2369,26 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) if (!event->attr.freq || !event->attr.sample_freq) continue; - event->pmu->read(event); + /* + * stop the event and update event->count + */ + event->pmu->stop(event, PERF_EF_UPDATE); + now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; hwc->freq_count_stamp = now; + /* + * restart the event + * reload only if value has changed + */ if (delta > 0) perf_adjust_period(event, period, delta); + + event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); } + + raw_spin_unlock(&ctx->lock); } /* @@ -2388,16 +2411,13 @@ static void rotate_ctx(struct perf_event_context *ctx) */ static void perf_rotate_context(struct perf_cpu_context *cpuctx) { - u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; struct perf_event_context *ctx = NULL; - int rotate = 0, remove = 1, freq = 0; + int rotate = 0, remove = 1; if (cpuctx->ctx.nr_events) { remove = 0; if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) rotate = 1; - if (cpuctx->ctx.nr_freq) - freq = 1; } ctx = cpuctx->task_ctx; @@ -2405,37 +2425,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) remove = 0; if (ctx->nr_events != ctx->nr_active) rotate = 1; - if (ctx->nr_freq) - freq = 1; } - if (!rotate && !freq) + if (!rotate) goto done; perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); - if (freq) { - perf_ctx_adjust_freq(&cpuctx->ctx, interval); - if (ctx) - perf_ctx_adjust_freq(ctx, interval); - } + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); + if (ctx) + ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); - if (rotate) { - cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - if (ctx) - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); + rotate_ctx(&cpuctx->ctx); + if (ctx) + rotate_ctx(ctx); - rotate_ctx(&cpuctx->ctx); - if (ctx) - rotate_ctx(ctx); - - perf_event_sched_in(cpuctx, ctx, current); - } + perf_event_sched_in(cpuctx, ctx, current); perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); - done: if (remove) list_del_init(&cpuctx->rotation_list); @@ -2445,10 +2454,22 @@ void perf_event_task_tick(void) { struct list_head *head = &__get_cpu_var(rotation_list); struct perf_cpu_context *cpuctx, *tmp; + struct perf_event_context *ctx; + int throttled; WARN_ON(!irqs_disabled()); + __this_cpu_inc(perf_throttled_seq); + throttled = __this_cpu_xchg(perf_throttled_count, 0); + list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { + ctx = &cpuctx->ctx; + perf_adjust_freq_unthr_context(ctx, throttled); + + ctx = cpuctx->task_ctx; + if (ctx) + perf_adjust_freq_unthr_context(ctx, throttled); + if (cpuctx->jiffies_interval == 1 || !(jiffies % cpuctx->jiffies_interval)) perf_rotate_context(cpuctx); @@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event, { int events = atomic_read(&event->event_limit); struct hw_perf_event *hwc = &event->hw; + u64 seq; int ret = 0; /* @@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event, if (unlikely(!is_sampling_event(event))) return 0; - if (unlikely(hwc->interrupts >= max_samples_per_tick)) { - if (throttle) { + seq = __this_cpu_read(perf_throttled_seq); + if (seq != hwc->interrupts_seq) { + hwc->interrupts_seq = seq; + hwc->interrupts = 1; + } else { + hwc->interrupts++; + if (unlikely(throttle + && hwc->interrupts >= max_samples_per_tick)) { + __this_cpu_inc(perf_throttled_count); hwc->interrupts = MAX_INTERRUPTS; perf_log_throttle(event, 0); ret = 1; } - } else - hwc->interrupts++; + } if (event->attr.freq) { u64 now = perf_clock(); From cb297a3e433dbdcf7ad81e0564e7b804c941ff0d Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Thu, 5 Jan 2012 20:00:19 +0900 Subject: [PATCH 017/316] sched/rt: Fix task stack corruption under __ARCH_WANT_INTERRUPTS_ON_CTXSW This issue happens under the following conditions: 1. preemption is off 2. __ARCH_WANT_INTERRUPTS_ON_CTXSW is defined 3. RT scheduling class 4. SMP system Sequence is as follows: 1.suppose current task is A. start schedule() 2.task A is enqueued pushable task at the entry of schedule() __schedule prev = rq->curr; ... put_prev_task put_prev_task_rt enqueue_pushable_task 4.pick the task B as next task. next = pick_next_task(rq); 3.rq->curr set to task B and context_switch is started. rq->curr = next; 4.At the entry of context_swtich, release this cpu's rq->lock. context_switch prepare_task_switch prepare_lock_switch raw_spin_unlock_irq(&rq->lock); 5.Shortly after rq->lock is released, interrupt is occurred and start IRQ context 6.try_to_wake_up() which called by ISR acquires rq->lock try_to_wake_up ttwu_remote rq = __task_rq_lock(p) ttwu_do_wakeup(rq, p, wake_flags); task_woken_rt 7.push_rt_task picks the task A which is enqueued before. task_woken_rt push_rt_tasks(rq) next_task = pick_next_pushable_task(rq) 8.At find_lock_lowest_rq(), If double_lock_balance() returns 0, lowest_rq can be the remote rq. (But,If preemption is on, double_lock_balance always return 1 and it does't happen.) push_rt_task find_lock_lowest_rq if (double_lock_balance(rq, lowest_rq)).. 9.find_lock_lowest_rq return the available rq. task A is migrated to the remote cpu/rq. push_rt_task ... deactivate_task(rq, next_task, 0); set_task_cpu(next_task, lowest_rq->cpu); activate_task(lowest_rq, next_task, 0); 10. But, task A is on irq context at this cpu. So, task A is scheduled by two cpus at the same time until restore from IRQ. Task A's stack is corrupted. To fix it, don't migrate an RT task if it's still running. Signed-off-by: Chanho Min Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Cc: Link: http://lkml.kernel.org/r/CAOAMb1BHA=5fm7KTewYyke6u-8DP0iUuJMpgQw54vNeXFsGpoQ@mail.gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 3640ebbb466b..f42ae7fb5ec5 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1587,6 +1587,11 @@ static int push_rt_task(struct rq *rq) if (!next_task) return 0; +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + if (unlikely(task_running(rq, next_task))) + return 0; +#endif + retry: if (unlikely(next_task == rq->curr)) { WARN_ON(1); From a389d67cf9849aff1722ed73186a584e2196a873 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Fri, 27 Jan 2012 14:31:19 +0100 Subject: [PATCH 018/316] ALSA: HDA: Remove quirk for Asus N53Jq The user reports that he needs to add model=auto for audio to work properly. In fact, since node 0x15 is not even a pin node, the existing fixup is definitely wrong. Relevant information can be found in the buglink below. Cc: stable@kernel.org (3.2+) BugLink: https://bugs.launchpad.net/bugs/918254 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0db1dc49382b..a7f17becbd7c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5377,7 +5377,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x8330, "ASUS Eeepc P703 P900A", ALC269_FIXUP_AMIC), SND_PCI_QUIRK(0x1043, 0x1013, "ASUS N61Da", ALC269_FIXUP_AMIC), - SND_PCI_QUIRK(0x1043, 0x1113, "ASUS N63Jn", ALC269_FIXUP_AMIC), SND_PCI_QUIRK(0x1043, 0x1143, "ASUS B53f", ALC269_FIXUP_AMIC), SND_PCI_QUIRK(0x1043, 0x1133, "ASUS UJ20ft", ALC269_FIXUP_AMIC), SND_PCI_QUIRK(0x1043, 0x1183, "ASUS K72DR", ALC269_FIXUP_AMIC), From e47e321a35c741ee41b67976f8c6a3a7a42bc5c0 Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Fri, 20 Jan 2012 18:43:54 +0000 Subject: [PATCH 019/316] RDMA/core: Fix kernel panic by always initializing qp->usecnt We have just been investigating kernel panics related to cq->ibcq.event_handler() completion calls. The problem is that ib_destroy_qp() fails with -EBUSY. Further investigation revealed qp->usecnt is not initialized. This counter was introduced in linux-3.2 by commit 0e0ec7e0638e ("RDMA/core: Export ib_open_qp() to share XRC TGT QPs") but it only gets initialized for IB_QPT_XRC_TGT, but it is checked in ib_destroy_qp() for any QP type. Fix this by initializing qp->usecnt for every QP we create. Signed-off-by: Bernd Schubert Signed-off-by: Sven Breuner [ Initialize qp->usecnt in uverbs too. - Sean ] Signed-off-by: Sean Hefty Cc: Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 1 + drivers/infiniband/core/verbs.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b930da4c0c63..4d27e4c3fe34 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1485,6 +1485,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; + atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 602b1bd723a9..575b78045aaf 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -421,6 +421,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; + atomic_set(&qp->usecnt, 0); if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { qp->event_handler = __ib_shared_qp_event_handler; qp->qp_context = qp; @@ -430,7 +431,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->xrcd = qp_init_attr->xrcd; atomic_inc(&qp_init_attr->xrcd->usecnt); INIT_LIST_HEAD(&qp->open_list); - atomic_set(&qp->usecnt, 0); real_qp = qp; qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, From ef5352875a12a0a45be8bca29fc218a94786d920 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 Jan 2012 09:15:29 +0300 Subject: [PATCH 020/316] IB/ipath: Calling PTR_ERR() on right variable in create_file() "dentry" is a valid pointer. "*dentry" was intended. Signed-off-by: Dan Carpenter Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index b7d4216db3c3..a4de9d58e9b4 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -89,7 +89,7 @@ static int create_file(const char *name, umode_t mode, error = ipathfs_mknod(parent->d_inode, *dentry, mode, fops, data); else - error = PTR_ERR(dentry); + error = PTR_ERR(*dentry); mutex_unlock(&parent->d_inode->i_mutex); return error; From 81f99dcc93226c7accfe5c4edad1749b8aebf7db Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 17 Jan 2012 10:17:30 -0600 Subject: [PATCH 021/316] RDMA/nes: Fix for sending MPA reject frame Set a reject flag, when sending MPA reject message to inform the peer that the application has rejected the connection. Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 425065b36b8c..183f7ab120db 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -233,6 +233,7 @@ static int send_mpa_reject(struct nes_cm_node *cm_node) u8 *start_ptr = &start_addr; u8 **start_buff = &start_ptr; u16 buff_len = 0; + struct ietf_mpa_v1 *mpa_frame; skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { @@ -242,6 +243,8 @@ static int send_mpa_reject(struct nes_cm_node *cm_node) /* send an MPA reject frame */ cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY); + mpa_frame = (struct ietf_mpa_v1 *)*start_buff; + mpa_frame->flags |= IETF_MPA_FLAGS_REJECT; form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN); cm_node->state = NES_CM_STATE_FIN_WAIT1; From 7525c85be0e6d18596390e7e2b17a206cd9777f6 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 27 Jan 2012 09:54:30 -0800 Subject: [PATCH 022/316] RDMA/nes: Add missing rcu_read_unlock() in nes_addr_resolve_neigh() Make sure all exit paths from this function unlock everything. Reported-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 183f7ab120db..271279cccad8 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1363,8 +1363,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi if (!memcmp(nesadapter->arp_table[arpindex].mac_addr, neigh->ha, ETH_ALEN)) { /* Mac address same as in nes_arp_table */ - ip_rt_put(rt); - return rc; + goto out; } nes_manage_arp_cache(nesvnic->netdev, @@ -1380,6 +1379,8 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi neigh_event_send(neigh, NULL); } } + +out: rcu_read_unlock(); ip_rt_put(rt); return rc; From 0f3696eb21ef39b7bb7ea7d0160e925904309281 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 9 Jan 2012 10:40:47 +0100 Subject: [PATCH 023/316] IB/qib: Use GFP_ATOMIC when locks are held alloc_dummy_hdrq() is called with locks held and thus should not use GFP_KERNEL. The semantic patch that makes this report is available in scripts/coccinelle/locks/call_kern.cocci. Signed-off-by: Julia Lawall Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_iba6120.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 4f18e2d332df..d0c64d514813 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -2105,7 +2105,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd) dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev, dd->rcd[0]->rcvhdrq_size, &dd->cspec->dummy_hdrq_phys, - GFP_KERNEL | __GFP_COMP); + GFP_ATOMIC | __GFP_COMP); if (!dd->cspec->dummy_hdrq) { qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n"); /* fallback to just 0'ing */ From b6bfefb0410dc49853bccd9673ead896d317c082 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 12 Jan 2012 21:29:59 -0500 Subject: [PATCH 024/316] IB/qib: Roll back PCIe tuning change Commit 8d4548f2b ("IB/qib: Default some module parameters optimally") introduced an issue with older root complexes. They cannot handle the pcie_caps of 0x51 (MaxReadReq 4096, MaxPayload=256). A typical diagnostic in this situation reported by syslog contains the text: [PCIe Poisoned TLP][Send DMA memory read] Restore the module paramter default to zero with will avoid any changes in the root complex. Reviewed-by: Mark Debbage Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index f695061d688e..0fde788e1100 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -560,7 +560,7 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) * BIOS may not set PCIe bus-utilization parameters for best performance. * Check and optionally adjust them to maximize our throughput. */ -static int qib_pcie_caps = 0x51; +static int qib_pcie_caps; module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); From 9ced69ca5296567033804950d8d2161f454c5012 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Tue, 10 Jan 2012 23:53:41 +0000 Subject: [PATCH 025/316] RDMA/ucma: Discard all events for new connections until accepted After reporting a new connection request to user space, the rdma_ucm will discard subsequent events until the user has associated a user space idenfier with the kernel cm_id. This is needed to avoid reporting a reject/disconnect event to the user for a request that they may not have processed. The user space identifier is set once the user tries to accept the connection request. However, the following race exists in ucma_accept(): ctx->uid = cmd.uid; ret = rdma_accept(ctx->cm_id, ...); Once ctx->uid has been set, new events may be reported to the user. While the above mentioned race is avoided, there is an issue that the user _may_ receive a reject/disconnect event if rdma_accept() fails, depending on when the event is processed. To simplify the use of rdma_accept(), discard all events unless rdma_accept() succeeds. This problem was discovered based on questions from Roland Dreier . Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/ucma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index b37b0c02a7b9..5034a87cc72d 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -808,9 +808,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); if (cmd.conn_param.valid) { - ctx->uid = cmd.uid; ucma_copy_conn_param(&conn_param, &cmd.conn_param); + mutex_lock(&file->mut); ret = rdma_accept(ctx->cm_id, &conn_param); + if (!ret) + ctx->uid = cmd.uid; + mutex_unlock(&file->mut); } else ret = rdma_accept(ctx->cm_id, NULL); From 94f622bdac82dd0542741382ea8c9359fd9c163b Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 6 Jan 2012 18:15:01 -0600 Subject: [PATCH 026/316] RDMA/nes: Fix fast memory registration length Zero high order word of fast memory registration (FMR) length field. FMR length field is 32 bits, so high word should always be zero. Signed-off-by: Tatyana Nikolova Signed-off-by: Donald Wood Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 5095bc41c6cc..ba2eb002fd29 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3427,6 +3427,8 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, ib_wr->wr.fast_reg.length); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, ib_wr->wr.fast_reg.rkey); From 4a4b03f4efdcf9f4dd8ce68f305c5fb8ff8e56c4 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 6 Jan 2012 18:17:19 -0600 Subject: [PATCH 027/316] RDMA/nes: Fix fast memory registration opcode Fix fast memory registration opcode in local invalidate completion. Signed-off-by: Tatyana Nikolova Signed-off-by: Donald Wood Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index ba2eb002fd29..3875365a284b 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3726,7 +3726,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) entry->opcode = IB_WC_SEND; break; case NES_IWARP_SQ_OP_LOCINV: - entry->opcode = IB_WR_LOCAL_INV; + entry->opcode = IB_WC_LOCAL_INV; break; case NES_IWARP_SQ_OP_FAST_REG: entry->opcode = IB_WC_FAST_REG_MR; From 114395c61ad2eb5a7a5cd163fcadb2414e48245a Mon Sep 17 00:00:00 2001 From: UK KIM Date: Sat, 28 Jan 2012 01:52:22 +0900 Subject: [PATCH 028/316] ASoC: wm_hubs: fix wrong bits for LINEOUT2 N/P mixer Signed-off-by: UK KIM Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm_hubs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c index 9ccc416d8cbc..ea2672455d07 100644 --- a/sound/soc/codecs/wm_hubs.c +++ b/sound/soc/codecs/wm_hubs.c @@ -592,8 +592,8 @@ SOC_DAPM_SINGLE("Output Switch", WM8993_LINE_MIXER2, 0, 1, 0), }; static const struct snd_kcontrol_new line2n_mix[] = { -SOC_DAPM_SINGLE("Left Output Switch", WM8993_LINE_MIXER2, 6, 1, 0), -SOC_DAPM_SINGLE("Right Output Switch", WM8993_LINE_MIXER2, 5, 1, 0), +SOC_DAPM_SINGLE("Left Output Switch", WM8993_LINE_MIXER2, 5, 1, 0), +SOC_DAPM_SINGLE("Right Output Switch", WM8993_LINE_MIXER2, 6, 1, 0), }; static const struct snd_kcontrol_new line2p_mix[] = { From d0caf292505d051b1026e85faf3a85e907566f31 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 28 Jan 2012 13:52:46 +0300 Subject: [PATCH 029/316] x86/dumpstack: Remove unneeded check in dump_trace() Smatch complains that we have some inconsistent NULL checking. If "task" were NULL then it would lead to a NULL dereference later. We can remove this test because earlier on in the function we have: if (!task) task = current; Signed-off-by: Dan Carpenter Acked-by: Frederic Weisbecker Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Clemens Ladisch Link: http://lkml.kernel.org/r/20120128105246.GA25092@elgon.mountain Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6d728d9284bd..af7785ff5aa0 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -129,7 +129,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { if (regs) stack = (unsigned long *)regs->sp; - else if (task && task != current) + else if (task != current) stack = (unsigned long *)task->thread.sp; else stack = &dummy; From 8422fa110334cea79ab16c474902edb21a8b3168 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 30 Jan 2012 17:10:58 +0800 Subject: [PATCH 030/316] ALSA: Add #ifdef CONFIG_PCI guard for snd_pci_quirk_* functions This fixes below build warning when CONFIG_PCI is not set. CC sound/sound_core.o In file included from sound/sound_core.c:15: include/sound/core.h:454: warning: 'struct pci_dev' declared inside parameter list include/sound/core.h:454: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: Axel Lin Signed-off-by: Takashi Iwai --- include/sound/core.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/sound/core.h b/include/sound/core.h index 5ab255f196cc..cea1b5426dfa 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -417,6 +417,7 @@ static inline int __snd_bug_on(int cond) #define gameport_get_port_data(gp) (gp)->port_data #endif +#ifdef CONFIG_PCI /* PCI quirk list helper */ struct snd_pci_quirk { unsigned short subvendor; /* PCI subvendor ID */ @@ -456,5 +457,6 @@ snd_pci_quirk_lookup(struct pci_dev *pci, const struct snd_pci_quirk *list); const struct snd_pci_quirk * snd_pci_quirk_lookup_id(u16 vendor, u16 device, const struct snd_pci_quirk *list); +#endif #endif /* __SOUND_CORE_H */ From 5955633e91bfc5cd0a41d8d82259e1d8b32980ef Mon Sep 17 00:00:00 2001 From: Michael D Labriola Date: Sun, 29 Jan 2012 14:17:22 -0500 Subject: [PATCH 031/316] x86/reboot: Skip DMI checks if reboot set by user Skip DMI checks for vendor specific reboot quirks if the user passed in a reboot= arg on the command line - we should never override user choices. Signed-off-by: Michael D Labriola Cc: Alan Cox Cc: Michael D Labriola Cc: Matthew Garrett Cc: Linus Torvalds Link: http://lkml.kernel.org/r/87wr8ab9od.fsf@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 37a458b521a6..b257f0e28824 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -39,6 +39,14 @@ static int reboot_mode; enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; +/* This variable is used privately to keep track of whether or not + * reboot_type is still set to its default value (i.e., reboot= hasn't + * been set on the command line). This is needed so that we can + * suppress DMI scanning for reboot quirks. Without it, it's + * impossible to override a faulty reboot quirk without recompiling. + */ +static int reboot_default = 1; + #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) static int reboot_cpu = -1; #endif @@ -67,6 +75,12 @@ bool port_cf9_safe = false; static int __init reboot_setup(char *str) { for (;;) { + /* Having anything passed on the command line via + * reboot= will cause us to disable DMI checking + * below. + */ + reboot_default = 0; + switch (*str) { case 'w': reboot_mode = 0x1234; @@ -316,7 +330,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { static int __init reboot_init(void) { - dmi_check_system(reboot_dmi_table); + /* Only do the DMI check if reboot_type hasn't been overridden + * on the command line + */ + if (reboot_default) { + dmi_check_system(reboot_dmi_table); + } return 0; } core_initcall(reboot_init); @@ -465,7 +484,12 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { static int __init pci_reboot_init(void) { - dmi_check_system(pci_reboot_dmi_table); + /* Only do the DMI check if reboot_type hasn't been overridden + * on the command line + */ + if (reboot_default) { + dmi_check_system(pci_reboot_dmi_table); + } return 0; } core_initcall(pci_reboot_init); From e6d36a653becc7bbc643c399a77882e02bf552cb Mon Sep 17 00:00:00 2001 From: Michael D Labriola Date: Sun, 29 Jan 2012 14:21:17 -0500 Subject: [PATCH 032/316] x86/reboot: Remove VersaLogic Menlow reboot quirk This commit removes the reboot quirk originally added by commit e19e074 ("x86: Fix reboot problem on VersaLogic Menlow boards"). Testing with a VersaLogic Ocelot (VL-EPMs-21a rev 1.00 w/ BIOS 6.5.102) revealed the following regarding the reboot hang problem: - v2.6.37 reboot=bios was needed. - v2.6.38-rc1: behavior changed, reboot=acpi is needed, reboot=kbd and reboot=bios results in system hang. - v2.6.38: VersaLogic patch (e19e074 "x86: Fix reboot problem on VersaLogic Menlow boards") was applied prior to v2.6.38-rc7. This patch sets a quirk for VersaLogic Menlow boards that forces the use of reboot=bios, which doesn't work anymore. - v3.2: It seems that commit 660e34c ("x86: Reorder reboot method preferences") changed the default reboot method to acpi prior to v3.0-rc1, which means the default behavior is appropriate for the Ocelot. No VersaLogic quirk is required. The Ocelot board used for testing can successfully reboot w/out having to pass any reboot= arguments for all 3 current versions of the BIOS. Signed-off-by: Michael D Labriola Cc: Matthew Garrett Cc: Michael D Labriola Cc: Kushal Koolwal Cc: Linus Torvalds Link: http://lkml.kernel.org/r/87vcnub9hu.fsf@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index b257f0e28824..d840e69a853c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -309,14 +309,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, - { /* Handle problems with rebooting on VersaLogic Menlow boards */ - .callback = set_bios_reboot, - .ident = "VersaLogic Menlow based board", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"), - DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"), - }, - }, { /* Handle reboot issue on Acer Aspire one */ .callback = set_kbd_reboot, .ident = "Acer Aspire One A110", From 1ae5cbc52e7c6619a3f44b87809fd25370df31bb Mon Sep 17 00:00:00 2001 From: Denis 'GNUtoo' Carikli Date: Mon, 30 Jan 2012 00:31:47 +0100 Subject: [PATCH 033/316] ASoC: neo1973_wm8753: remove references to the neo1973-gta01 machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Openmoko GTA01 machine has been removed from the machine ID database, so we need to remove references to it as well. Without that fix we have: sound/soc/samsung/neo1973_wm8753.c: In function ‘neo1973_wm8753_init’: sound/soc/samsung/neo1973_wm8753.c:325:2: error: implicit declaration of function ‘machine_is_neo1973_gta01’ Signed-off-by: Denis 'GNUtoo' Carikli Signed-off-by: Mark Brown --- sound/soc/samsung/neo1973_wm8753.c | 65 +----------------------------- 1 file changed, 1 insertion(+), 64 deletions(-) diff --git a/sound/soc/samsung/neo1973_wm8753.c b/sound/soc/samsung/neo1973_wm8753.c index 7ac0ba2025c3..c6012ff5bd3e 100644 --- a/sound/soc/samsung/neo1973_wm8753.c +++ b/sound/soc/samsung/neo1973_wm8753.c @@ -230,8 +230,6 @@ static const struct snd_kcontrol_new neo1973_wm8753_controls[] = { /* GTA02 specific routes and controls */ -#ifdef CONFIG_MACH_NEO1973_GTA02 - static int gta02_speaker_enabled; static int lm4853_set_spk(struct snd_kcontrol *kcontrol, @@ -311,10 +309,6 @@ static int neo1973_gta02_wm8753_init(struct snd_soc_codec *codec) return 0; } -#else -static int neo1973_gta02_wm8753_init(struct snd_soc_code *codec) { return 0; } -#endif - static int neo1973_wm8753_init(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_codec *codec = rtd->codec; @@ -322,10 +316,6 @@ static int neo1973_wm8753_init(struct snd_soc_pcm_runtime *rtd) int ret; /* set up NC codec pins */ - if (machine_is_neo1973_gta01()) { - snd_soc_dapm_nc_pin(dapm, "LOUT2"); - snd_soc_dapm_nc_pin(dapm, "ROUT2"); - } snd_soc_dapm_nc_pin(dapm, "OUT3"); snd_soc_dapm_nc_pin(dapm, "OUT4"); snd_soc_dapm_nc_pin(dapm, "LINE1"); @@ -370,50 +360,6 @@ static int neo1973_wm8753_init(struct snd_soc_pcm_runtime *rtd) return 0; } -/* GTA01 specific controls */ - -#ifdef CONFIG_MACH_NEO1973_GTA01 - -static const struct snd_soc_dapm_route neo1973_lm4857_routes[] = { - {"Amp IN", NULL, "ROUT1"}, - {"Amp IN", NULL, "LOUT1"}, - - {"Handset Spk", NULL, "Amp EP"}, - {"Stereo Out", NULL, "Amp LS"}, - {"Headphone", NULL, "Amp HP"}, -}; - -static const struct snd_soc_dapm_widget neo1973_lm4857_dapm_widgets[] = { - SND_SOC_DAPM_SPK("Handset Spk", NULL), - SND_SOC_DAPM_SPK("Stereo Out", NULL), - SND_SOC_DAPM_HP("Headphone", NULL), -}; - -static int neo1973_lm4857_init(struct snd_soc_dapm_context *dapm) -{ - int ret; - - ret = snd_soc_dapm_new_controls(dapm, neo1973_lm4857_dapm_widgets, - ARRAY_SIZE(neo1973_lm4857_dapm_widgets)); - if (ret) - return ret; - - ret = snd_soc_dapm_add_routes(dapm, neo1973_lm4857_routes, - ARRAY_SIZE(neo1973_lm4857_routes)); - if (ret) - return ret; - - snd_soc_dapm_ignore_suspend(dapm, "Stereo Out"); - snd_soc_dapm_ignore_suspend(dapm, "Handset Spk"); - snd_soc_dapm_ignore_suspend(dapm, "Headphone"); - - return 0; -} - -#else -static int neo1973_lm4857_init(struct snd_soc_dapm_context *dapm) { return 0; }; -#endif - static struct snd_soc_dai_link neo1973_dai[] = { { /* Hifi Playback - for similatious use with voice below */ .name = "WM8753", @@ -440,11 +386,6 @@ static struct snd_soc_aux_dev neo1973_aux_devs[] = { .name = "dfbmcs320", .codec_name = "dfbmcs320.0", }, - { - .name = "lm4857", - .codec_name = "lm4857.0-007c", - .init = neo1973_lm4857_init, - }, }; static struct snd_soc_codec_conf neo1973_codec_conf[] = { @@ -454,14 +395,10 @@ static struct snd_soc_codec_conf neo1973_codec_conf[] = { }, }; -#ifdef CONFIG_MACH_NEO1973_GTA02 static const struct gpio neo1973_gta02_gpios[] = { { GTA02_GPIO_HP_IN, GPIOF_OUT_INIT_HIGH, "GTA02_HP_IN" }, { GTA02_GPIO_AMP_SHUT, GPIOF_OUT_INIT_HIGH, "GTA02_AMP_SHUT" }, }; -#else -static const struct gpio neo1973_gta02_gpios[] = {}; -#endif static struct snd_soc_card neo1973 = { .name = "neo1973", @@ -480,7 +417,7 @@ static int __init neo1973_init(void) { int ret; - if (!machine_is_neo1973_gta01() && !machine_is_neo1973_gta02()) + if (!machine_is_neo1973_gta02()) return -ENODEV; if (machine_is_neo1973_gta02()) { From 31150f2327cbb66363f38e13ca1be973d2f9203a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 30 Jan 2012 10:54:08 +0100 Subject: [PATCH 034/316] ALSA: hda - Apply 0x0f-VREF fix to all ASUS laptops with ALC861/660 It turned out that other ASUS laptops require the similar fix to enable the VREF on the pin 0x0f for the secret output amp, not only ASUS A6Rp. Moreover, it's required even when the pin is being used as the output. Thus, writing a fixed value doesn't work always. This patch applies the VREF-fix for all ASUS laptops with ALC861/660 in a fixup function that checks the current value and turns on only the VREF value no matter whether input or output direction is set. The automute function is modified as well to keep the pin VREF upon muting/unmuting via pin-control; otherwise the pin VREF is reset at plugging/unplugging a jack. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=42588 Cc: [v3.2+] Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 43 ++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a7f17becbd7c..42b6a01e17db 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -177,6 +177,7 @@ struct alc_spec { unsigned int detect_lo:1; /* Line-out detection enabled */ unsigned int automute_speaker_possible:1; /* there are speakers and either LO or HP */ unsigned int automute_lo_possible:1; /* there are line outs and HP */ + unsigned int keep_vref_in_automute:1; /* Don't clear VREF in automute */ /* other flags */ unsigned int no_analog :1; /* digital I/O only */ @@ -495,13 +496,24 @@ static void do_automute(struct hda_codec *codec, int num_pins, hda_nid_t *pins, for (i = 0; i < num_pins; i++) { hda_nid_t nid = pins[i]; + unsigned int val; if (!nid) break; switch (spec->automute_mode) { case ALC_AUTOMUTE_PIN: + /* don't reset VREF value in case it's controlling + * the amp (see alc861_fixup_asus_amp_vref_0f()) + */ + if (spec->keep_vref_in_automute) { + val = snd_hda_codec_read(codec, nid, 0, + AC_VERB_GET_PIN_WIDGET_CONTROL, 0); + val &= ~PIN_HP; + } else + val = 0; + val |= pin_bits; snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, - pin_bits); + val); break; case ALC_AUTOMUTE_AMP: snd_hda_codec_amp_stereo(codec, nid, HDA_OUTPUT, 0, @@ -5588,6 +5600,25 @@ enum { PINFIX_ASUS_A6RP, }; +/* On some laptops, VREF of pin 0x0f is abused for controlling the main amp */ +static void alc861_fixup_asus_amp_vref_0f(struct hda_codec *codec, + const struct alc_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + unsigned int val; + + if (action != ALC_FIXUP_ACT_INIT) + return; + val = snd_hda_codec_read(codec, 0x0f, 0, + AC_VERB_GET_PIN_WIDGET_CONTROL, 0); + if (!(val & (AC_PINCTL_IN_EN | AC_PINCTL_OUT_EN))) + val |= AC_PINCTL_IN_EN; + val |= AC_PINCTL_VREF_50; + snd_hda_codec_write(codec, 0x0f, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, val); + spec->keep_vref_in_automute = 1; +} + static const struct alc_fixup alc861_fixups[] = { [PINFIX_FSC_AMILO_PI1505] = { .type = ALC_FIXUP_PINS, @@ -5598,17 +5629,13 @@ static const struct alc_fixup alc861_fixups[] = { } }, [PINFIX_ASUS_A6RP] = { - .type = ALC_FIXUP_VERBS, - .v.verbs = (const struct hda_verb[]) { - /* node 0x0f VREF seems controlling the master output */ - { 0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF50 }, - { } - }, + .type = ALC_FIXUP_FUNC, + .v.func = alc861_fixup_asus_amp_vref_0f, }, }; static const struct snd_pci_quirk alc861_fixup_tbl[] = { - SND_PCI_QUIRK(0x1043, 0x1393, "ASUS A6Rp", PINFIX_ASUS_A6RP), + SND_PCI_QUIRK_VENDOR(0x1043, "ASUS laptop", PINFIX_ASUS_A6RP), SND_PCI_QUIRK(0x1584, 0x2b01, "Haier W18", PINFIX_ASUS_A6RP), SND_PCI_QUIRK(0x1734, 0x10c7, "FSC Amilo Pi1505", PINFIX_FSC_AMILO_PI1505), {} From 320cfa6ce0b3dc794fedfa4bae54c0f65077234d Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 29 Jan 2012 12:41:15 +0100 Subject: [PATCH 035/316] firewire: ohci: disable MSI on Ricoh controllers The PCIe device FireWire (IEEE 1394) [0c00]: Ricoh Co Ltd FireWire Host Controller [1180:e832] (prog-if 10 [OHCI]) is unable to access attached FireWire devices when MSI is enabled but works if MSI is disabled. http://www.mail-archive.com/alsa-user@lists.sourceforge.net/msg28251.html Hence add the "disable MSI" quirks flag for this device, or in fact for safety and simplicity for all current (R5U230, R5U231, R5U240) and future Ricoh PCIe 1394 controllers. Reported-by: Stefan Thomas Cc: 2.6.36+ Signed-off-by: Stefan Richter --- drivers/firewire/ohci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 21250eca28d1..7f5f0da726da 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -303,7 +303,7 @@ static const struct { QUIRK_NO_MSI}, {PCI_VENDOR_ID_RICOH, PCI_ANY_ID, PCI_ANY_ID, - QUIRK_CYCLE_TIMER}, + QUIRK_CYCLE_TIMER | QUIRK_NO_MSI}, {PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_TSB12LV22, PCI_ANY_ID, QUIRK_CYCLE_TIMER | QUIRK_RESET_PACKET | QUIRK_NO_1394A}, From a6f7feae6d19e84253918d88b04153af09d3a243 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 26 Jan 2012 16:41:33 +0200 Subject: [PATCH 036/316] IB/mlx4: pass SMP vendor-specific attribute MADs to firmware In the current code, vendor-specific MADs (e.g with the FDR-10 attribute) are silently dropped by the driver, resulting in timeouts at the sending side and inability to query/configure the relevant feature. However, the ConnectX firmware is able to handle such MADs. For unsupported attributes, the firmware returns a GET_RESPONSE MAD containing an error status. For example, for a FDR-10 node with LID 11: # ibstat mlx4_0 1 CA: 'mlx4_0' Port 1: State: Active Physical state: LinkUp Rate: 40 (FDR10) Base lid: 11 LMC: 0 SM lid: 24 Capability mask: 0x02514868 Port GUID: 0x0002c903002e65d1 Link layer: InfiniBand Extended Port Query (EPI) vendor mad timeouts before the patch: # smpquery MEPI 11 -d ibwarn: [4196] smp_query_via: attr 0xff90 mod 0x0 route Lid 11 ibwarn: [4196] _do_madrpc: retry 1 (timeout 1000 ms) ibwarn: [4196] _do_madrpc: retry 2 (timeout 1000 ms) ibwarn: [4196] _do_madrpc: timeout after 3 retries, 3000 ms ibwarn: [4196] mad_rpc: _do_madrpc failed; dport (Lid 11) smpquery: iberror: [pid 4196] main: failed: operation EPI: ext port info query failed EPI query works OK with the patch: # smpquery MEPI 11 -d ibwarn: [6548] smp_query_via: attr 0xff90 mod 0x0 route Lid 11 ibwarn: [6548] mad_rpc: data offs 64 sz 64 mad data 0000 0000 0000 0001 0000 0001 0000 0001 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 # Ext Port info: Lid 11 port 0 StateChangeEnable:...............0x00 LinkSpeedSupported:..............0x01 LinkSpeedEnabled:................0x01 LinkSpeedActive:.................0x01 Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Acked-by: Ira Weiny Cc: Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 95c94d8f0254..259b0670b51c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -257,12 +257,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; /* - * Don't process SMInfo queries or vendor-specific - * MADs -- the SMA can't handle them. + * Don't process SMInfo queries -- the SMA can't handle them. */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || - ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == - IB_SMP_ATTR_VENDOR_MASK)) + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) return IB_MAD_RESULT_SUCCESS; } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || From c5488c571f08b2e21b060a1b43aa840d0a3c6e7a Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Mon, 28 Nov 2011 14:22:29 -0600 Subject: [PATCH 037/316] RDMA/nes: Copyright update Update copyright information in the source files. Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.c | 2 +- drivers/infiniband/hw/nes/nes.h | 2 +- drivers/infiniband/hw/nes/nes_cm.c | 2 +- drivers/infiniband/hw/nes/nes_cm.h | 2 +- drivers/infiniband/hw/nes/nes_context.h | 2 +- drivers/infiniband/hw/nes/nes_hw.c | 2 +- drivers/infiniband/hw/nes/nes_hw.h | 2 +- drivers/infiniband/hw/nes/nes_mgt.c | 2 +- drivers/infiniband/hw/nes/nes_mgt.h | 2 +- drivers/infiniband/hw/nes/nes_nic.c | 2 +- drivers/infiniband/hw/nes/nes_user.h | 2 +- drivers/infiniband/hw/nes/nes_utils.c | 2 +- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- drivers/infiniband/hw/nes/nes_verbs.h | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 7013da5e9eda..7140199f562e 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 568b4f11380a..c438e4691b3c 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 271279cccad8..a4972abedef1 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index bdfa1fbb35fc..4646e6666087 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h index b4393a16099d..a69eef16d72d 100644 --- a/drivers/infiniband/hw/nes/nes_context.h +++ b/drivers/infiniband/hw/nes/nes_context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 055f4b545df0..d42c9f435b1b 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 0b590e152c6a..d748e4b31b8d 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. +* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index b3b2a240c6e9..3ba7be369452 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h index 8c8af254555a..4f7f701c4a81 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.h +++ b/drivers/infiniband/hw/nes/nes_mgt.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2010 Intel-NE, Inc. All rights reserved. +* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 4b3fa711a247..f3a3ecf8d09e 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h index 71e133ab209b..4926de744488 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 8b4c2ff54888..e98f4fc0b768 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 3875365a284b..0927b5cc65d3 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index fe6b6e92fa90..0eff7c44d76b 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two From 0a84f007f97ce6fc7c07a481a5b9808b187a0193 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 17 Jan 2012 13:41:01 -0800 Subject: [PATCH 038/316] perf tools: Fix broken build by defining _GNU_SOURCE in Makefile When building on my Debian/mips system, util/util.c fails to build because commit 1aed2671738785e8f5aea663a6fda91aa7ef59b5 (perf kvm: Do guest-only counting by default) indirectly includes stdio.h before the feature selection in util.h is done. This prevents _GNU_SOURCE in util.h from enabling the declaration of getline(), from now second inclusion of stdio.h, and the build is broken. There is another breakage in util/evsel.c caused by include ordering, but I didn't fully track down the commit that caused it. The root cause of all this is an inconsistent definition of _GNU_SOURCE, so I move the definition into the Makefile so that it is passed to all invocations of the compiler and used uniformly for all system header files. All other #define and #undef of _GNU_SOURCE are removed as they cause conflicts with the definition passed to the compiler. All the features.h definitions (_LARGEFILE64_SOURCE _FILE_OFFSET_BITS=64 and _GNU_SOURCE) are needed by the python glue code too, so they are moved to BASIC_CFLAGS, and the misleading comments about BASIC_CFLAGS are removed. This gives me a clean build on x86_64 (fc12) and mips (Debian). Cc: David Daney Cc: Ingo Molnar Cc: Joerg Roedel Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1326836461-11952-1-git-send-email-ddaney.cavm@gmail.com Signed-off-by: David Daney Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 7 ++----- tools/perf/builtin-probe.c | 2 -- tools/perf/util/probe-event.c | 2 -- tools/perf/util/symbol.c | 1 - tools/perf/util/trace-event-parse.c | 3 +-- tools/perf/util/ui/browsers/hists.c | 2 -- tools/perf/util/ui/helpline.c | 1 - tools/perf/util/util.h | 1 - 8 files changed, 3 insertions(+), 16 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index ac86d67b636e..7c12650165ae 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -104,7 +104,7 @@ endif CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm -ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -168,10 +168,7 @@ endif ### --- END CONFIGURATION SECTION --- -# Those must not be GNU-specific; they are shared with perl/ which may -# be built by a different compiler. (Note that this is an artifact now -# but it still might be nice to keep that distinction.) -BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include +BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE BASIC_LDFLAGS = # Guard against environment variables diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 59d43abfbfec..fb8566181f27 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -20,7 +20,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ -#define _GNU_SOURCE #include #include #include @@ -31,7 +30,6 @@ #include #include -#undef _GNU_SOURCE #include "perf.h" #include "builtin.h" #include "util/util.h" diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index eb25900e2211..29cb65459811 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -19,7 +19,6 @@ * */ -#define _GNU_SOURCE #include #include #include @@ -33,7 +32,6 @@ #include #include -#undef _GNU_SOURCE #include "util.h" #include "event.h" #include "string.h" diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 215d50f2042e..0975438c3e72 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1,4 +1,3 @@ -#define _GNU_SOURCE #include #include #include diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 6c164dc9ee95..1a8d4dc4f386 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -21,14 +21,13 @@ * The parts for function graph printing was taken and modified from the * Linux Kernel that were written by Frederic Weisbecker. */ -#define _GNU_SOURCE + #include #include #include #include #include -#undef _GNU_SOURCE #include "../perf.h" #include "util.h" #include "trace-event.h" diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 1212a386a033..e81aef1f2569 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -1,6 +1,4 @@ -#define _GNU_SOURCE #include -#undef _GNU_SOURCE #include "../libslang.h" #include #include diff --git a/tools/perf/util/ui/helpline.c b/tools/perf/util/ui/helpline.c index 6ef3c5691762..4f48f5901b30 100644 --- a/tools/perf/util/ui/helpline.c +++ b/tools/perf/util/ui/helpline.c @@ -1,4 +1,3 @@ -#define _GNU_SOURCE #include #include #include diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b9c530cce79a..ecf9898169c8 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -40,7 +40,6 @@ #define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) #define _ALL_SOURCE 1 -#define _GNU_SOURCE 1 #define _BSD_SOURCE 1 #define HAS_BOOL From d8d9c282a1abbe2baf6d22f959e6adf4c90629bf Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 20 Jan 2012 10:49:12 +0100 Subject: [PATCH 039/316] perf tools: Fix strlen() bug in perf_event__synthesize_event_type() The event_type record has a max length for the event name. It's called MAX_EVENT_NAME. The name may be truncated to fit the max length. But the header.size still reflects the original name length. If that length is > MAX_EVENT_NAME, then the header.size field is bogus. Fix this by using the length of the name after the potential truncation. Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120120094912.GA4882@quad Signed-off-by: Stephane Eranian Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 3e7e0b09c12c..ecd7f4dd7eea 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2105,7 +2105,7 @@ int perf_event__synthesize_event_type(struct perf_tool *tool, strncpy(ev.event_type.event_type.name, name, MAX_EVENT_NAME - 1); ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE; - size = strlen(name); + size = strlen(ev.event_type.event_type.name); size = ALIGN(size, sizeof(u64)); ev.event_type.header.size = sizeof(ev.event_type) - (sizeof(ev.event_type.event_type.name) - size); From 509605dbe9395ce1abbfac1dba375f074ff57a53 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 30 Jan 2012 11:50:37 +0100 Subject: [PATCH 040/316] perf top: Fix number of samples displayed In recent versions of perf top, pressing the 'e' key to change the number of displayed samples had no effect. The number of samples was still dictated by the size of the terminal (stdio mode). That was quite annoying because typically only the first dozen samples really matter. This patch fixes this. Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120130105037.GA5160@quad Signed-off-by: Stephane Eranian Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8f80df896038..dd162aa24baa 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -89,8 +89,6 @@ void get_term_dimensions(struct winsize *ws) static void perf_top__update_print_entries(struct perf_top *top) { - top->print_entries = top->winsize.ws_row; - if (top->print_entries > 9) top->print_entries -= 9; } @@ -100,6 +98,13 @@ static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *ar struct perf_top *top = arg; get_term_dimensions(&top->winsize); + if (!top->print_entries + || (top->print_entries+4) > top->winsize.ws_row) { + top->print_entries = top->winsize.ws_row; + } else { + top->print_entries += 4; + top->winsize.ws_row = top->print_entries; + } perf_top__update_print_entries(top); } @@ -453,8 +458,10 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) }; perf_top__sig_winch(SIGWINCH, NULL, top); sigaction(SIGWINCH, &act, NULL); - } else + } else { + perf_top__sig_winch(SIGWINCH, NULL, top); signal(SIGWINCH, SIG_DFL); + } break; case 'E': if (top->evlist->nr_entries > 1) { From 9afc416517f36b3b0b109e6590d0b74468fd80f9 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 30 Jan 2012 10:53:08 +0100 Subject: [PATCH 041/316] Revert "microblaze: Add topology init" This reverts commit d761f0c521868e59cd0bc59159cbdb4686fe210d. Patch: "cpu: Register a generic CPU device on architectures that currently do not" (sha1: 9f13a1fd452f11c18004ba2422a6384b424ec8a9) selects GENERIC_CPU_DEVICES for Microblaze which register cpu. My patch was done in the same time that's why cpu was registered twice which caused this warning log: ------------[ cut here ]------------ WARNING: at fs/sysfs/dir.c:481 sysfs_add_one+0xb0/0xdc() sysfs: cannot create duplicate filename '/devices/system/cpu/cpu0' Modules linked in: ... Signed-off-by: Michal Simek --- arch/microblaze/kernel/setup.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index d4fc1a971779..604cd9dd1333 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -227,23 +226,5 @@ static int __init setup_bus_notifier(void) return 0; } + arch_initcall(setup_bus_notifier); - -static DEFINE_PER_CPU(struct cpu, cpu_devices); - -static int __init topology_init(void) -{ - int i, ret; - - for_each_present_cpu(i) { - struct cpu *c = &per_cpu(cpu_devices, i); - - ret = register_cpu(c, i); - if (ret) - printk(KERN_WARNING "topology_init: register_cpu %d " - "failed (%d)\n", i, ret); - } - - return 0; -} -subsys_initcall(topology_init); From 05c3b36e539627b7aed67d038381d0d9fa9d61e7 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Tue, 31 Jan 2012 09:04:15 +0100 Subject: [PATCH 042/316] ALSA: HDA: Fix jack creation for codecs with front and rear Line In If a codec has both a front and a rear Line In, two controls both named "Line Jack" will be created, which causes parsing to fail. While a long term solution might be to name the jacks differently, this extra check is consistent with what is already being done in many auto-parsers, and will also protect against other cases when two inputs have the same label. BugLink: https://bugs.launchpad.net/bugs/923409 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_jack.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/sound/pci/hda/hda_jack.c b/sound/pci/hda/hda_jack.c index d8a35da0803f..9d819c4b4923 100644 --- a/sound/pci/hda/hda_jack.c +++ b/sound/pci/hda/hda_jack.c @@ -282,7 +282,8 @@ int snd_hda_jack_add_kctl(struct hda_codec *codec, hda_nid_t nid, EXPORT_SYMBOL_HDA(snd_hda_jack_add_kctl); static int add_jack_kctl(struct hda_codec *codec, hda_nid_t nid, - const struct auto_pin_cfg *cfg) + const struct auto_pin_cfg *cfg, + char *lastname, int *lastidx) { unsigned int def_conf, conn; char name[44]; @@ -298,6 +299,10 @@ static int add_jack_kctl(struct hda_codec *codec, hda_nid_t nid, return 0; snd_hda_get_pin_label(codec, nid, cfg, name, sizeof(name), &idx); + if (!strcmp(name, lastname) && idx == *lastidx) + idx++; + strncpy(lastname, name, 44); + *lastidx = idx; err = snd_hda_jack_add_kctl(codec, nid, name, idx); if (err < 0) return err; @@ -311,41 +316,42 @@ int snd_hda_jack_add_kctls(struct hda_codec *codec, const struct auto_pin_cfg *cfg) { const hda_nid_t *p; - int i, err; + int i, err, lastidx = 0; + char lastname[44] = ""; for (i = 0, p = cfg->line_out_pins; i < cfg->line_outs; i++, p++) { - err = add_jack_kctl(codec, *p, cfg); + err = add_jack_kctl(codec, *p, cfg, lastname, &lastidx); if (err < 0) return err; } for (i = 0, p = cfg->hp_pins; i < cfg->hp_outs; i++, p++) { if (*p == *cfg->line_out_pins) /* might be duplicated */ break; - err = add_jack_kctl(codec, *p, cfg); + err = add_jack_kctl(codec, *p, cfg, lastname, &lastidx); if (err < 0) return err; } for (i = 0, p = cfg->speaker_pins; i < cfg->speaker_outs; i++, p++) { if (*p == *cfg->line_out_pins) /* might be duplicated */ break; - err = add_jack_kctl(codec, *p, cfg); + err = add_jack_kctl(codec, *p, cfg, lastname, &lastidx); if (err < 0) return err; } for (i = 0; i < cfg->num_inputs; i++) { - err = add_jack_kctl(codec, cfg->inputs[i].pin, cfg); + err = add_jack_kctl(codec, cfg->inputs[i].pin, cfg, lastname, &lastidx); if (err < 0) return err; } for (i = 0, p = cfg->dig_out_pins; i < cfg->dig_outs; i++, p++) { - err = add_jack_kctl(codec, *p, cfg); + err = add_jack_kctl(codec, *p, cfg, lastname, &lastidx); if (err < 0) return err; } - err = add_jack_kctl(codec, cfg->dig_in_pin, cfg); + err = add_jack_kctl(codec, cfg->dig_in_pin, cfg, lastname, &lastidx); if (err < 0) return err; - err = add_jack_kctl(codec, cfg->mono_out_pin, cfg); + err = add_jack_kctl(codec, cfg->mono_out_pin, cfg, lastname, &lastidx); if (err < 0) return err; return 0; From 3422a47041b8cb8f14ac1e3926bcf711121df6dc Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Tue, 31 Jan 2012 10:31:49 +0100 Subject: [PATCH 043/316] ALSA: HDA: Remove quirk for Toshiba Qosmio G50 The user reports that model=auto works better than current handling on a 3.2 based kernel (with jack detection patches backported). Since model=auto is what we prefer these days anyway, the quirk should be removed. Alsa-info for the relevant machine: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/923316/+attachment/2702812/+files/alsa-info.txt.Pbfno2x7bp BugLink: https://bugs.launchpad.net/bugs/923316 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 42b6a01e17db..a8e82be3d2fc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4747,7 +4747,6 @@ enum { ALC262_FIXUP_FSC_H270, ALC262_FIXUP_HP_Z200, ALC262_FIXUP_TYAN, - ALC262_FIXUP_TOSHIBA_RX1, ALC262_FIXUP_LENOVO_3000, ALC262_FIXUP_BENQ, ALC262_FIXUP_BENQ_T31, @@ -4777,16 +4776,6 @@ static const struct alc_fixup alc262_fixups[] = { { } } }, - [ALC262_FIXUP_TOSHIBA_RX1] = { - .type = ALC_FIXUP_PINS, - .v.pins = (const struct alc_pincfg[]) { - { 0x14, 0x90170110 }, /* speaker */ - { 0x15, 0x0421101f }, /* HP */ - { 0x1a, 0x40f000f0 }, /* N/A */ - { 0x1b, 0x40f000f0 }, /* N/A */ - { 0x1e, 0x40f000f0 }, /* N/A */ - } - }, [ALC262_FIXUP_LENOVO_3000] = { .type = ALC_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -4819,8 +4808,6 @@ static const struct snd_pci_quirk alc262_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x1397, "Fujitsu", ALC262_FIXUP_BENQ), SND_PCI_QUIRK(0x10cf, 0x142d, "Fujitsu Lifebook E8410", ALC262_FIXUP_BENQ), SND_PCI_QUIRK(0x10f1, 0x2915, "Tyan Thunder n6650W", ALC262_FIXUP_TYAN), - SND_PCI_QUIRK(0x1179, 0x0001, "Toshiba dynabook SS RX1", - ALC262_FIXUP_TOSHIBA_RX1), SND_PCI_QUIRK(0x1734, 0x1147, "FSC Celsius H270", ALC262_FIXUP_FSC_H270), SND_PCI_QUIRK(0x17aa, 0x384e, "Lenovo 3000", ALC262_FIXUP_LENOVO_3000), SND_PCI_QUIRK(0x17ff, 0x0560, "Benq ED8", ALC262_FIXUP_BENQ), From 5bff0386305461021bbef2d958fa0f0151f56a6f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 8 Nov 2011 15:09:19 +0300 Subject: [PATCH 044/316] SUNRPC: remove non-exclusive pipe creation from RPC pipefs This patch-set was created in context of clone of git branch: git://git.linux-nfs.org/projects/trondmy/nfs-2.6.git. v2: 1) Rebased of current repo state (i.e. all commits were pulled before apply) I feel it is ready for inclusion if no objections will appear. SUNRPC pipefs non-exclusive pipe creation code looks obsolete. IOW, as I see it, all pipes are creating with unique full path and only once. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 1 - net/sunrpc/rpc_pipe.c | 52 ++++++------------------------ 2 files changed, 10 insertions(+), 43 deletions(-) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 2bb03d77375a..edadc3acf949 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -30,7 +30,6 @@ struct rpc_inode { int pipelen; int nreaders; int nwriters; - int nkern_readwriters; wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 63a7a7add21e..8e3397fc0a7d 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -554,7 +554,6 @@ static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, if (err) return err; rpci = RPC_I(dentry->d_inode); - rpci->nkern_readwriters = 1; rpci->private = private; rpci->flags = flags; rpci->ops = ops; @@ -587,44 +586,26 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); - rpci->nkern_readwriters--; - if (rpci->nkern_readwriters != 0) - return 0; rpc_close_pipes(inode); return __rpc_unlink(dir, dentry); } -static struct dentry *__rpc_lookup_create(struct dentry *parent, - struct qstr *name) -{ - struct dentry *dentry; - - dentry = d_lookup(parent, name); - if (!dentry) { - dentry = d_alloc(parent, name); - if (!dentry) { - dentry = ERR_PTR(-ENOMEM); - goto out_err; - } - } - if (!dentry->d_inode) - d_set_d_op(dentry, &rpc_dentry_operations); -out_err: - return dentry; -} - static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, struct qstr *name) { struct dentry *dentry; - dentry = __rpc_lookup_create(parent, name); - if (IS_ERR(dentry)) - return dentry; - if (dentry->d_inode == NULL) + dentry = d_lookup(parent, name); + if (!dentry) { + dentry = d_alloc(parent, name); + if (!dentry) + return ERR_PTR(-ENOMEM); + } + if (dentry->d_inode == NULL) { + d_set_d_op(dentry, &rpc_dentry_operations); return dentry; + } dput(dentry); return ERR_PTR(-EEXIST); } @@ -812,22 +793,9 @@ struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, q.hash = full_name_hash(q.name, q.len), mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = __rpc_lookup_create(parent, &q); + dentry = __rpc_lookup_create_exclusive(parent, &q); if (IS_ERR(dentry)) goto out; - if (dentry->d_inode) { - struct rpc_inode *rpci = RPC_I(dentry->d_inode); - if (rpci->private != private || - rpci->ops != ops || - rpci->flags != flags) { - dput (dentry); - err = -EBUSY; - goto out_err; - } - rpci->nkern_readwriters++; - goto out; - } - err = __rpc_mkpipe(dir, dentry, umode, &rpc_pipe_fops, private, ops, flags); if (err) From 38b0da7522c086f1dcdeda39a2d1849c6a31f518 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:38:56 +0300 Subject: [PATCH 045/316] SUNRPC: create RPC pipefs superblock per network namespace context This is the initial step of RPC pipefs virtualization. It changes nothing to current pipefs behaviour except that mount of pipefs in other than init_net network namespace context will provide only root tree. No other dentries will be visible. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 8e3397fc0a7d..e32e6b8c006d 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -993,6 +993,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; struct dentry *root; + struct net *net = data; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; @@ -1017,7 +1018,7 @@ static struct dentry * rpc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_single(fs_type, flags, data, rpc_fill_super); + return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super); } static struct file_system_type rpc_pipe_fs_type = { From 021c68dec8c04c44cb82eb5bbee77028fafe22e8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:39:04 +0300 Subject: [PATCH 046/316] SUNRPC: hold current network namespace while pipefs superblock is active We want to be sure that network namespace is still alive while we have pipefs mounted. This will be required later, when RPC pipefs will be mounting only from user-space context. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e32e6b8c006d..f628b0f48a87 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -27,6 +27,9 @@ #include #include #include +#include + +#include "netns.h" static struct vfsmount *rpc_mnt __read_mostly; static int rpc_mount_count; @@ -1011,6 +1014,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) } if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) return -ENOMEM; + sb->s_fs_info = get_net(net); return 0; } @@ -1021,11 +1025,19 @@ rpc_mount(struct file_system_type *fs_type, return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super); } +void rpc_kill_sb(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + + put_net(net); + kill_litter_super(sb); +} + static struct file_system_type rpc_pipe_fs_type = { .owner = THIS_MODULE, .name = "rpc_pipefs", .mount = rpc_mount, - .kill_sb = kill_litter_super, + .kill_sb = rpc_kill_sb, }; static void From 2d00131acc641b2cb6f0bdefb8c7bdd8fdf7410b Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:39:13 +0300 Subject: [PATCH 047/316] SUNRPC: send notification events on pipefs sb creation and destruction They will be used to notify subscribers about pipefs superblock creation and destruction. Subcribers will have to create their dentries on passed superblock on mount event and destroy otherwise. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 8 ++++++++ net/sunrpc/rpc_pipe.c | 32 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index edadc3acf949..d39782ce6c67 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -43,6 +43,14 @@ RPC_I(struct inode *inode) return container_of(inode, struct rpc_inode, vfs_inode); } +extern int rpc_pipefs_notifier_register(struct notifier_block *); +extern void rpc_pipefs_notifier_unregister(struct notifier_block *); + +enum { + RPC_PIPEFS_MOUNT, + RPC_PIPEFS_UMOUNT, +}; + extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index f628b0f48a87..58a5062df260 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -28,8 +28,10 @@ #include #include #include +#include #include "netns.h" +#include "sunrpc.h" static struct vfsmount *rpc_mnt __read_mostly; static int rpc_mount_count; @@ -41,6 +43,20 @@ static struct kmem_cache *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) +static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list); + +int rpc_pipefs_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_cond_register(&rpc_pipefs_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register); + +void rpc_pipefs_notifier_unregister(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&rpc_pipefs_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_unregister); + static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, void (*destroy_msg)(struct rpc_pipe_msg *), int err) { @@ -997,6 +1013,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct dentry *root; struct net *net = data; + int err; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; @@ -1014,8 +1031,20 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) } if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) return -ENOMEM; + err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, + RPC_PIPEFS_MOUNT, + sb); + if (err) + goto err_depopulate; sb->s_fs_info = get_net(net); return 0; + +err_depopulate: + blocking_notifier_call_chain(&rpc_pipefs_notifier_list, + RPC_PIPEFS_UMOUNT, + sb); + __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); + return err; } static struct dentry * @@ -1030,6 +1059,9 @@ void rpc_kill_sb(struct super_block *sb) struct net *net = sb->s_fs_info; put_net(net); + blocking_notifier_call_chain(&rpc_pipefs_notifier_list, + RPC_PIPEFS_UMOUNT, + sb); kill_litter_super(sb); } From 432eb1a5fb380477ae759041bac2bb305977e436 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:39:22 +0300 Subject: [PATCH 048/316] SUNRPC: pipefs dentry lookup helper introduced In all places, where pipefs dentries are created, only directory inode is actually required to create new dentry. And all this directories has root pipefs dentry as their parent. So we actually don't need this pipefs mount point at all if some pipefs lookup method will be provided. IOW, all we really need is just superblock and simple lookup method to find root's child dentry with appropriate name. And this patch introduces this method. Note, that no locking implemented in rpc_d_lookup_sb(). So it can be used only in case of assurance, that pipefs superblock still exist. IOW, we can use this method only in pipefs mount-umount notification subscribers callbacks. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 3 +++ net/sunrpc/rpc_pipe.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index d39782ce6c67..2f3382230141 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -51,6 +51,9 @@ enum { RPC_PIPEFS_UMOUNT, }; +extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb, + const unsigned char *dir_name); + extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 58a5062df260..6f295e6c12a0 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1007,6 +1007,22 @@ static const struct rpc_filelist files[] = { }, }; +/* + * This call can be used only in RPC pipefs mount notification hooks. + */ +struct dentry *rpc_d_lookup_sb(const struct super_block *sb, + const unsigned char *dir_name) +{ + struct qstr dir = { + .name = dir_name, + .len = strlen(dir_name), + .hash = full_name_hash(dir_name, strlen(dir_name)), + }; + + return d_lookup(sb->s_root, &dir); +} +EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); + static int rpc_fill_super(struct super_block *sb, void *data, int silent) { From 90c4e82999c517e0cd00d0782c68d186cb18b784 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:39:30 +0300 Subject: [PATCH 049/316] SUNRPC: put pipefs superblock link on network namespace We have modules (like, pNFS blocklayout module) which creates pipes on rpc_pipefs. Thus we need per-net operations for them. To make it possible we require appropriate super block. So we have to put sb link on network namespace context. Note, that it's not strongly required to create pipes in per-net operations. IOW, if pipefs wasn't mounted yet, that no sb link reference will present on network namespace and in this case we need just need to pass through pipe creation. Pipe dentry will be created during pipefs mount notification. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/netns.h | 2 ++ net/sunrpc/rpc_pipe.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index d013bf211cae..b3842529aec9 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -9,6 +9,8 @@ struct cache_detail; struct sunrpc_net { struct proc_dir_entry *proc_net_rpc; struct cache_detail *ip_map_cache; + + struct super_block *pipefs_sb; }; extern int sunrpc_net_id; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 6f295e6c12a0..e5e1f357b561 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1029,6 +1029,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct dentry *root; struct net *net = data; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int err; sb->s_blocksize = PAGE_CACHE_SIZE; @@ -1053,6 +1054,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_depopulate; sb->s_fs_info = get_net(net); + sn->pipefs_sb = sb; return 0; err_depopulate: @@ -1073,7 +1075,9 @@ rpc_mount(struct file_system_type *fs_type, void rpc_kill_sb(struct super_block *sb) { struct net *net = sb->s_fs_info; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + sn->pipefs_sb = NULL; put_net(net); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, From c21a588f35b1c50304e505fad542b3aab0814266 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:39:39 +0300 Subject: [PATCH 050/316] SUNRPC: pipefs per-net operations helper introduced During per-net pipes creation and destruction we have to make sure, that pipefs sb exists for the whole creation/destruction cycle. This is done by using special mutex which controls pipefs sb reference on network namespace context. Helper consists of two parts: first of them (rpc_get_dentry_net) searches for dentry with specified name and returns with mutex taken on success. When pipe creation or destructions is completed, caller should release this mutex by rpc_put_dentry_net call. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 3 +++ net/sunrpc/netns.h | 1 + net/sunrpc/rpc_pipe.c | 36 ++++++++++++++++++++++++++++++ net/sunrpc/sunrpc_syms.c | 1 + 4 files changed, 41 insertions(+) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 2f3382230141..c13fca34dc9c 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -53,6 +53,9 @@ enum { extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb, const unsigned char *dir_name); +extern void rpc_pipefs_init_net(struct net *net); +extern struct super_block *rpc_get_sb_net(const struct net *net); +extern void rpc_put_sb_net(const struct net *net); extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index b3842529aec9..11d2f4863403 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -11,6 +11,7 @@ struct sunrpc_net { struct cache_detail *ip_map_cache; struct super_block *pipefs_sb; + struct mutex pipefs_sb_lock; }; extern int sunrpc_net_id; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e5e1f357b561..f075f8817773 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1023,6 +1023,40 @@ struct dentry *rpc_d_lookup_sb(const struct super_block *sb, } EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); +void rpc_pipefs_init_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + mutex_init(&sn->pipefs_sb_lock); +} + +/* + * This call will be used for per network namespace operations calls. + * Note: Function will be returned with pipefs_sb_lock taken if superblock was + * found. This lock have to be released by rpc_put_sb_net() when all operations + * will be completed. + */ +struct super_block *rpc_get_sb_net(const struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + mutex_lock(&sn->pipefs_sb_lock); + if (sn->pipefs_sb) + return sn->pipefs_sb; + mutex_unlock(&sn->pipefs_sb_lock); + return NULL; +} +EXPORT_SYMBOL_GPL(rpc_get_sb_net); + +void rpc_put_sb_net(const struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + BUG_ON(sn->pipefs_sb == NULL); + mutex_unlock(&sn->pipefs_sb_lock); +} +EXPORT_SYMBOL_GPL(rpc_put_sb_net); + static int rpc_fill_super(struct super_block *sb, void *data, int silent) { @@ -1077,7 +1111,9 @@ void rpc_kill_sb(struct super_block *sb) struct net *net = sb->s_fs_info; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + mutex_lock(&sn->pipefs_sb_lock); sn->pipefs_sb = NULL; + mutex_unlock(&sn->pipefs_sb_lock); put_net(net); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8ec9778c3f4a..7086d11589c8 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -38,6 +38,7 @@ static __net_init int sunrpc_init_net(struct net *net) if (err) goto err_ipmap; + rpc_pipefs_init_net(net); return 0; err_ipmap: From efc46bf2b2893132e07628ad8c06e915f3281fdc Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:39:47 +0300 Subject: [PATCH 051/316] SUNRPC: added debug messages to RPC pipefs This patch adds debug messages for notification events. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index f075f8817773..9a881138fead 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -33,6 +33,10 @@ #include "netns.h" #include "sunrpc.h" +#define RPCDBG_FACILITY RPCDBG_DEBUG + +#define NET_NAME(net) ((net == &init_net) ? " (init_net)" : "") + static struct vfsmount *rpc_mnt __read_mostly; static int rpc_mount_count; @@ -1082,6 +1086,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) } if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) return -ENOMEM; + dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net, + NET_NAME(net)); err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_MOUNT, sb); @@ -1115,6 +1121,8 @@ void rpc_kill_sb(struct super_block *sb) sn->pipefs_sb = NULL; mutex_unlock(&sn->pipefs_sb_lock); put_net(net); + dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net, + NET_NAME(net)); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); From 766347bec3490111e1c4482af7c7394868c2aed1 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:43:23 +0300 Subject: [PATCH 052/316] SUNRPC: replace inode lock with pipe lock for RPC PipeFS operations Currenly, inode i_lock is used to provide concurrent access to SUNPRC PipeFS pipes. It looks redundant, since now other use of inode is present in most of these places and thus can be easely replaced, which will allow to remove most of inode references from PipeFS code. This is a first step towards to removing PipeFS inode references from kernel code other than PipeFS itself. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 57 +++++++++++++++--------------- net/sunrpc/rpc_pipe.c | 38 ++++++++++---------- 3 files changed, 48 insertions(+), 48 deletions(-) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index c13fca34dc9c..c93ea8689bc8 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -35,6 +35,7 @@ struct rpc_inode { int flags; struct delayed_work queue_timeout; const struct rpc_pipe_ops *ops; + spinlock_t lock; }; static inline struct rpc_inode * diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index affa631ac1ab..a0844f92a447 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -112,7 +112,7 @@ gss_put_ctx(struct gss_cl_ctx *ctx) /* gss_cred_set_ctx: * called by gss_upcall_callback and gss_create_upcall in order * to set the gss context. The actual exchange of an old context - * and a new one is protected by the inode->i_lock. + * and a new one is protected by the rpci->lock. */ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) @@ -316,17 +316,16 @@ static inline struct gss_upcall_msg * gss_add_msg(struct gss_upcall_msg *gss_msg) { struct rpc_inode *rpci = gss_msg->inode; - struct inode *inode = &rpci->vfs_inode; struct gss_upcall_msg *old; - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); old = __gss_find_upcall(rpci, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); list_add(&gss_msg->list, &rpci->in_downcall); } else gss_msg = old; - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); return gss_msg; } @@ -342,14 +341,14 @@ __gss_unhash_msg(struct gss_upcall_msg *gss_msg) static void gss_unhash_msg(struct gss_upcall_msg *gss_msg) { - struct inode *inode = &gss_msg->inode->vfs_inode; + struct rpc_inode *rpci = gss_msg->inode; if (list_empty(&gss_msg->list)) return; - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); if (!list_empty(&gss_msg->list)) __gss_unhash_msg(gss_msg); - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); } static void @@ -376,11 +375,11 @@ gss_upcall_callback(struct rpc_task *task) struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; - struct inode *inode = &gss_msg->inode->vfs_inode; + struct rpc_inode *rpci = gss_msg->inode; - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); gss_handle_downcall_result(gss_cred, gss_msg); - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); task->tk_status = gss_msg->msg.errno; gss_release_msg(gss_msg); } @@ -506,7 +505,7 @@ gss_refresh_upcall(struct rpc_task *task) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; - struct inode *inode; + struct rpc_inode *rpci; int err = 0; dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, @@ -524,8 +523,8 @@ gss_refresh_upcall(struct rpc_task *task) err = PTR_ERR(gss_msg); goto out; } - inode = &gss_msg->inode->vfs_inode; - spin_lock(&inode->i_lock); + rpci = gss_msg->inode; + spin_lock(&rpci->lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { @@ -538,7 +537,7 @@ gss_refresh_upcall(struct rpc_task *task) gss_handle_downcall_result(gss_cred, gss_msg); err = gss_msg->msg.errno; } - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); gss_release_msg(gss_msg); out: dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", @@ -549,7 +548,7 @@ out: static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { - struct inode *inode; + struct rpc_inode *rpci; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); @@ -573,14 +572,14 @@ retry: err = PTR_ERR(gss_msg); goto out; } - inode = &gss_msg->inode->vfs_inode; + rpci = gss_msg->inode; for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; @@ -591,7 +590,7 @@ retry: gss_cred_set_ctx(cred, gss_msg->ctx); else err = gss_msg->msg.errno; - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); @@ -609,7 +608,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct gss_upcall_msg *gss_msg; - struct inode *inode = filp->f_path.dentry->d_inode; + struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); struct gss_cl_ctx *ctx; uid_t uid; ssize_t err = -EFBIG; @@ -639,14 +638,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ - spin_lock(&inode->i_lock); - gss_msg = __gss_find_upcall(RPC_I(inode), uid); + spin_lock(&rpci->lock); + gss_msg = __gss_find_upcall(rpci, uid); if (gss_msg == NULL) { - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); goto err_put_ctx; } list_del_init(&gss_msg->list); - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { @@ -674,9 +673,9 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = mlen; err_release_msg: - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); __gss_unhash_msg(gss_msg); - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); @@ -726,7 +725,7 @@ gss_pipe_release(struct inode *inode) struct gss_upcall_msg *gss_msg; restart: - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); list_for_each_entry(gss_msg, &rpci->in_downcall, list) { if (!list_empty(&gss_msg->msg.list)) @@ -734,11 +733,11 @@ restart: gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); gss_release_msg(gss_msg); goto restart; } - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); put_pipe_version(); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9a881138fead..16d9b9a701a4 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -83,12 +83,11 @@ rpc_timeout_upcall_queue(struct work_struct *work) LIST_HEAD(free_list); struct rpc_inode *rpci = container_of(work, struct rpc_inode, queue_timeout.work); - struct inode *inode = &rpci->vfs_inode; void (*destroy_msg)(struct rpc_pipe_msg *); - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); if (rpci->ops == NULL) { - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); return; } destroy_msg = rpci->ops->destroy_msg; @@ -96,7 +95,7 @@ rpc_timeout_upcall_queue(struct work_struct *work) list_splice_init(&rpci->pipe, &free_list); rpci->pipelen = 0; } - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT); } @@ -136,7 +135,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) struct rpc_inode *rpci = RPC_I(inode); int res = -EPIPE; - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); if (rpci->ops == NULL) goto out; if (rpci->nreaders) { @@ -153,7 +152,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) res = 0; } out: - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); wake_up(&rpci->waitq); return res; } @@ -176,14 +175,14 @@ rpc_close_pipes(struct inode *inode) ops = rpci->ops; if (ops != NULL) { LIST_HEAD(free_list); - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); need_release = rpci->nreaders != 0 || rpci->nwriters != 0; rpci->nreaders = 0; list_splice_init(&rpci->in_upcall, &free_list); list_splice_init(&rpci->pipe, &free_list); rpci->pipelen = 0; rpci->ops = NULL; - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); rpc_purge_list(rpci, &free_list, ops->destroy_msg, -EPIPE); rpci->nwriters = 0; if (need_release && ops->release_pipe) @@ -255,10 +254,10 @@ rpc_pipe_release(struct inode *inode, struct file *filp) goto out; msg = filp->private_data; if (msg != NULL) { - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); msg->errno = -EAGAIN; list_del_init(&msg->list); - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); rpci->ops->destroy_msg(msg); } if (filp->f_mode & FMODE_WRITE) @@ -267,10 +266,10 @@ rpc_pipe_release(struct inode *inode, struct file *filp) rpci->nreaders --; if (rpci->nreaders == 0) { LIST_HEAD(free_list); - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); list_splice_init(&rpci->pipe, &free_list); rpci->pipelen = 0; - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); rpc_purge_list(rpci, &free_list, rpci->ops->destroy_msg, -EAGAIN); } @@ -298,7 +297,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) } msg = filp->private_data; if (msg == NULL) { - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); if (!list_empty(&rpci->pipe)) { msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, @@ -308,7 +307,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) filp->private_data = msg; msg->copied = 0; } - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); if (msg == NULL) goto out_unlock; } @@ -316,9 +315,9 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) res = rpci->ops->upcall(filp, msg, buf, len); if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); list_del_init(&msg->list); - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); rpci->ops->destroy_msg(msg); } out_unlock: @@ -367,9 +366,9 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case FIONREAD: - spin_lock(&inode->i_lock); + spin_lock(&rpci->lock); if (rpci->ops == NULL) { - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); return -EPIPE; } len = rpci->pipelen; @@ -378,7 +377,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) msg = filp->private_data; len += msg->len - msg->copied; } - spin_unlock(&inode->i_lock); + spin_unlock(&rpci->lock); return put_user(len, (int __user *)arg); default: return -EINVAL; @@ -1153,6 +1152,7 @@ init_once(void *foo) INIT_DELAYED_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue); rpci->ops = NULL; + spin_lock_init(&rpci->lock); } int register_rpc_pipefs(void) From ba9e097593f371ebd102580a0c5b1b2cf55636a0 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:43:32 +0300 Subject: [PATCH 053/316] SUNRPC: split SUNPRC PipeFS pipe data and inode creation Generally, pipe data is used only for pipes, and thus allocating space for it on every RPC inode allocation is redundant. This patch splits private SUNRPC PipeFS pipe data and inode, makes pipe data allocated only for pipe inodes. This patch is also is a next step towards to to removing PipeFS inode references from kernel code other than PipeFS itself. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 10 +- net/sunrpc/auth_gss/auth_gss.c | 46 +++---- net/sunrpc/rpc_pipe.c | 208 ++++++++++++++++------------- 3 files changed, 142 insertions(+), 122 deletions(-) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index c93ea8689bc8..57512c23d34c 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -21,9 +21,7 @@ struct rpc_pipe_ops { void (*destroy_msg)(struct rpc_pipe_msg *); }; -struct rpc_inode { - struct inode vfs_inode; - void *private; +struct rpc_pipe { struct list_head pipe; struct list_head in_upcall; struct list_head in_downcall; @@ -38,6 +36,12 @@ struct rpc_inode { spinlock_t lock; }; +struct rpc_inode { + struct inode vfs_inode; + void *private; + struct rpc_pipe *pipe; +}; + static inline struct rpc_inode * RPC_I(struct inode *inode) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index a0844f92a447..e933484e55ef 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -112,7 +112,7 @@ gss_put_ctx(struct gss_cl_ctx *ctx) /* gss_cred_set_ctx: * called by gss_upcall_callback and gss_create_upcall in order * to set the gss context. The actual exchange of an old context - * and a new one is protected by the rpci->lock. + * and a new one is protected by the rpci->pipe->lock. */ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) @@ -297,7 +297,7 @@ static struct gss_upcall_msg * __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) { struct gss_upcall_msg *pos; - list_for_each_entry(pos, &rpci->in_downcall, list) { + list_for_each_entry(pos, &rpci->pipe->in_downcall, list) { if (pos->uid != uid) continue; atomic_inc(&pos->count); @@ -318,14 +318,14 @@ gss_add_msg(struct gss_upcall_msg *gss_msg) struct rpc_inode *rpci = gss_msg->inode; struct gss_upcall_msg *old; - spin_lock(&rpci->lock); + spin_lock(&rpci->pipe->lock); old = __gss_find_upcall(rpci, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); - list_add(&gss_msg->list, &rpci->in_downcall); + list_add(&gss_msg->list, &rpci->pipe->in_downcall); } else gss_msg = old; - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); return gss_msg; } @@ -345,10 +345,10 @@ gss_unhash_msg(struct gss_upcall_msg *gss_msg) if (list_empty(&gss_msg->list)) return; - spin_lock(&rpci->lock); + spin_lock(&rpci->pipe->lock); if (!list_empty(&gss_msg->list)) __gss_unhash_msg(gss_msg); - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); } static void @@ -377,9 +377,9 @@ gss_upcall_callback(struct rpc_task *task) struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; struct rpc_inode *rpci = gss_msg->inode; - spin_lock(&rpci->lock); + spin_lock(&rpci->pipe->lock); gss_handle_downcall_result(gss_cred, gss_msg); - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); task->tk_status = gss_msg->msg.errno; gss_release_msg(gss_msg); } @@ -524,7 +524,7 @@ gss_refresh_upcall(struct rpc_task *task) goto out; } rpci = gss_msg->inode; - spin_lock(&rpci->lock); + spin_lock(&rpci->pipe->lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { @@ -537,7 +537,7 @@ gss_refresh_upcall(struct rpc_task *task) gss_handle_downcall_result(gss_cred, gss_msg); err = gss_msg->msg.errno; } - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); gss_release_msg(gss_msg); out: dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", @@ -575,11 +575,11 @@ retry: rpci = gss_msg->inode; for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); - spin_lock(&rpci->lock); + spin_lock(&rpci->pipe->lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; @@ -590,7 +590,7 @@ retry: gss_cred_set_ctx(cred, gss_msg->ctx); else err = gss_msg->msg.errno; - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); @@ -638,14 +638,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ - spin_lock(&rpci->lock); + spin_lock(&rpci->pipe->lock); gss_msg = __gss_find_upcall(rpci, uid); if (gss_msg == NULL) { - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); goto err_put_ctx; } list_del_init(&gss_msg->list); - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { @@ -673,9 +673,9 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = mlen; err_release_msg: - spin_lock(&rpci->lock); + spin_lock(&rpci->pipe->lock); __gss_unhash_msg(gss_msg); - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); @@ -725,19 +725,19 @@ gss_pipe_release(struct inode *inode) struct gss_upcall_msg *gss_msg; restart: - spin_lock(&rpci->lock); - list_for_each_entry(gss_msg, &rpci->in_downcall, list) { + spin_lock(&rpci->pipe->lock); + list_for_each_entry(gss_msg, &rpci->pipe->in_downcall, list) { if (!list_empty(&gss_msg->msg.list)) continue; gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); gss_release_msg(gss_msg); goto restart; } - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); put_pipe_version(); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 16d9b9a701a4..b6f6555128db 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -61,7 +61,7 @@ void rpc_pipefs_notifier_unregister(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_unregister); -static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, +static void rpc_purge_list(struct rpc_pipe *pipe, struct list_head *head, void (*destroy_msg)(struct rpc_pipe_msg *), int err) { struct rpc_pipe_msg *msg; @@ -74,29 +74,29 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); - wake_up(&rpci->waitq); + wake_up(&pipe->waitq); } static void rpc_timeout_upcall_queue(struct work_struct *work) { LIST_HEAD(free_list); - struct rpc_inode *rpci = - container_of(work, struct rpc_inode, queue_timeout.work); + struct rpc_pipe *pipe = + container_of(work, struct rpc_pipe, queue_timeout.work); void (*destroy_msg)(struct rpc_pipe_msg *); - spin_lock(&rpci->lock); - if (rpci->ops == NULL) { - spin_unlock(&rpci->lock); + spin_lock(&pipe->lock); + if (pipe->ops == NULL) { + spin_unlock(&pipe->lock); return; } - destroy_msg = rpci->ops->destroy_msg; - if (rpci->nreaders == 0) { - list_splice_init(&rpci->pipe, &free_list); - rpci->pipelen = 0; + destroy_msg = pipe->ops->destroy_msg; + if (pipe->nreaders == 0) { + list_splice_init(&pipe->pipe, &free_list); + pipe->pipelen = 0; } - spin_unlock(&rpci->lock); - rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT); + spin_unlock(&pipe->lock); + rpc_purge_list(pipe, &free_list, destroy_msg, -ETIMEDOUT); } ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, @@ -135,25 +135,25 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) struct rpc_inode *rpci = RPC_I(inode); int res = -EPIPE; - spin_lock(&rpci->lock); - if (rpci->ops == NULL) + spin_lock(&rpci->pipe->lock); + if (rpci->pipe->ops == NULL) goto out; - if (rpci->nreaders) { - list_add_tail(&msg->list, &rpci->pipe); - rpci->pipelen += msg->len; + if (rpci->pipe->nreaders) { + list_add_tail(&msg->list, &rpci->pipe->pipe); + rpci->pipe->pipelen += msg->len; res = 0; - } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { - if (list_empty(&rpci->pipe)) + } else if (rpci->pipe->flags & RPC_PIPE_WAIT_FOR_OPEN) { + if (list_empty(&rpci->pipe->pipe)) queue_delayed_work(rpciod_workqueue, - &rpci->queue_timeout, + &rpci->pipe->queue_timeout, RPC_UPCALL_TIMEOUT); - list_add_tail(&msg->list, &rpci->pipe); - rpci->pipelen += msg->len; + list_add_tail(&msg->list, &rpci->pipe->pipe); + rpci->pipe->pipelen += msg->len; res = 0; } out: - spin_unlock(&rpci->lock); - wake_up(&rpci->waitq); + spin_unlock(&rpci->pipe->lock); + wake_up(&rpci->pipe->waitq); return res; } EXPORT_SYMBOL_GPL(rpc_queue_upcall); @@ -167,27 +167,27 @@ rpc_inode_setowner(struct inode *inode, void *private) static void rpc_close_pipes(struct inode *inode) { - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe = RPC_I(inode)->pipe; const struct rpc_pipe_ops *ops; int need_release; mutex_lock(&inode->i_mutex); - ops = rpci->ops; + ops = pipe->ops; if (ops != NULL) { LIST_HEAD(free_list); - spin_lock(&rpci->lock); - need_release = rpci->nreaders != 0 || rpci->nwriters != 0; - rpci->nreaders = 0; - list_splice_init(&rpci->in_upcall, &free_list); - list_splice_init(&rpci->pipe, &free_list); - rpci->pipelen = 0; - rpci->ops = NULL; - spin_unlock(&rpci->lock); - rpc_purge_list(rpci, &free_list, ops->destroy_msg, -EPIPE); - rpci->nwriters = 0; + spin_lock(&pipe->lock); + need_release = pipe->nreaders != 0 || pipe->nwriters != 0; + pipe->nreaders = 0; + list_splice_init(&pipe->in_upcall, &free_list); + list_splice_init(&pipe->pipe, &free_list); + pipe->pipelen = 0; + pipe->ops = NULL; + spin_unlock(&pipe->lock); + rpc_purge_list(pipe, &free_list, ops->destroy_msg, -EPIPE); + pipe->nwriters = 0; if (need_release && ops->release_pipe) ops->release_pipe(inode); - cancel_delayed_work_sync(&rpci->queue_timeout); + cancel_delayed_work_sync(&pipe->queue_timeout); } rpc_inode_setowner(inode, NULL); mutex_unlock(&inode->i_mutex); @@ -207,6 +207,7 @@ static void rpc_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); + kfree(RPC_I(inode)->pipe); kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } @@ -224,18 +225,18 @@ rpc_pipe_open(struct inode *inode, struct file *filp) int res = -ENXIO; mutex_lock(&inode->i_mutex); - if (rpci->ops == NULL) + if (rpci->pipe->ops == NULL) goto out; - first_open = rpci->nreaders == 0 && rpci->nwriters == 0; - if (first_open && rpci->ops->open_pipe) { - res = rpci->ops->open_pipe(inode); + first_open = rpci->pipe->nreaders == 0 && rpci->pipe->nwriters == 0; + if (first_open && rpci->pipe->ops->open_pipe) { + res = rpci->pipe->ops->open_pipe(inode); if (res) goto out; } if (filp->f_mode & FMODE_READ) - rpci->nreaders++; + rpci->pipe->nreaders++; if (filp->f_mode & FMODE_WRITE) - rpci->nwriters++; + rpci->pipe->nwriters++; res = 0; out: mutex_unlock(&inode->i_mutex); @@ -245,38 +246,38 @@ out: static int rpc_pipe_release(struct inode *inode, struct file *filp) { - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe = RPC_I(inode)->pipe; struct rpc_pipe_msg *msg; int last_close; mutex_lock(&inode->i_mutex); - if (rpci->ops == NULL) + if (pipe->ops == NULL) goto out; msg = filp->private_data; if (msg != NULL) { - spin_lock(&rpci->lock); + spin_lock(&pipe->lock); msg->errno = -EAGAIN; list_del_init(&msg->list); - spin_unlock(&rpci->lock); - rpci->ops->destroy_msg(msg); + spin_unlock(&pipe->lock); + pipe->ops->destroy_msg(msg); } if (filp->f_mode & FMODE_WRITE) - rpci->nwriters --; + pipe->nwriters --; if (filp->f_mode & FMODE_READ) { - rpci->nreaders --; - if (rpci->nreaders == 0) { + pipe->nreaders --; + if (pipe->nreaders == 0) { LIST_HEAD(free_list); - spin_lock(&rpci->lock); - list_splice_init(&rpci->pipe, &free_list); - rpci->pipelen = 0; - spin_unlock(&rpci->lock); - rpc_purge_list(rpci, &free_list, - rpci->ops->destroy_msg, -EAGAIN); + spin_lock(&pipe->lock); + list_splice_init(&pipe->pipe, &free_list); + pipe->pipelen = 0; + spin_unlock(&pipe->lock); + rpc_purge_list(pipe, &free_list, + pipe->ops->destroy_msg, -EAGAIN); } } - last_close = rpci->nwriters == 0 && rpci->nreaders == 0; - if (last_close && rpci->ops->release_pipe) - rpci->ops->release_pipe(inode); + last_close = pipe->nwriters == 0 && pipe->nreaders == 0; + if (last_close && pipe->ops->release_pipe) + pipe->ops->release_pipe(inode); out: mutex_unlock(&inode->i_mutex); return 0; @@ -291,34 +292,34 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) int res = 0; mutex_lock(&inode->i_mutex); - if (rpci->ops == NULL) { + if (rpci->pipe->ops == NULL) { res = -EPIPE; goto out_unlock; } msg = filp->private_data; if (msg == NULL) { - spin_lock(&rpci->lock); - if (!list_empty(&rpci->pipe)) { - msg = list_entry(rpci->pipe.next, + spin_lock(&rpci->pipe->lock); + if (!list_empty(&rpci->pipe->pipe)) { + msg = list_entry(rpci->pipe->pipe.next, struct rpc_pipe_msg, list); - list_move(&msg->list, &rpci->in_upcall); - rpci->pipelen -= msg->len; + list_move(&msg->list, &rpci->pipe->in_upcall); + rpci->pipe->pipelen -= msg->len; filp->private_data = msg; msg->copied = 0; } - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); if (msg == NULL) goto out_unlock; } /* NOTE: it is up to the callback to update msg->copied */ - res = rpci->ops->upcall(filp, msg, buf, len); + res = rpci->pipe->ops->upcall(filp, msg, buf, len); if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; - spin_lock(&rpci->lock); + spin_lock(&rpci->pipe->lock); list_del_init(&msg->list); - spin_unlock(&rpci->lock); - rpci->ops->destroy_msg(msg); + spin_unlock(&rpci->pipe->lock); + rpci->pipe->ops->destroy_msg(msg); } out_unlock: mutex_unlock(&inode->i_mutex); @@ -334,8 +335,8 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of mutex_lock(&inode->i_mutex); res = -EPIPE; - if (rpci->ops != NULL) - res = rpci->ops->downcall(filp, buf, len); + if (rpci->pipe->ops != NULL) + res = rpci->pipe->ops->downcall(filp, buf, len); mutex_unlock(&inode->i_mutex); return res; } @@ -347,12 +348,12 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) unsigned int mask = 0; rpci = RPC_I(filp->f_path.dentry->d_inode); - poll_wait(filp, &rpci->waitq, wait); + poll_wait(filp, &rpci->pipe->waitq, wait); mask = POLLOUT | POLLWRNORM; - if (rpci->ops == NULL) + if (rpci->pipe->ops == NULL) mask |= POLLERR | POLLHUP; - if (filp->private_data || !list_empty(&rpci->pipe)) + if (filp->private_data || !list_empty(&rpci->pipe->pipe)) mask |= POLLIN | POLLRDNORM; return mask; } @@ -366,18 +367,18 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case FIONREAD: - spin_lock(&rpci->lock); - if (rpci->ops == NULL) { - spin_unlock(&rpci->lock); + spin_lock(&rpci->pipe->lock); + if (rpci->pipe->ops == NULL) { + spin_unlock(&rpci->pipe->lock); return -EPIPE; } - len = rpci->pipelen; + len = rpci->pipe->pipelen; if (filp->private_data) { struct rpc_pipe_msg *msg; msg = filp->private_data; len += msg->len - msg->copied; } - spin_unlock(&rpci->lock); + spin_unlock(&rpci->pipe->lock); return put_user(len, (int __user *)arg); default: return -EINVAL; @@ -562,6 +563,23 @@ static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, return 0; } +static void +init_pipe(struct rpc_pipe *pipe) +{ + pipe->nreaders = 0; + pipe->nwriters = 0; + INIT_LIST_HEAD(&pipe->in_upcall); + INIT_LIST_HEAD(&pipe->in_downcall); + INIT_LIST_HEAD(&pipe->pipe); + pipe->pipelen = 0; + init_waitqueue_head(&pipe->waitq); + INIT_DELAYED_WORK(&pipe->queue_timeout, + rpc_timeout_upcall_queue); + pipe->ops = NULL; + spin_lock_init(&pipe->lock); + +} + static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *i_fop, @@ -569,16 +587,24 @@ static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, const struct rpc_pipe_ops *ops, int flags) { + struct rpc_pipe *pipe; struct rpc_inode *rpci; int err; + pipe = kzalloc(sizeof(struct rpc_pipe), GFP_KERNEL); + if (!pipe) + return -ENOMEM; + init_pipe(pipe); err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); - if (err) + if (err) { + kfree(pipe); return err; + } rpci = RPC_I(dentry->d_inode); rpci->private = private; - rpci->flags = flags; - rpci->ops = ops; + rpci->pipe = pipe; + rpci->pipe->flags = flags; + rpci->pipe->ops = ops; fsnotify_create(dir, dentry); return 0; } @@ -1142,17 +1168,7 @@ init_once(void *foo) inode_init_once(&rpci->vfs_inode); rpci->private = NULL; - rpci->nreaders = 0; - rpci->nwriters = 0; - INIT_LIST_HEAD(&rpci->in_upcall); - INIT_LIST_HEAD(&rpci->in_downcall); - INIT_LIST_HEAD(&rpci->pipe); - rpci->pipelen = 0; - init_waitqueue_head(&rpci->waitq); - INIT_DELAYED_WORK(&rpci->queue_timeout, - rpc_timeout_upcall_queue); - rpci->ops = NULL; - spin_lock_init(&rpci->lock); + rpci->pipe = NULL; } int register_rpc_pipefs(void) From d0fe13ba9178d3bb78bbd8577bdedc00f76b7a66 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:43:41 +0300 Subject: [PATCH 054/316] SUNRPC: cleanup PipeFS redundant RPC inode usage This patch removes redundant RPC inode references from PipeFS. These places are actually where pipes operations are performed. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 93 +++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b6f6555128db..299b1a3c3e49 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -132,28 +132,28 @@ EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); int rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) { - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe = RPC_I(inode)->pipe; int res = -EPIPE; - spin_lock(&rpci->pipe->lock); - if (rpci->pipe->ops == NULL) + spin_lock(&pipe->lock); + if (pipe->ops == NULL) goto out; - if (rpci->pipe->nreaders) { - list_add_tail(&msg->list, &rpci->pipe->pipe); - rpci->pipe->pipelen += msg->len; + if (pipe->nreaders) { + list_add_tail(&msg->list, &pipe->pipe); + pipe->pipelen += msg->len; res = 0; - } else if (rpci->pipe->flags & RPC_PIPE_WAIT_FOR_OPEN) { - if (list_empty(&rpci->pipe->pipe)) + } else if (pipe->flags & RPC_PIPE_WAIT_FOR_OPEN) { + if (list_empty(&pipe->pipe)) queue_delayed_work(rpciod_workqueue, - &rpci->pipe->queue_timeout, + &pipe->queue_timeout, RPC_UPCALL_TIMEOUT); - list_add_tail(&msg->list, &rpci->pipe->pipe); - rpci->pipe->pipelen += msg->len; + list_add_tail(&msg->list, &pipe->pipe); + pipe->pipelen += msg->len; res = 0; } out: - spin_unlock(&rpci->pipe->lock); - wake_up(&rpci->pipe->waitq); + spin_unlock(&pipe->lock); + wake_up(&pipe->waitq); return res; } EXPORT_SYMBOL_GPL(rpc_queue_upcall); @@ -220,23 +220,23 @@ rpc_destroy_inode(struct inode *inode) static int rpc_pipe_open(struct inode *inode, struct file *filp) { - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe = RPC_I(inode)->pipe; int first_open; int res = -ENXIO; mutex_lock(&inode->i_mutex); - if (rpci->pipe->ops == NULL) + if (pipe->ops == NULL) goto out; - first_open = rpci->pipe->nreaders == 0 && rpci->pipe->nwriters == 0; - if (first_open && rpci->pipe->ops->open_pipe) { - res = rpci->pipe->ops->open_pipe(inode); + first_open = pipe->nreaders == 0 && pipe->nwriters == 0; + if (first_open && pipe->ops->open_pipe) { + res = pipe->ops->open_pipe(inode); if (res) goto out; } if (filp->f_mode & FMODE_READ) - rpci->pipe->nreaders++; + pipe->nreaders++; if (filp->f_mode & FMODE_WRITE) - rpci->pipe->nwriters++; + pipe->nwriters++; res = 0; out: mutex_unlock(&inode->i_mutex); @@ -287,39 +287,39 @@ static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe = RPC_I(inode)->pipe; struct rpc_pipe_msg *msg; int res = 0; mutex_lock(&inode->i_mutex); - if (rpci->pipe->ops == NULL) { + if (pipe->ops == NULL) { res = -EPIPE; goto out_unlock; } msg = filp->private_data; if (msg == NULL) { - spin_lock(&rpci->pipe->lock); - if (!list_empty(&rpci->pipe->pipe)) { - msg = list_entry(rpci->pipe->pipe.next, + spin_lock(&pipe->lock); + if (!list_empty(&pipe->pipe)) { + msg = list_entry(pipe->pipe.next, struct rpc_pipe_msg, list); - list_move(&msg->list, &rpci->pipe->in_upcall); - rpci->pipe->pipelen -= msg->len; + list_move(&msg->list, &pipe->in_upcall); + pipe->pipelen -= msg->len; filp->private_data = msg; msg->copied = 0; } - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); if (msg == NULL) goto out_unlock; } /* NOTE: it is up to the callback to update msg->copied */ - res = rpci->pipe->ops->upcall(filp, msg, buf, len); + res = pipe->ops->upcall(filp, msg, buf, len); if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; - spin_lock(&rpci->pipe->lock); + spin_lock(&pipe->lock); list_del_init(&msg->list); - spin_unlock(&rpci->pipe->lock); - rpci->pipe->ops->destroy_msg(msg); + spin_unlock(&pipe->lock); + pipe->ops->destroy_msg(msg); } out_unlock: mutex_unlock(&inode->i_mutex); @@ -330,13 +330,13 @@ static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe = RPC_I(inode)->pipe; int res; mutex_lock(&inode->i_mutex); res = -EPIPE; - if (rpci->pipe->ops != NULL) - res = rpci->pipe->ops->downcall(filp, buf, len); + if (pipe->ops != NULL) + res = pipe->ops->downcall(filp, buf, len); mutex_unlock(&inode->i_mutex); return res; } @@ -344,16 +344,15 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of static unsigned int rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { - struct rpc_inode *rpci; + struct rpc_pipe *pipe = RPC_I(filp->f_path.dentry->d_inode)->pipe; unsigned int mask = 0; - rpci = RPC_I(filp->f_path.dentry->d_inode); - poll_wait(filp, &rpci->pipe->waitq, wait); + poll_wait(filp, &pipe->waitq, wait); mask = POLLOUT | POLLWRNORM; - if (rpci->pipe->ops == NULL) + if (pipe->ops == NULL) mask |= POLLERR | POLLHUP; - if (filp->private_data || !list_empty(&rpci->pipe->pipe)) + if (filp->private_data || !list_empty(&pipe->pipe)) mask |= POLLIN | POLLRDNORM; return mask; } @@ -362,23 +361,23 @@ static long rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe = RPC_I(inode)->pipe; int len; switch (cmd) { case FIONREAD: - spin_lock(&rpci->pipe->lock); - if (rpci->pipe->ops == NULL) { - spin_unlock(&rpci->pipe->lock); + spin_lock(&pipe->lock); + if (pipe->ops == NULL) { + spin_unlock(&pipe->lock); return -EPIPE; } - len = rpci->pipe->pipelen; + len = pipe->pipelen; if (filp->private_data) { struct rpc_pipe_msg *msg; msg = filp->private_data; len += msg->len - msg->copied; } - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); return put_user(len, (int __user *)arg); default: return -EINVAL; @@ -808,7 +807,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * @private: private data to associate with the pipe, for the caller's use * @ops: operations defining the behavior of the pipe: upcall, downcall, * release_pipe, open_pipe, and destroy_msg. - * @flags: rpc_inode flags + * @flags: rpc_pipe flags * * Data is made available for userspace to read by calls to * rpc_queue_upcall(). The actual reads will result in calls to From d706ed1f50d3f7fae61a177183562179abe8e4bb Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:43:49 +0300 Subject: [PATCH 055/316] SUNPRC: cleanup RPC PipeFS pipes upcall interface RPC pipe upcall doesn't requires only private pipe data. Thus RPC inode references in this code can be removed. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayoutdev.c | 2 +- fs/nfs/blocklayout/blocklayoutdm.c | 2 +- fs/nfs/idmap.c | 4 ++-- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- net/sunrpc/auth_gss/auth_gss.c | 3 +-- net/sunrpc/rpc_pipe.c | 3 +-- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index d08ba9107fde..81019190e46d 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -146,7 +146,7 @@ nfs4_blk_decode_device(struct nfs_server *server, dprintk("%s CALLING USERSPACE DAEMON\n", __func__); add_wait_queue(&bl_wq, &wq); - rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg); + rc = rpc_queue_upcall(RPC_I(bl_device_pipe->d_inode)->pipe, &msg); if (rc < 0) { remove_wait_queue(&bl_wq, &wq); rv = ERR_PTR(rc); diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index d055c7558073..3c38244a8724 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -66,7 +66,7 @@ static void dev_remove(dev_t dev) msg.len = sizeof(bl_msg) + bl_msg.totallen; add_wait_queue(&bl_wq, &wq); - if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { + if (rpc_queue_upcall(RPC_I(bl_device_pipe->d_inode)->pipe, &msg) < 0) { remove_wait_queue(&bl_wq, &wq); goto out; } diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 2c05f1991e1e..3c63c47c793d 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -589,7 +589,7 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, msg.len = sizeof(*im); add_wait_queue(&idmap->idmap_wq, &wq); - if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { + if (rpc_queue_upcall(RPC_I(idmap->idmap_dentry->d_inode)->pipe, &msg) < 0) { remove_wait_queue(&idmap->idmap_wq, &wq); goto out; } @@ -650,7 +650,7 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, add_wait_queue(&idmap->idmap_wq, &wq); - if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { + if (rpc_queue_upcall(RPC_I(idmap->idmap_dentry->d_inode)->pipe, &msg) < 0) { remove_wait_queue(&idmap->idmap_wq, &wq); goto out; } diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 57512c23d34c..3ebc257e2b43 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -64,7 +64,7 @@ extern void rpc_put_sb_net(const struct net *net); extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); -extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); +extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index e933484e55ef..4157e3151581 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -473,8 +473,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { - struct inode *inode = &gss_new->inode->vfs_inode; - int res = rpc_queue_upcall(inode, &gss_new->msg); + int res = rpc_queue_upcall(gss_new->inode->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); gss_msg = ERR_PTR(res); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 299b1a3c3e49..4093da79d512 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -130,9 +130,8 @@ EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); * initialize the fields of @msg (other than @msg->list) appropriately. */ int -rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) +rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) { - struct rpc_pipe *pipe = RPC_I(inode)->pipe; int res = -EPIPE; spin_lock(&pipe->lock); From 9beae4677de76cfa4ce8899dc8cd1a1cf8cd8332 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:43:57 +0300 Subject: [PATCH 056/316] SUNRPC: cleanup GSS pipes usage Currently gss auth holds RPC inode pointer which is now redundant since it requires only pipes operations which takes private pipe data as an argument. Thus this code can be cleaned and all references to RPC inode can be replaced with privtae pipe data references. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 76 +++++++++++++++++----------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4157e3151581..304b8309f217 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -112,7 +112,7 @@ gss_put_ctx(struct gss_cl_ctx *ctx) /* gss_cred_set_ctx: * called by gss_upcall_callback and gss_create_upcall in order * to set the gss context. The actual exchange of an old context - * and a new one is protected by the rpci->pipe->lock. + * and a new one is protected by the pipe->lock. */ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) @@ -251,7 +251,7 @@ struct gss_upcall_msg { struct rpc_pipe_msg msg; struct list_head list; struct gss_auth *auth; - struct rpc_inode *inode; + struct rpc_pipe *pipe; struct rpc_wait_queue rpc_waitqueue; wait_queue_head_t waitqueue; struct gss_cl_ctx *ctx; @@ -294,10 +294,10 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) } static struct gss_upcall_msg * -__gss_find_upcall(struct rpc_inode *rpci, uid_t uid) +__gss_find_upcall(struct rpc_pipe *pipe, uid_t uid) { struct gss_upcall_msg *pos; - list_for_each_entry(pos, &rpci->pipe->in_downcall, list) { + list_for_each_entry(pos, &pipe->in_downcall, list) { if (pos->uid != uid) continue; atomic_inc(&pos->count); @@ -315,17 +315,17 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) static inline struct gss_upcall_msg * gss_add_msg(struct gss_upcall_msg *gss_msg) { - struct rpc_inode *rpci = gss_msg->inode; + struct rpc_pipe *pipe = gss_msg->pipe; struct gss_upcall_msg *old; - spin_lock(&rpci->pipe->lock); - old = __gss_find_upcall(rpci, gss_msg->uid); + spin_lock(&pipe->lock); + old = __gss_find_upcall(pipe, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); - list_add(&gss_msg->list, &rpci->pipe->in_downcall); + list_add(&gss_msg->list, &pipe->in_downcall); } else gss_msg = old; - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); return gss_msg; } @@ -341,14 +341,14 @@ __gss_unhash_msg(struct gss_upcall_msg *gss_msg) static void gss_unhash_msg(struct gss_upcall_msg *gss_msg) { - struct rpc_inode *rpci = gss_msg->inode; + struct rpc_pipe *pipe = gss_msg->pipe; if (list_empty(&gss_msg->list)) return; - spin_lock(&rpci->pipe->lock); + spin_lock(&pipe->lock); if (!list_empty(&gss_msg->list)) __gss_unhash_msg(gss_msg); - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); } static void @@ -375,11 +375,11 @@ gss_upcall_callback(struct rpc_task *task) struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; - struct rpc_inode *rpci = gss_msg->inode; + struct rpc_pipe *pipe = gss_msg->pipe; - spin_lock(&rpci->pipe->lock); + spin_lock(&pipe->lock); gss_handle_downcall_result(gss_cred, gss_msg); - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); task->tk_status = gss_msg->msg.errno; gss_release_msg(gss_msg); } @@ -449,7 +449,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, kfree(gss_msg); return ERR_PTR(vers); } - gss_msg->inode = RPC_I(gss_auth->dentry[vers]->d_inode); + gss_msg->pipe = RPC_I(gss_auth->dentry[vers]->d_inode)->pipe; INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); init_waitqueue_head(&gss_msg->waitqueue); @@ -473,7 +473,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { - int res = rpc_queue_upcall(gss_new->inode->pipe, &gss_new->msg); + int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); gss_msg = ERR_PTR(res); @@ -504,7 +504,7 @@ gss_refresh_upcall(struct rpc_task *task) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; - struct rpc_inode *rpci; + struct rpc_pipe *pipe; int err = 0; dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, @@ -522,8 +522,8 @@ gss_refresh_upcall(struct rpc_task *task) err = PTR_ERR(gss_msg); goto out; } - rpci = gss_msg->inode; - spin_lock(&rpci->pipe->lock); + pipe = gss_msg->pipe; + spin_lock(&pipe->lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { @@ -536,7 +536,7 @@ gss_refresh_upcall(struct rpc_task *task) gss_handle_downcall_result(gss_cred, gss_msg); err = gss_msg->msg.errno; } - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); gss_release_msg(gss_msg); out: dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", @@ -547,7 +547,7 @@ out: static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { - struct rpc_inode *rpci; + struct rpc_pipe *pipe; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); @@ -571,14 +571,14 @@ retry: err = PTR_ERR(gss_msg); goto out; } - rpci = gss_msg->inode; + pipe = gss_msg->pipe; for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); - spin_lock(&rpci->pipe->lock); + spin_lock(&pipe->lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; @@ -589,7 +589,7 @@ retry: gss_cred_set_ctx(cred, gss_msg->ctx); else err = gss_msg->msg.errno; - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); @@ -607,7 +607,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct gss_upcall_msg *gss_msg; - struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); + struct rpc_pipe *pipe = RPC_I(filp->f_dentry->d_inode)->pipe; struct gss_cl_ctx *ctx; uid_t uid; ssize_t err = -EFBIG; @@ -637,14 +637,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ - spin_lock(&rpci->pipe->lock); - gss_msg = __gss_find_upcall(rpci, uid); + spin_lock(&pipe->lock); + gss_msg = __gss_find_upcall(pipe, uid); if (gss_msg == NULL) { - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); goto err_put_ctx; } list_del_init(&gss_msg->list); - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { @@ -672,9 +672,9 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = mlen; err_release_msg: - spin_lock(&rpci->pipe->lock); + spin_lock(&pipe->lock); __gss_unhash_msg(gss_msg); - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); @@ -720,23 +720,23 @@ static int gss_pipe_open_v1(struct inode *inode) static void gss_pipe_release(struct inode *inode) { - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe = RPC_I(inode)->pipe; struct gss_upcall_msg *gss_msg; restart: - spin_lock(&rpci->pipe->lock); - list_for_each_entry(gss_msg, &rpci->pipe->in_downcall, list) { + spin_lock(&pipe->lock); + list_for_each_entry(gss_msg, &pipe->in_downcall, list) { if (!list_empty(&gss_msg->msg.list)) continue; gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); gss_release_msg(gss_msg); goto restart; } - spin_unlock(&rpci->pipe->lock); + spin_unlock(&pipe->lock); put_pipe_version(); } From c239d83b9921b8a8005a3bcd23000cfe18acf5c2 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Dec 2011 15:44:06 +0300 Subject: [PATCH 057/316] SUNRPC: split SUNPRC PipeFS dentry and private pipe data creation This patch is a final step towards to removing PipeFS inode references from kernel code other than PipeFS itself. It makes all kernel SUNRPC PipeFS users depends on pipe private data, which state depend on their specific operations, etc. This patch completes SUNRPC PipeFS preparations and allows to create pipe private data and PipeFS dentries independently. Next step will be making SUNPRC PipeFS dentries allocated by SUNRPC PipeFS network namespace aware routines. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 16 ++++++--- fs/nfs/blocklayout/blocklayout.h | 2 +- fs/nfs/blocklayout/blocklayoutdev.c | 2 +- fs/nfs/blocklayout/blocklayoutdm.c | 2 +- fs/nfs/idmap.c | 28 ++++++++++----- include/linux/sunrpc/rpc_pipe_fs.h | 7 ++-- net/sunrpc/auth_gss/auth_gss.c | 54 ++++++++++++++++++---------- net/sunrpc/rpc_pipe.c | 56 ++++++++++++++++------------- 8 files changed, 108 insertions(+), 59 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 48cfac31f64c..848660fd58c4 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -46,7 +46,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andy Adamson "); MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); -struct dentry *bl_device_pipe; +struct rpc_pipe *bl_device_pipe; wait_queue_head_t bl_wq; static void print_page(struct page *page) @@ -1051,16 +1051,23 @@ static int __init nfs4blocklayout_init(void) if (ret) goto out_putrpc; - bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, - &bl_upcall_ops, 0); + bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); path_put(&path); if (IS_ERR(bl_device_pipe)) { ret = PTR_ERR(bl_device_pipe); goto out_putrpc; } + bl_device_pipe->dentry = rpc_mkpipe_dentry(path.dentry, "blocklayout", + NULL, bl_device_pipe); + if (IS_ERR(bl_device_pipe->dentry)) { + ret = PTR_ERR(bl_device_pipe->dentry); + goto out_destroy_pipe; + } out: return ret; +out_destroy_pipe: + rpc_destroy_pipe_data(bl_device_pipe); out_putrpc: rpc_put_mount(); out_remove: @@ -1074,7 +1081,8 @@ static void __exit nfs4blocklayout_exit(void) __func__); pnfs_unregister_layoutdriver(&blocklayout_type); - rpc_unlink(bl_device_pipe); + rpc_unlink(bl_device_pipe->dentry); + rpc_destroy_pipe_data(bl_device_pipe); rpc_put_mount(); } diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index e31a2df28e70..49c670b18a9e 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -161,7 +161,7 @@ struct bl_msg_hdr { u16 totallen; /* length of entire message, including hdr itself */ }; -extern struct dentry *bl_device_pipe; +extern struct rpc_pipe *bl_device_pipe; extern wait_queue_head_t bl_wq; #define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index 81019190e46d..949b62478799 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -146,7 +146,7 @@ nfs4_blk_decode_device(struct nfs_server *server, dprintk("%s CALLING USERSPACE DAEMON\n", __func__); add_wait_queue(&bl_wq, &wq); - rc = rpc_queue_upcall(RPC_I(bl_device_pipe->d_inode)->pipe, &msg); + rc = rpc_queue_upcall(bl_device_pipe, &msg); if (rc < 0) { remove_wait_queue(&bl_wq, &wq); rv = ERR_PTR(rc); diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 3c38244a8724..631f254d12ab 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -66,7 +66,7 @@ static void dev_remove(dev_t dev) msg.len = sizeof(bl_msg) + bl_msg.totallen; add_wait_queue(&bl_wq, &wq); - if (rpc_queue_upcall(RPC_I(bl_device_pipe->d_inode)->pipe, &msg) < 0) { + if (rpc_queue_upcall(bl_device_pipe, &msg) < 0) { remove_wait_queue(&bl_wq, &wq); goto out; } diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 3c63c47c793d..2992cb854e12 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -410,7 +410,7 @@ struct idmap_hashtable { }; struct idmap { - struct dentry *idmap_dentry; + struct rpc_pipe *idmap_pipe; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; struct mutex idmap_lock; /* Serializes upcalls */ @@ -435,6 +435,7 @@ int nfs_idmap_new(struct nfs_client *clp) { struct idmap *idmap; + struct rpc_pipe *pipe; int error; BUG_ON(clp->cl_idmap != NULL); @@ -443,14 +444,23 @@ nfs_idmap_new(struct nfs_client *clp) if (idmap == NULL) return -ENOMEM; - idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, - "idmap", idmap, &idmap_upcall_ops, 0); - if (IS_ERR(idmap->idmap_dentry)) { - error = PTR_ERR(idmap->idmap_dentry); + pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0); + if (IS_ERR(pipe)) { + error = PTR_ERR(pipe); kfree(idmap); return error; } + if (clp->cl_rpcclient->cl_path.dentry) + pipe->dentry = rpc_mkpipe_dentry(clp->cl_rpcclient->cl_path.dentry, + "idmap", idmap, pipe); + if (IS_ERR(pipe->dentry)) { + error = PTR_ERR(pipe->dentry); + rpc_destroy_pipe_data(pipe); + kfree(idmap); + return error; + } + idmap->idmap_pipe = pipe; mutex_init(&idmap->idmap_lock); mutex_init(&idmap->idmap_im_lock); init_waitqueue_head(&idmap->idmap_wq); @@ -468,7 +478,9 @@ nfs_idmap_delete(struct nfs_client *clp) if (!idmap) return; - rpc_unlink(idmap->idmap_dentry); + if (idmap->idmap_pipe->dentry) + rpc_unlink(idmap->idmap_pipe->dentry); + rpc_destroy_pipe_data(idmap->idmap_pipe); clp->cl_idmap = NULL; kfree(idmap); } @@ -589,7 +601,7 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, msg.len = sizeof(*im); add_wait_queue(&idmap->idmap_wq, &wq); - if (rpc_queue_upcall(RPC_I(idmap->idmap_dentry->d_inode)->pipe, &msg) < 0) { + if (rpc_queue_upcall(idmap->idmap_pipe, &msg) < 0) { remove_wait_queue(&idmap->idmap_wq, &wq); goto out; } @@ -650,7 +662,7 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, add_wait_queue(&idmap->idmap_wq, &wq); - if (rpc_queue_upcall(RPC_I(idmap->idmap_dentry->d_inode)->pipe, &msg) < 0) { + if (rpc_queue_upcall(idmap->idmap_pipe, &msg) < 0) { remove_wait_queue(&idmap->idmap_wq, &wq); goto out; } diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 3ebc257e2b43..0d1f748f76da 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -34,6 +34,7 @@ struct rpc_pipe { struct delayed_work queue_timeout; const struct rpc_pipe_ops *ops; spinlock_t lock; + struct dentry *dentry; }; struct rpc_inode { @@ -77,8 +78,10 @@ extern struct dentry *rpc_create_cache_dir(struct dentry *, struct cache_detail *); extern void rpc_remove_cache_dir(struct dentry *); -extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, - const struct rpc_pipe_ops *, int flags); +struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); +void rpc_destroy_pipe_data(struct rpc_pipe *pipe); +extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, + struct rpc_pipe *); extern int rpc_unlink(struct dentry *); extern struct vfsmount *rpc_get_mount(void); extern void rpc_put_mount(void); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 304b8309f217..f684ce606667 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -81,7 +81,7 @@ struct gss_auth { * mechanism (for example, "krb5") and exists for * backwards-compatibility with older gssd's. */ - struct dentry *dentry[2]; + struct rpc_pipe *pipe[2]; }; /* pipe_version >= 0 if and only if someone has a pipe open. */ @@ -449,7 +449,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, kfree(gss_msg); return ERR_PTR(vers); } - gss_msg->pipe = RPC_I(gss_auth->dentry[vers]->d_inode)->pipe; + gss_msg->pipe = gss_auth->pipe[vers]; INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); init_waitqueue_head(&gss_msg->waitqueue); @@ -799,21 +799,33 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) * that we supported only the old pipe. So we instead create * the new pipe first. */ - gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry, - "gssd", - clnt, &gss_upcall_ops_v1, - RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->dentry[1])) { - err = PTR_ERR(gss_auth->dentry[1]); + gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1, + RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(gss_auth->pipe[1])) { + err = PTR_ERR(gss_auth->pipe[1]); goto err_put_mech; } - gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry, - gss_auth->mech->gm_name, - clnt, &gss_upcall_ops_v0, - RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->dentry[0])) { - err = PTR_ERR(gss_auth->dentry[0]); + gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0, + RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(gss_auth->pipe[0])) { + err = PTR_ERR(gss_auth->pipe[0]); + goto err_destroy_pipe_1; + } + + gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_path.dentry, + "gssd", + clnt, gss_auth->pipe[1]); + if (IS_ERR(gss_auth->pipe[1]->dentry)) { + err = PTR_ERR(gss_auth->pipe[1]->dentry); + goto err_destroy_pipe_0; + } + + gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_path.dentry, + gss_auth->mech->gm_name, + clnt, gss_auth->pipe[0]); + if (IS_ERR(gss_auth->pipe[0]->dentry)) { + err = PTR_ERR(gss_auth->pipe[0]->dentry); goto err_unlink_pipe_1; } err = rpcauth_init_credcache(auth); @@ -822,9 +834,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) return auth; err_unlink_pipe_0: - rpc_unlink(gss_auth->dentry[0]); + rpc_unlink(gss_auth->pipe[0]->dentry); err_unlink_pipe_1: - rpc_unlink(gss_auth->dentry[1]); + rpc_unlink(gss_auth->pipe[1]->dentry); +err_destroy_pipe_0: + rpc_destroy_pipe_data(gss_auth->pipe[0]); +err_destroy_pipe_1: + rpc_destroy_pipe_data(gss_auth->pipe[1]); err_put_mech: gss_mech_put(gss_auth->mech); err_free: @@ -837,8 +853,10 @@ out_dec: static void gss_free(struct gss_auth *gss_auth) { - rpc_unlink(gss_auth->dentry[1]); - rpc_unlink(gss_auth->dentry[0]); + rpc_unlink(gss_auth->pipe[0]->dentry); + rpc_unlink(gss_auth->pipe[1]->dentry); + rpc_destroy_pipe_data(gss_auth->pipe[0]); + rpc_destroy_pipe_data(gss_auth->pipe[1]); gss_mech_put(gss_auth->mech); kfree(gss_auth); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 4093da79d512..6dd8b96e8df7 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -206,7 +206,6 @@ static void rpc_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - kfree(RPC_I(inode)->pipe); kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } @@ -575,34 +574,44 @@ init_pipe(struct rpc_pipe *pipe) rpc_timeout_upcall_queue); pipe->ops = NULL; spin_lock_init(&pipe->lock); - + pipe->dentry = NULL; } -static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private, - const struct rpc_pipe_ops *ops, - int flags) +void rpc_destroy_pipe_data(struct rpc_pipe *pipe) +{ + kfree(pipe); +} +EXPORT_SYMBOL_GPL(rpc_destroy_pipe_data); + +struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags) { struct rpc_pipe *pipe; - struct rpc_inode *rpci; - int err; pipe = kzalloc(sizeof(struct rpc_pipe), GFP_KERNEL); if (!pipe) - return -ENOMEM; + return ERR_PTR(-ENOMEM); init_pipe(pipe); + pipe->ops = ops; + pipe->flags = flags; + return pipe; +} +EXPORT_SYMBOL_GPL(rpc_mkpipe_data); + +static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private, + struct rpc_pipe *pipe) +{ + struct rpc_inode *rpci; + int err; + err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); - if (err) { - kfree(pipe); + if (err) return err; - } rpci = RPC_I(dentry->d_inode); rpci->private = private; rpci->pipe = pipe; - rpci->pipe->flags = flags; - rpci->pipe->ops = ops; fsnotify_create(dir, dentry); return 0; } @@ -819,9 +828,8 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. */ -struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, - void *private, const struct rpc_pipe_ops *ops, - int flags) +struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, + void *private, struct rpc_pipe *pipe) { struct dentry *dentry; struct inode *dir = parent->d_inode; @@ -829,9 +837,9 @@ struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, struct qstr q; int err; - if (ops->upcall == NULL) + if (pipe->ops->upcall == NULL) umode &= ~S_IRUGO; - if (ops->downcall == NULL) + if (pipe->ops->downcall == NULL) umode &= ~S_IWUGO; q.name = name; @@ -842,8 +850,8 @@ struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, dentry = __rpc_lookup_create_exclusive(parent, &q); if (IS_ERR(dentry)) goto out; - err = __rpc_mkpipe(dir, dentry, umode, &rpc_pipe_fops, - private, ops, flags); + err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops, + private, pipe); if (err) goto out_err; out: @@ -856,7 +864,7 @@ out_err: err); goto out; } -EXPORT_SYMBOL_GPL(rpc_mkpipe); +EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry); /** * rpc_unlink - remove a pipe From 0157d021d23a087eecfa830502f81cfe843f0d16 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 11 Jan 2012 19:18:01 +0400 Subject: [PATCH 058/316] SUNRPC: handle RPC client pipefs dentries by network namespace aware routines v2: 1) "Over-put" of PipeFS mount point fixed. Fix is ugly, but allows to bisect the patch set. And it will be removed later in the series. This patch makes RPC clients PipeFs dentries allocations in it's owner network namespace context. RPC client pipefs dentries creation logic has been changed: 1) Pipefs dentries creation by sb was moved to separated function, which will be used for handling PipeFS mount notification. 2) Initial value of RPC client PipeFS dir dentry is set no NULL now. RPC client pipefs dentries cleanup logic has been changed: 1) Cleanup is done now in separated rpc_remove_pipedir() function, which takes care about pipefs superblock locking. Also this patch removes slashes from cb_program.pipe_dir_name and from NFS_PIPE_DIRNAME to make rpc_d_lookup_sb() work. This doesn't affect vfs_path_lookup() results in nfs4blocklayout_init() since this slash is cutted off anyway in link_path_walk(). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4callback.c | 2 +- include/linux/nfs.h | 2 +- net/sunrpc/clnt.c | 103 +++++++++++++++++++++++++++-------------- 3 files changed, 69 insertions(+), 38 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 6f3ebb48b12f..426ccb171650 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -622,7 +622,7 @@ static struct rpc_program cb_program = { .nrvers = ARRAY_SIZE(nfs_cb_version), .version = nfs_cb_version, .stats = &cb_stats, - .pipe_dir_name = "/nfsd4_cb", + .pipe_dir_name = "nfsd4_cb", }; static int max_cb_time(void) diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 8c6ee44914cb..6d1fb63f5922 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -29,7 +29,7 @@ #define NFS_MNT_VERSION 1 #define NFS_MNT3_VERSION 3 -#define NFS_PIPE_DIRNAME "/nfs" +#define NFS_PIPE_DIRNAME "nfs" /* * NFS stats. The good thing with these values is that NFSv3 errors are diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f0268ea7e711..5ef192c1a57c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -93,52 +93,89 @@ static void rpc_unregister_client(struct rpc_clnt *clnt) spin_unlock(&rpc_client_lock); } -static int -rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) +static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) +{ + if (clnt->cl_path.dentry) + rpc_remove_client_dir(clnt->cl_path.dentry); + clnt->cl_path.dentry = NULL; +} + +static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) +{ + struct super_block *pipefs_sb; + int put_mnt = 0; + + pipefs_sb = rpc_get_sb_net(clnt->cl_xprt->xprt_net); + if (pipefs_sb) { + if (clnt->cl_path.dentry) + put_mnt = 1; + __rpc_clnt_remove_pipedir(clnt); + rpc_put_sb_net(clnt->cl_xprt->xprt_net); + } + if (put_mnt) + rpc_put_mount(); +} + +static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, + struct rpc_clnt *clnt, char *dir_name) { static uint32_t clntid; - struct path path, dir; char name[15]; struct qstr q = { .name = name, }; + struct dentry *dir, *dentry; int error; + dir = rpc_d_lookup_sb(sb, dir_name); + if (dir == NULL) + return dir; + for (;;) { + q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); + name[sizeof(name) - 1] = '\0'; + q.hash = full_name_hash(q.name, q.len); + dentry = rpc_create_client_dir(dir, &q, clnt); + if (!IS_ERR(dentry)) + break; + error = PTR_ERR(dentry); + if (error != -EEXIST) { + printk(KERN_INFO "RPC: Couldn't create pipefs entry" + " %s/%s, error %d\n", + dir_name, name, error); + break; + } + } + dput(dir); + return dentry; +} + +static int +rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) +{ + struct super_block *pipefs_sb; + struct path path; + clnt->cl_path.mnt = ERR_PTR(-ENOENT); - clnt->cl_path.dentry = ERR_PTR(-ENOENT); + clnt->cl_path.dentry = NULL; if (dir_name == NULL) return 0; path.mnt = rpc_get_mount(); if (IS_ERR(path.mnt)) return PTR_ERR(path.mnt); - error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &dir); - if (error) - goto err; - - for (;;) { - q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); - name[sizeof(name) - 1] = '\0'; - q.hash = full_name_hash(q.name, q.len); - path.dentry = rpc_create_client_dir(dir.dentry, &q, clnt); - if (!IS_ERR(path.dentry)) - break; - error = PTR_ERR(path.dentry); - if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry" - " %s/%s, error %d\n", - dir_name, name, error); - goto err_path_put; - } + pipefs_sb = rpc_get_sb_net(clnt->cl_xprt->xprt_net); + if (!pipefs_sb) { + rpc_put_mount(); + return -ENOENT; + } + path.dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); + rpc_put_sb_net(clnt->cl_xprt->xprt_net); + if (IS_ERR(path.dentry)) { + rpc_put_mount(); + return PTR_ERR(path.dentry); } - path_put(&dir); clnt->cl_path = path; return 0; -err_path_put: - path_put(&dir); -err: - rpc_put_mount(); - return error; } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) @@ -246,10 +283,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_auth: - if (!IS_ERR(clnt->cl_path.dentry)) { - rpc_remove_client_dir(clnt->cl_path.dentry); - rpc_put_mount(); - } + rpc_clnt_remove_pipedir(clnt); out_no_path: kfree(clnt->cl_principal); out_no_principal: @@ -474,10 +508,7 @@ rpc_free_client(struct rpc_clnt *clnt) { dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (!IS_ERR(clnt->cl_path.dentry)) { - rpc_remove_client_dir(clnt->cl_path.dentry); - rpc_put_mount(); - } + rpc_clnt_remove_pipedir(clnt); if (clnt->cl_parent != clnt) { rpc_release_client(clnt->cl_parent); goto out_free; From ccdc28f81c91f7ef2dc6c28d27f50264b19e4dd5 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 11 Jan 2012 19:18:09 +0400 Subject: [PATCH 059/316] SUNRPC: handle GSS AUTH pipes by network namespace aware routines This patch makes RPC GSS PipeFs pipes allocated in it's RPC client owner network namespace context. Pipes creation and destruction now done in separated functions, which takes care about PipeFS superblock locking. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 95 ++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index f684ce606667..1a6fa9173519 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -757,6 +757,73 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) } } +static void gss_pipes_dentries_destroy(struct rpc_auth *auth) +{ + struct gss_auth *gss_auth; + + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + rpc_unlink(gss_auth->pipe[0]->dentry); + rpc_unlink(gss_auth->pipe[1]->dentry); +} + +static int gss_pipes_dentries_create(struct rpc_auth *auth) +{ + int err; + struct gss_auth *gss_auth; + struct rpc_clnt *clnt; + + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + clnt = gss_auth->client; + + gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_path.dentry, + "gssd", + clnt, gss_auth->pipe[1]); + if (IS_ERR(gss_auth->pipe[1]->dentry)) + return PTR_ERR(gss_auth->pipe[1]->dentry); + gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_path.dentry, + gss_auth->mech->gm_name, + clnt, gss_auth->pipe[0]); + if (IS_ERR(gss_auth->pipe[0]->dentry)) { + err = PTR_ERR(gss_auth->pipe[0]->dentry); + goto err_unlink_pipe_1; + } + return 0; + +err_unlink_pipe_1: + rpc_unlink(gss_auth->pipe[1]->dentry); + return err; +} + +static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, + struct rpc_auth *auth) +{ + struct net *net = clnt->cl_xprt->xprt_net; + struct super_block *sb; + + sb = rpc_get_sb_net(net); + if (sb) { + if (clnt->cl_path.dentry) + gss_pipes_dentries_destroy(auth); + rpc_put_sb_net(net); + } +} + +static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, + struct rpc_auth *auth) +{ + struct net *net = clnt->cl_xprt->xprt_net; + struct super_block *sb; + int err = 0; + + sb = rpc_get_sb_net(net); + if (sb) { + if (clnt->cl_path.dentry) + err = gss_pipes_dentries_create(auth); + rpc_put_sb_net(net); + } + return err; +} + /* * NOTE: we have the opportunity to use different * parameters based on the input flavor (which must be a pseudoflavor) @@ -812,31 +879,16 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) err = PTR_ERR(gss_auth->pipe[0]); goto err_destroy_pipe_1; } - - gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_path.dentry, - "gssd", - clnt, gss_auth->pipe[1]); - if (IS_ERR(gss_auth->pipe[1]->dentry)) { - err = PTR_ERR(gss_auth->pipe[1]->dentry); + err = gss_pipes_dentries_create_net(clnt, auth); + if (err) goto err_destroy_pipe_0; - } - - gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_path.dentry, - gss_auth->mech->gm_name, - clnt, gss_auth->pipe[0]); - if (IS_ERR(gss_auth->pipe[0]->dentry)) { - err = PTR_ERR(gss_auth->pipe[0]->dentry); - goto err_unlink_pipe_1; - } err = rpcauth_init_credcache(auth); if (err) - goto err_unlink_pipe_0; + goto err_unlink_pipes; return auth; -err_unlink_pipe_0: - rpc_unlink(gss_auth->pipe[0]->dentry); -err_unlink_pipe_1: - rpc_unlink(gss_auth->pipe[1]->dentry); +err_unlink_pipes: + gss_pipes_dentries_destroy_net(clnt, auth); err_destroy_pipe_0: rpc_destroy_pipe_data(gss_auth->pipe[0]); err_destroy_pipe_1: @@ -853,8 +905,7 @@ out_dec: static void gss_free(struct gss_auth *gss_auth) { - rpc_unlink(gss_auth->pipe[0]->dentry); - rpc_unlink(gss_auth->pipe[1]->dentry); + gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth); rpc_destroy_pipe_data(gss_auth->pipe[0]); rpc_destroy_pipe_data(gss_auth->pipe[1]); gss_mech_put(gss_auth->mech); From 70abc49b4f4a4ef04a6bd9852edbd047b480bed7 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 12 Jan 2012 22:07:51 +0400 Subject: [PATCH 060/316] SUNRPC: make SUNPRC clients list per network namespace context This patch moves static SUNRPC clients list and it's lock to sunrpc_net structure. Currently this list is used only for debug purposes. But later it will be used also for selecting clients by networks namespace on PipeFS mount/umount events. Per-network namespace lists will make this faster and simplier. Note: client list is taken from "init_net" network namespace context in rpc_show_tasks(). This will be changed some day later with making SUNRPC sysctl's per network namespace context. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 ++- net/sunrpc/clnt.c | 26 +++++++++++++++----------- net/sunrpc/netns.h | 3 +++ net/sunrpc/sunrpc_syms.c | 3 +++ net/sunrpc/sysctl.c | 4 +++- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index e7756896f3ca..b16243a35f0b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -244,7 +244,8 @@ int rpciod_up(void); void rpciod_down(void); int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); #ifdef RPC_DEBUG -void rpc_show_tasks(void); +struct net; +void rpc_show_tasks(struct net *); #endif int rpc_init_mempool(void); void rpc_destroy_mempool(void); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5ef192c1a57c..90e82c5daeb6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -38,6 +38,7 @@ #include #include "sunrpc.h" +#include "netns.h" #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL @@ -50,8 +51,6 @@ /* * All RPC clients are linked into this list */ -static LIST_HEAD(all_clients); -static DEFINE_SPINLOCK(rpc_client_lock); static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); @@ -81,16 +80,20 @@ static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { - spin_lock(&rpc_client_lock); - list_add(&clnt->cl_clients, &all_clients); - spin_unlock(&rpc_client_lock); + struct sunrpc_net *sn = net_generic(clnt->cl_xprt->xprt_net, sunrpc_net_id); + + spin_lock(&sn->rpc_client_lock); + list_add(&clnt->cl_clients, &sn->all_clients); + spin_unlock(&sn->rpc_client_lock); } static void rpc_unregister_client(struct rpc_clnt *clnt) { - spin_lock(&rpc_client_lock); + struct sunrpc_net *sn = net_generic(clnt->cl_xprt->xprt_net, sunrpc_net_id); + + spin_lock(&sn->rpc_client_lock); list_del(&clnt->cl_clients); - spin_unlock(&rpc_client_lock); + spin_unlock(&sn->rpc_client_lock); } static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) @@ -1883,14 +1886,15 @@ static void rpc_show_task(const struct rpc_clnt *clnt, task->tk_action, rpc_waitq); } -void rpc_show_tasks(void) +void rpc_show_tasks(struct net *net) { struct rpc_clnt *clnt; struct rpc_task *task; int header = 0; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - spin_lock(&rpc_client_lock); - list_for_each_entry(clnt, &all_clients, cl_clients) { + spin_lock(&sn->rpc_client_lock); + list_for_each_entry(clnt, &sn->all_clients, cl_clients) { spin_lock(&clnt->cl_lock); list_for_each_entry(task, &clnt->cl_tasks, tk_task) { if (!header) { @@ -1901,6 +1905,6 @@ void rpc_show_tasks(void) } spin_unlock(&clnt->cl_lock); } - spin_unlock(&rpc_client_lock); + spin_unlock(&sn->rpc_client_lock); } #endif diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 11d2f4863403..0f3af34fa502 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -12,6 +12,9 @@ struct sunrpc_net { struct super_block *pipefs_sb; struct mutex pipefs_sb_lock; + + struct list_head all_clients; + spinlock_t rpc_client_lock; }; extern int sunrpc_net_id; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 7086d11589c8..b4217dc8599c 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -29,6 +29,7 @@ int sunrpc_net_id; static __net_init int sunrpc_init_net(struct net *net) { int err; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); err = rpc_proc_init(net); if (err) @@ -39,6 +40,8 @@ static __net_init int sunrpc_init_net(struct net *net) goto err_ipmap; rpc_pipefs_init_net(net); + INIT_LIST_HEAD(&sn->all_clients); + spin_lock_init(&sn->rpc_client_lock); return 0; err_ipmap: diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index e65dcc613339..af7d339add9d 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -20,6 +20,8 @@ #include #include +#include "netns.h" + /* * Declare the debug flags here */ @@ -110,7 +112,7 @@ proc_dodebug(ctl_table *table, int write, *(unsigned int *) table->data = value; /* Display the RPC tasks on writing to rpc_debug */ if (strcmp(table->procname, "rpc_debug") == 0) - rpc_show_tasks(); + rpc_show_tasks(&init_net); } else { if (!access_ok(VERIFY_WRITE, buffer, left)) return -EFAULT; From 80df9d202255071c8ec610a6a3fdca5cac69f7bd Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 11 Jan 2012 19:18:17 +0400 Subject: [PATCH 061/316] SUNRPC: subscribe RPC clients to pipefs notifications This patch subscribes RPC clients to RPC pipefs notifications. RPC clients notifier block is registering with pipefs initialization during SUNRPC module init. This notifier callback is responsible for RPC client PipeFS directory and GSS pipes creation. For pipes creation and destruction two additional callbacks were added to struct rpc_authops. Note that no locking required in notifier callback because PipeFS superblock pointer is passed as an argument from it's creation or destruction routine and thus we can be sure about it's validity. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 + net/sunrpc/auth_gss/auth_gss.c | 10 +++-- net/sunrpc/clnt.c | 69 +++++++++++++++++++++++++++++++++- net/sunrpc/rpc_pipe.c | 19 +++++++--- net/sunrpc/sunrpc.h | 2 + 5 files changed, 92 insertions(+), 10 deletions(-) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 7874a8a56638..492a36d72829 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -99,6 +99,8 @@ struct rpc_authops { struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); + int (*pipes_create)(struct rpc_auth *); + void (*pipes_destroy)(struct rpc_auth *); }; struct rpc_credops { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 1a6fa9173519..9da2d837b512 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -762,8 +762,10 @@ static void gss_pipes_dentries_destroy(struct rpc_auth *auth) struct gss_auth *gss_auth; gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->pipe[0]->dentry); - rpc_unlink(gss_auth->pipe[1]->dentry); + if (gss_auth->pipe[0]->dentry) + rpc_unlink(gss_auth->pipe[0]->dentry); + if (gss_auth->pipe[1]->dentry) + rpc_unlink(gss_auth->pipe[1]->dentry); } static int gss_pipes_dentries_create(struct rpc_auth *auth) @@ -1614,7 +1616,9 @@ static const struct rpc_authops authgss_ops = { .create = gss_create, .destroy = gss_destroy, .lookup_cred = gss_lookup_cred, - .crcreate = gss_create_cred + .crcreate = gss_create_cred, + .pipes_create = gss_pipes_dentries_create, + .pipes_destroy = gss_pipes_dentries_destroy, }; static const struct rpc_credops gss_credops = { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 90e82c5daeb6..417074500592 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -98,8 +98,11 @@ static void rpc_unregister_client(struct rpc_clnt *clnt) static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) { - if (clnt->cl_path.dentry) + if (clnt->cl_path.dentry) { + if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy) + clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth); rpc_remove_client_dir(clnt->cl_path.dentry); + } clnt->cl_path.dentry = NULL; } @@ -181,6 +184,70 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) return 0; } +static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event, + struct super_block *sb) +{ + struct dentry *dentry; + int err = 0; + + switch (event) { + case RPC_PIPEFS_MOUNT: + if (clnt->cl_program->pipe_dir_name == NULL) + break; + dentry = rpc_setup_pipedir_sb(sb, clnt, + clnt->cl_program->pipe_dir_name); + BUG_ON(dentry == NULL); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + clnt->cl_path.dentry = dentry; + if (clnt->cl_auth->au_ops->pipes_create) { + err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth); + if (err) + __rpc_clnt_remove_pipedir(clnt); + } + break; + case RPC_PIPEFS_UMOUNT: + __rpc_clnt_remove_pipedir(clnt); + break; + default: + printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event); + return -ENOTSUPP; + } + return err; +} + +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct super_block *sb = ptr; + struct rpc_clnt *clnt; + int error = 0; + struct sunrpc_net *sn = net_generic(sb->s_fs_info, sunrpc_net_id); + + spin_lock(&sn->rpc_client_lock); + list_for_each_entry(clnt, &sn->all_clients, cl_clients) { + error = __rpc_pipefs_event(clnt, event, sb); + if (error) + break; + } + spin_unlock(&sn->rpc_client_lock); + return error; +} + +static struct notifier_block rpc_clients_block = { + .notifier_call = rpc_pipefs_event, +}; + +int rpc_clients_notifier_register(void) +{ + return rpc_pipefs_notifier_register(&rpc_clients_block); +} + +void rpc_clients_notifier_unregister(void) +{ + return rpc_pipefs_notifier_unregister(&rpc_clients_block); +} + static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) { struct rpc_program *program = args->program; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 6dd8b96e8df7..910de4169a8d 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -937,7 +937,7 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry, /** * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() - * @dentry: directory to remove + * @clnt: rpc client */ int rpc_remove_client_dir(struct dentry *dentry) { @@ -1188,17 +1188,24 @@ int register_rpc_pipefs(void) init_once); if (!rpc_inode_cachep) return -ENOMEM; + err = rpc_clients_notifier_register(); + if (err) + goto err_notifier; err = register_filesystem(&rpc_pipe_fs_type); - if (err) { - kmem_cache_destroy(rpc_inode_cachep); - return err; - } - + if (err) + goto err_register; return 0; + +err_register: + rpc_clients_notifier_unregister(); +err_notifier: + kmem_cache_destroy(rpc_inode_cachep); + return err; } void unregister_rpc_pipefs(void) { + rpc_clients_notifier_unregister(); kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 90c292e2738b..14c9f6d1c5ff 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -47,5 +47,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr, struct page *headpage, unsigned long headoffset, struct page *tailpage, unsigned long tailoffset); +int rpc_clients_notifier_register(void); +void rpc_clients_notifier_unregister(void); #endif /* _NET_SUNRPC_SUNRPC_H */ From f5131257f771ad0e84cf0314a2a86b66318755a9 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 11 Jan 2012 19:18:26 +0400 Subject: [PATCH 062/316] SUNRPC: remove RPC client pipefs dentries after unregister Without this patch we have races: rpc_fill_super rpc_free_client rpc_pipefs_event(MOUNT) rpc_remove_pipedir spin_lock(&rpc_client_lock); rpc_setup_pipedir_sb spin_unlock(&rpc_client_lock); spin_lock(&rpc_client_lock); (remove from list) spin_unlock(&rpc_client_lock); MEAMORY LEAKED Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 417074500592..c89ceb80fe87 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -578,7 +578,6 @@ rpc_free_client(struct rpc_clnt *clnt) { dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - rpc_clnt_remove_pipedir(clnt); if (clnt->cl_parent != clnt) { rpc_release_client(clnt->cl_parent); goto out_free; @@ -587,6 +586,7 @@ rpc_free_client(struct rpc_clnt *clnt) kfree(clnt->cl_server); out_free: rpc_unregister_client(clnt); + rpc_clnt_remove_pipedir(clnt); rpc_free_iostats(clnt->cl_metrics); kfree(clnt->cl_principal); clnt->cl_metrics = NULL; From 70fe25b6e1a535f09792d1ed7012036c7bd506b2 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 11 Jan 2012 19:18:34 +0400 Subject: [PATCH 063/316] SUNRPC: remove RPC pipefs mount point manipulations from RPC clients code v2: 1) Updated due to changes in the first patch of the series. Now, with RPC pipefs mount notifications handling in RPC clients, we can remove mount point creation and destruction. RPC clients dentries will be created on PipeFS mount event and removed on umount event. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c89ceb80fe87..e3ced3061212 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -109,17 +109,12 @@ static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) { struct super_block *pipefs_sb; - int put_mnt = 0; pipefs_sb = rpc_get_sb_net(clnt->cl_xprt->xprt_net); if (pipefs_sb) { - if (clnt->cl_path.dentry) - put_mnt = 1; __rpc_clnt_remove_pipedir(clnt); rpc_put_sb_net(clnt->cl_xprt->xprt_net); } - if (put_mnt) - rpc_put_mount(); } static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, @@ -165,21 +160,13 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) clnt->cl_path.dentry = NULL; if (dir_name == NULL) return 0; - - path.mnt = rpc_get_mount(); - if (IS_ERR(path.mnt)) - return PTR_ERR(path.mnt); pipefs_sb = rpc_get_sb_net(clnt->cl_xprt->xprt_net); - if (!pipefs_sb) { - rpc_put_mount(); - return -ENOENT; - } + if (!pipefs_sb) + return 0; path.dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); rpc_put_sb_net(clnt->cl_xprt->xprt_net); - if (IS_ERR(path.dentry)) { - rpc_put_mount(); + if (IS_ERR(path.dentry)) return PTR_ERR(path.dentry); - } clnt->cl_path = path; return 0; } From 30507f58ce11e7664512059c708347d7a7d75271 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 11 Jan 2012 19:18:42 +0400 Subject: [PATCH 064/316] SUNRPC: remove RPC PipeFS mount point reference from RPC client This is a cleanup patch. We don't need this reference anymore. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 4 ++-- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/auth_gss/auth_gss.c | 8 ++++---- net/sunrpc/clnt.c | 21 ++++++++++----------- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 2992cb854e12..588d7da5b17e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -451,8 +451,8 @@ nfs_idmap_new(struct nfs_client *clp) return error; } - if (clp->cl_rpcclient->cl_path.dentry) - pipe->dentry = rpc_mkpipe_dentry(clp->cl_rpcclient->cl_path.dentry, + if (clp->cl_rpcclient->cl_dentry) + pipe->dentry = rpc_mkpipe_dentry(clp->cl_rpcclient->cl_dentry, "idmap", idmap, pipe); if (IS_ERR(pipe->dentry)) { error = PTR_ERR(pipe->dentry); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2c5993a17c33..bfd61852b718 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -57,7 +57,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - struct path cl_path; + struct dentry * cl_dentry; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 9da2d837b512..5ebb602cabe0 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -777,12 +777,12 @@ static int gss_pipes_dentries_create(struct rpc_auth *auth) gss_auth = container_of(auth, struct gss_auth, rpc_auth); clnt = gss_auth->client; - gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_path.dentry, + gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry, "gssd", clnt, gss_auth->pipe[1]); if (IS_ERR(gss_auth->pipe[1]->dentry)) return PTR_ERR(gss_auth->pipe[1]->dentry); - gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_path.dentry, + gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry, gss_auth->mech->gm_name, clnt, gss_auth->pipe[0]); if (IS_ERR(gss_auth->pipe[0]->dentry)) { @@ -804,7 +804,7 @@ static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, sb = rpc_get_sb_net(net); if (sb) { - if (clnt->cl_path.dentry) + if (clnt->cl_dentry) gss_pipes_dentries_destroy(auth); rpc_put_sb_net(net); } @@ -819,7 +819,7 @@ static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, sb = rpc_get_sb_net(net); if (sb) { - if (clnt->cl_path.dentry) + if (clnt->cl_dentry) err = gss_pipes_dentries_create(auth); rpc_put_sb_net(net); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e3ced3061212..ed7c22de9319 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -98,12 +98,12 @@ static void rpc_unregister_client(struct rpc_clnt *clnt) static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) { - if (clnt->cl_path.dentry) { + if (clnt->cl_dentry) { if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy) clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth); - rpc_remove_client_dir(clnt->cl_path.dentry); + rpc_remove_client_dir(clnt->cl_dentry); } - clnt->cl_path.dentry = NULL; + clnt->cl_dentry = NULL; } static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) @@ -154,20 +154,19 @@ static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { struct super_block *pipefs_sb; - struct path path; + struct dentry *dentry; - clnt->cl_path.mnt = ERR_PTR(-ENOENT); - clnt->cl_path.dentry = NULL; + clnt->cl_dentry = NULL; if (dir_name == NULL) return 0; pipefs_sb = rpc_get_sb_net(clnt->cl_xprt->xprt_net); if (!pipefs_sb) return 0; - path.dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); + dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); rpc_put_sb_net(clnt->cl_xprt->xprt_net); - if (IS_ERR(path.dentry)) - return PTR_ERR(path.dentry); - clnt->cl_path = path; + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + clnt->cl_dentry = dentry; return 0; } @@ -186,7 +185,7 @@ static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event, BUG_ON(dentry == NULL); if (IS_ERR(dentry)) return PTR_ERR(dentry); - clnt->cl_path.dentry = dentry; + clnt->cl_dentry = dentry; if (clnt->cl_auth->au_ops->pipes_create) { err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth); if (err) From 820f9442e711a81749e70c40f149fc54c4ce0ca8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 25 Nov 2011 17:12:40 +0300 Subject: [PATCH 065/316] SUNRPC: split cache creation and PipeFS registration This precursor patch splits SUNRPC cache creation and PipeFS registartion. It's required for latter split of NFS DNS resolver cache creation per network namespace context and PipeFS registration/unregistration on MOUNT/UMOUNT events. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/cache_lib.c | 3 +++ include/linux/sunrpc/cache.h | 2 ++ net/sunrpc/cache.c | 12 +++++------- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index c98b439332fc..d62a8951cb12 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -120,6 +120,7 @@ int nfs_cache_register(struct cache_detail *cd) mnt = rpc_get_mount(); if (IS_ERR(mnt)) return PTR_ERR(mnt); + sunrpc_init_cache_detail(cd); ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); if (ret) goto err; @@ -128,6 +129,7 @@ int nfs_cache_register(struct cache_detail *cd) if (!ret) return ret; err: + sunrpc_destroy_cache_detail(cd); rpc_put_mount(); return ret; } @@ -135,6 +137,7 @@ err: void nfs_cache_unregister(struct cache_detail *cd) { sunrpc_cache_unregister_pipefs(cd); + sunrpc_destroy_cache_detail(cd); rpc_put_mount(); } diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 57531f8e5956..590a8ab0cec3 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -202,6 +202,8 @@ extern int cache_register_net(struct cache_detail *cd, struct net *net); extern void cache_unregister(struct cache_detail *cd); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); +extern void sunrpc_init_cache_detail(struct cache_detail *cd); +extern void sunrpc_destroy_cache_detail(struct cache_detail *cd); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, umode_t, struct cache_detail *); extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 465df9ae1046..fefe06729f9d 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -344,7 +344,7 @@ static int current_index; static void do_cache_clean(struct work_struct *work); static struct delayed_work cache_cleaner; -static void sunrpc_init_cache_detail(struct cache_detail *cd) +void sunrpc_init_cache_detail(struct cache_detail *cd) { rwlock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); @@ -360,8 +360,9 @@ static void sunrpc_init_cache_detail(struct cache_detail *cd) /* start the cleaning process */ schedule_delayed_work(&cache_cleaner, 0); } +EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail); -static void sunrpc_destroy_cache_detail(struct cache_detail *cd) +void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); @@ -384,6 +385,7 @@ static void sunrpc_destroy_cache_detail(struct cache_detail *cd) out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } +EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); /* clean cache tries to find something to clean * and cleans it. @@ -1787,17 +1789,14 @@ int sunrpc_cache_register_pipefs(struct dentry *parent, struct dentry *dir; int ret = 0; - sunrpc_init_cache_detail(cd); q.name = name; q.len = strlen(name); q.hash = full_name_hash(q.name, q.len); dir = rpc_create_cache_dir(parent, &q, umode, cd); if (!IS_ERR(dir)) cd->u.pipefs.dir = dir; - else { - sunrpc_destroy_cache_detail(cd); + else ret = PTR_ERR(dir); - } return ret; } EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); @@ -1806,7 +1805,6 @@ void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) { rpc_remove_cache_dir(cd->u.pipefs.dir); cd->u.pipefs.dir = NULL; - sunrpc_destroy_cache_detail(cd); } EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); From 9222b955065dbb047b8db9eb2431979bff3ce700 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 25 Nov 2011 17:12:48 +0300 Subject: [PATCH 066/316] NFS: split cache creation and PipeFS registration This precursor patch splits NFS cache creation and PipeFS registartion. It's required for latter split of NFS DNS resolver cache creation per network namespace context and PipeFS registration/unregistration on MOUNT/UMOUNT events. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/cache_lib.c | 11 +++++++++-- fs/nfs/cache_lib.h | 2 ++ fs/nfs/dns_resolve.c | 11 ++++++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index d62a8951cb12..9d79a2eaab27 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -120,7 +120,6 @@ int nfs_cache_register(struct cache_detail *cd) mnt = rpc_get_mount(); if (IS_ERR(mnt)) return PTR_ERR(mnt); - sunrpc_init_cache_detail(cd); ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); if (ret) goto err; @@ -129,7 +128,6 @@ int nfs_cache_register(struct cache_detail *cd) if (!ret) return ret; err: - sunrpc_destroy_cache_detail(cd); rpc_put_mount(); return ret; } @@ -141,3 +139,12 @@ void nfs_cache_unregister(struct cache_detail *cd) rpc_put_mount(); } +void nfs_cache_init(struct cache_detail *cd) +{ + sunrpc_init_cache_detail(cd); +} + +void nfs_cache_destroy(struct cache_detail *cd) +{ + sunrpc_destroy_cache_detail(cd); +} diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 7cf6cafcc007..815dd6651c9f 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -23,5 +23,7 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); +extern void nfs_cache_init(struct cache_detail *cd); +extern void nfs_cache_destroy(struct cache_detail *cd); extern int nfs_cache_register(struct cache_detail *cd); extern void nfs_cache_unregister(struct cache_detail *cd); diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index a6e711ad130f..619dea6b5ccf 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -361,12 +361,21 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen, int nfs_dns_resolver_init(void) { - return nfs_cache_register(&nfs_dns_resolve); + int err; + + nfs_cache_init(&nfs_dns_resolve); + err = nfs_cache_register(&nfs_dns_resolve); + if (err) { + nfs_cache_destroy(&nfs_dns_resolve); + return err; + } + return 0; } void nfs_dns_resolver_destroy(void) { nfs_cache_unregister(&nfs_dns_resolve); + nfs_cache_destroy(&nfs_dns_resolve); } #endif From 5c1cacb175185ed925d7dc13ac7e0653e7a633cd Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 25 Nov 2011 17:12:56 +0300 Subject: [PATCH 067/316] NFS: handle NFS caches dentries by network namespace aware routines This patch makes NFS caches PipeFS dentries allocated and destroyed in network namespace context by PipeFS network namespace aware routines. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/cache_lib.c | 44 +++++++++++++++++++++++++++++++++++--------- fs/nfs/cache_lib.h | 4 ++-- fs/nfs/dns_resolve.c | 4 ++-- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 9d79a2eaab27..5dd017bbb7a2 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "cache_lib.h" @@ -111,20 +112,34 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq) return 0; } -int nfs_cache_register(struct cache_detail *cd) +static int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd) +{ + int ret; + struct dentry *dir; + + dir = rpc_d_lookup_sb(sb, "cache"); + BUG_ON(dir == NULL); + ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd); + dput(dir); + return ret; +} + +int nfs_cache_register_net(struct net *net, struct cache_detail *cd) { struct vfsmount *mnt; - struct path path; + struct super_block *pipefs_sb; int ret; mnt = rpc_get_mount(); if (IS_ERR(mnt)) return PTR_ERR(mnt); - ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); - if (ret) + pipefs_sb = rpc_get_sb_net(net); + if (!pipefs_sb) { + ret = -ENOENT; goto err; - ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd); - path_put(&path); + } + ret = nfs_cache_register_sb(pipefs_sb, cd); + rpc_put_sb_net(net); if (!ret) return ret; err: @@ -132,10 +147,21 @@ err: return ret; } -void nfs_cache_unregister(struct cache_detail *cd) +static void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd) { - sunrpc_cache_unregister_pipefs(cd); - sunrpc_destroy_cache_detail(cd); + if (cd->u.pipefs.dir) + sunrpc_cache_unregister_pipefs(cd); +} + +void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) +{ + struct super_block *pipefs_sb; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + nfs_cache_unregister_sb(pipefs_sb, cd); + rpc_put_sb_net(net); + } rpc_put_mount(); } diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 815dd6651c9f..e0a6cc4b01b9 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -25,5 +25,5 @@ extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); extern void nfs_cache_init(struct cache_detail *cd); extern void nfs_cache_destroy(struct cache_detail *cd); -extern int nfs_cache_register(struct cache_detail *cd); -extern void nfs_cache_unregister(struct cache_detail *cd); +extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd); +extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 619dea6b5ccf..3cbf4b88f827 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -364,7 +364,7 @@ int nfs_dns_resolver_init(void) int err; nfs_cache_init(&nfs_dns_resolve); - err = nfs_cache_register(&nfs_dns_resolve); + err = nfs_cache_register_net(&init_net, &nfs_dns_resolve); if (err) { nfs_cache_destroy(&nfs_dns_resolve); return err; @@ -374,7 +374,7 @@ int nfs_dns_resolver_init(void) void nfs_dns_resolver_destroy(void) { - nfs_cache_unregister(&nfs_dns_resolve); + nfs_cache_unregister_net(&init_net, &nfs_dns_resolve); nfs_cache_destroy(&nfs_dns_resolve); } From 1b340d0118da1d7c60c664f17d7c8fce2bb1cd9d Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 25 Nov 2011 17:13:04 +0300 Subject: [PATCH 068/316] NFS: DNS resolver cache per network namespace context introduced This patch implements DNS resolver cache creation and registration for each alive network namespace context. This was done by registering NFS per-net operations, responsible for DNS cache allocation/register and unregister/destructioning instead of initialization and destruction of static "nfs_dns_resolve" cache detail (this one was removed). Pointer to network dns resolver cache is stored in new per-net "nfs_net" structure. This patch also changes nfs_dns_resolve_name() function prototype (and it's calls) by adding network pointer parameter, which is used to get proper DNS resolver cache pointer for do_cache_lookup_wait() call. Note: empty nfs_dns_resolver_init() and nfs_dns_resolver_destroy() functions will be used in next patch in the series. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/dns_resolve.c | 96 +++++++++++++++++++++++++++--------------- fs/nfs/dns_resolve.h | 14 +++++- fs/nfs/inode.c | 33 +++++++++++++-- fs/nfs/netns.h | 13 ++++++ fs/nfs/nfs4namespace.c | 8 ++-- 5 files changed, 123 insertions(+), 41 deletions(-) create mode 100644 fs/nfs/netns.h diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 3cbf4b88f827..9aea78ab86ac 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -11,7 +11,7 @@ #include #include -ssize_t nfs_dns_resolve_name(char *name, size_t namelen, +ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, struct sockaddr *sa, size_t salen) { ssize_t ret; @@ -43,12 +43,11 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen, #include "dns_resolve.h" #include "cache_lib.h" +#include "netns.h" #define NFS_DNS_HASHBITS 4 #define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS) -static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE]; - struct nfs_dns_ent { struct cache_head h; @@ -259,21 +258,6 @@ out: return ret; } -static struct cache_detail nfs_dns_resolve = { - .owner = THIS_MODULE, - .hash_size = NFS_DNS_HASHTBL_SIZE, - .hash_table = nfs_dns_table, - .name = "dns_resolve", - .cache_put = nfs_dns_ent_put, - .cache_upcall = nfs_dns_upcall, - .cache_parse = nfs_dns_parse, - .cache_show = nfs_dns_show, - .match = nfs_dns_match, - .init = nfs_dns_ent_init, - .update = nfs_dns_ent_update, - .alloc = nfs_dns_ent_alloc, -}; - static int do_cache_lookup(struct cache_detail *cd, struct nfs_dns_ent *key, struct nfs_dns_ent **item, @@ -336,8 +320,8 @@ out: return ret; } -ssize_t nfs_dns_resolve_name(char *name, size_t namelen, - struct sockaddr *sa, size_t salen) +ssize_t nfs_dns_resolve_name(struct net *net, char *name, + size_t namelen, struct sockaddr *sa, size_t salen) { struct nfs_dns_ent key = { .hostname = name, @@ -345,37 +329,83 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen, }; struct nfs_dns_ent *item = NULL; ssize_t ret; + struct nfs_net *nn = net_generic(net, nfs_net_id); - ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item); + ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item); if (ret == 0) { if (salen >= item->addrlen) { memcpy(sa, &item->addr, item->addrlen); ret = item->addrlen; } else ret = -EOVERFLOW; - cache_put(&item->h, &nfs_dns_resolve); + cache_put(&item->h, nn->nfs_dns_resolve); } else if (ret == -ENOENT) ret = -ESRCH; return ret; } +int nfs_dns_resolver_cache_init(struct net *net) +{ + int err = -ENOMEM; + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct cache_detail *cd; + struct cache_head **tbl; + + cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); + if (cd == NULL) + goto err_cd; + + tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *), + GFP_KERNEL); + if (tbl == NULL) + goto err_tbl; + + cd->owner = THIS_MODULE, + cd->hash_size = NFS_DNS_HASHTBL_SIZE, + cd->hash_table = tbl, + cd->name = "dns_resolve", + cd->cache_put = nfs_dns_ent_put, + cd->cache_upcall = nfs_dns_upcall, + cd->cache_parse = nfs_dns_parse, + cd->cache_show = nfs_dns_show, + cd->match = nfs_dns_match, + cd->init = nfs_dns_ent_init, + cd->update = nfs_dns_ent_update, + cd->alloc = nfs_dns_ent_alloc, + + nfs_cache_init(cd); + err = nfs_cache_register_net(net, cd); + if (err) + goto err_reg; + nn->nfs_dns_resolve = cd; + return 0; + +err_reg: + nfs_cache_destroy(cd); + kfree(cd->hash_table); +err_tbl: + kfree(cd); +err_cd: + return err; +} + +void nfs_dns_resolver_cache_destroy(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct cache_detail *cd = nn->nfs_dns_resolve; + + nfs_cache_unregister_net(net, cd); + nfs_cache_destroy(cd); + kfree(cd->hash_table); + kfree(cd); +} + int nfs_dns_resolver_init(void) { - int err; - - nfs_cache_init(&nfs_dns_resolve); - err = nfs_cache_register_net(&init_net, &nfs_dns_resolve); - if (err) { - nfs_cache_destroy(&nfs_dns_resolve); - return err; - } return 0; } void nfs_dns_resolver_destroy(void) { - nfs_cache_unregister_net(&init_net, &nfs_dns_resolve); - nfs_cache_destroy(&nfs_dns_resolve); } - #endif diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h index 199bb5543a91..2e4f596d2923 100644 --- a/fs/nfs/dns_resolve.h +++ b/fs/nfs/dns_resolve.h @@ -15,12 +15,22 @@ static inline int nfs_dns_resolver_init(void) static inline void nfs_dns_resolver_destroy(void) {} + +static inline int nfs_dns_resolver_cache_init(struct net *net) +{ + return 0; +} + +static inline void nfs_dns_resolver_cache_destroy(struct net *net) +{} #else extern int nfs_dns_resolver_init(void); extern void nfs_dns_resolver_destroy(void); +extern int nfs_dns_resolver_cache_init(struct net *net); +extern void nfs_dns_resolver_cache_destroy(struct net *net); #endif -extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, - struct sockaddr *sa, size_t salen); +extern ssize_t nfs_dns_resolve_name(struct net *net, char *name, + size_t namelen, struct sockaddr *sa, size_t salen); #endif diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f649fba8c384..0335f6e4ff7e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -51,6 +51,7 @@ #include "fscache.h" #include "dns_resolve.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -1552,6 +1553,25 @@ static void nfsiod_stop(void) destroy_workqueue(wq); } +int nfs_net_id; + +static int nfs_net_init(struct net *net) +{ + return nfs_dns_resolver_cache_init(net); +} + +static void nfs_net_exit(struct net *net) +{ + nfs_dns_resolver_cache_destroy(net); +} + +static struct pernet_operations nfs_net_ops = { + .init = nfs_net_init, + .exit = nfs_net_exit, + .id = &nfs_net_id, + .size = sizeof(struct nfs_net), +}; + /* * Initialize NFS */ @@ -1561,9 +1581,13 @@ static int __init init_nfs_fs(void) err = nfs_idmap_init(); if (err < 0) - goto out9; + goto out10; err = nfs_dns_resolver_init(); + if (err < 0) + goto out9; + + err = register_pernet_subsys(&nfs_net_ops); if (err < 0) goto out8; @@ -1625,10 +1649,12 @@ out5: out6: nfs_fscache_unregister(); out7: - nfs_dns_resolver_destroy(); + unregister_pernet_subsys(&nfs_net_ops); out8: - nfs_idmap_quit(); + nfs_dns_resolver_destroy(); out9: + nfs_idmap_quit(); +out10: return err; } @@ -1640,6 +1666,7 @@ static void __exit exit_nfs_fs(void) nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); nfs_fscache_unregister(); + unregister_pernet_subsys(&nfs_net_ops); nfs_dns_resolver_destroy(); nfs_idmap_quit(); #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h new file mode 100644 index 000000000000..8c1f130d6ca2 --- /dev/null +++ b/fs/nfs/netns.h @@ -0,0 +1,13 @@ +#ifndef __NFS_NETNS_H__ +#define __NFS_NETNS_H__ + +#include +#include + +struct nfs_net { + struct cache_detail *nfs_dns_resolve; +}; + +extern int nfs_net_id; + +#endif diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index bb80c49b6533..919a36935924 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -94,13 +94,14 @@ static int nfs4_validate_fspath(struct dentry *dentry, } static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen) + struct sockaddr *sa, size_t salen, struct nfs_server *server) { ssize_t ret; ret = rpc_pton(string, len, sa, salen); if (ret == 0) { - ret = nfs_dns_resolve_name(string, len, sa, salen); + ret = nfs_dns_resolve_name(server->client->cl_xprt->xprt_net, + string, len, sa, salen); if (ret < 0) ret = 0; } @@ -137,7 +138,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, continue; mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize); + mountdata->addr, addr_bufsize, + NFS_SB(mountdata->sb)); if (mountdata->addrlen == 0) continue; From 9df69c81b469780b64f9b26bb87c048613fdeddf Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 25 Nov 2011 17:13:12 +0300 Subject: [PATCH 069/316] NFS: DNS resolver PipeFS notifier introduced This patch subscribes DNS resolver caches to RPC pipefs notifications. Notifier is registering on NFS module load. This notifier callback is responsible for creation/destruction of PipeFS DNS resolver cache directory. Note that no locking required in notifier callback because PipeFS superblock pointer is passed as an argument from it's creation or destruction routine and thus we can be sure about it's validity. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/cache_lib.c | 4 ++-- fs/nfs/cache_lib.h | 4 ++++ fs/nfs/dns_resolve.c | 38 +++++++++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 5dd017bbb7a2..5905a31211e5 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -112,7 +112,7 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq) return 0; } -static int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd) +int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd) { int ret; struct dentry *dir; @@ -147,7 +147,7 @@ err: return ret; } -static void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd) +void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd) { if (cd->u.pipefs.dir) sunrpc_cache_unregister_pipefs(cd); diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index e0a6cc4b01b9..317db95e37f8 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -27,3 +27,7 @@ extern void nfs_cache_init(struct cache_detail *cd); extern void nfs_cache_destroy(struct cache_detail *cd); extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd); extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); +extern int nfs_cache_register_sb(struct super_block *sb, + struct cache_detail *cd); +extern void nfs_cache_unregister_sb(struct super_block *sb, + struct cache_detail *cd); diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 9aea78ab86ac..200eb67c95d9 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -40,6 +40,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include #include #include +#include #include "dns_resolve.h" #include "cache_lib.h" @@ -400,12 +401,47 @@ void nfs_dns_resolver_cache_destroy(struct net *net) kfree(cd); } +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct super_block *sb = ptr; + struct net *net = sb->s_fs_info; + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct cache_detail *cd = nn->nfs_dns_resolve; + int ret = 0; + + if (cd == NULL) + return 0; + + if (!try_module_get(THIS_MODULE)) + return 0; + + switch (event) { + case RPC_PIPEFS_MOUNT: + ret = nfs_cache_register_sb(sb, cd); + break; + case RPC_PIPEFS_UMOUNT: + nfs_cache_unregister_sb(sb, cd); + break; + default: + ret = -ENOTSUPP; + break; + } + module_put(THIS_MODULE); + return ret; +} + +static struct notifier_block nfs_dns_resolver_block = { + .notifier_call = rpc_pipefs_event, +}; + int nfs_dns_resolver_init(void) { - return 0; + return rpc_pipefs_notifier_register(&nfs_dns_resolver_block); } void nfs_dns_resolver_destroy(void) { + rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block); } #endif From 39cb67b9a04300df41e201d9e6392691cdad080f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 25 Nov 2011 17:13:20 +0300 Subject: [PATCH 070/316] NFS: remove RPC PipeFS mount point references from NFS cache routines This is a cleanup patch. We don't need this reference anymore, because DNS resolver cache now creates it's dentries in per-net operations and on PipeFS mount/umount notification. Note that nfs_cache_register_net() now returns 0 instead of -ENOENT in case of PiepFS superblock absence. This is ok, Dns resolver cache will be regestered on PipeFS mount event. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/cache_lib.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 5905a31211e5..dded26368111 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -126,24 +126,14 @@ int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd) int nfs_cache_register_net(struct net *net, struct cache_detail *cd) { - struct vfsmount *mnt; struct super_block *pipefs_sb; - int ret; + int ret = 0; - mnt = rpc_get_mount(); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); pipefs_sb = rpc_get_sb_net(net); - if (!pipefs_sb) { - ret = -ENOENT; - goto err; + if (pipefs_sb) { + ret = nfs_cache_register_sb(pipefs_sb, cd); + rpc_put_sb_net(net); } - ret = nfs_cache_register_sb(pipefs_sb, cd); - rpc_put_sb_net(net); - if (!ret) - return ret; -err: - rpc_put_mount(); return ret; } @@ -162,7 +152,6 @@ void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) nfs_cache_unregister_sb(pipefs_sb, cd); rpc_put_sb_net(net); } - rpc_put_mount(); } void nfs_cache_init(struct cache_detail *cd) From ad6b134008f4e765dd19976552b929273ae523bd Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 16:12:38 +0400 Subject: [PATCH 071/316] SUNRPC: fix pipe->ops cleanup on pipe dentry unlink This patch looks late due to GSS AUTH patches sent already. But it fixes a flaw in RPC PipeFS pipes handling. I've added this patch in the series, because this series related to pipes. But it should be a part of previous series named "SUNPRC: cleanup PipeFS for network-namespace-aware users". Pipe dentry can be created and destroyed many times during pipe life cycle. This actually means, that we can't set pipe->ops to NULL in rpc_close_pipes() and use this variable as a flag, indicating, that pipe's dentry is unlinking. To follow this restriction, this patch replaces "pipe->ops = NULL" assignment and checks for NULL with "pipe->dentry = NULL" assignment and checks for NULL respectively. This patch also removes check for non-NULL pipe->ops (or pipe->dentry) in rpc_close_pipes() because it always non-NULL now. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 51 +++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 910de4169a8d..6b417fcabdbf 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -86,10 +86,6 @@ rpc_timeout_upcall_queue(struct work_struct *work) void (*destroy_msg)(struct rpc_pipe_msg *); spin_lock(&pipe->lock); - if (pipe->ops == NULL) { - spin_unlock(&pipe->lock); - return; - } destroy_msg = pipe->ops->destroy_msg; if (pipe->nreaders == 0) { list_splice_init(&pipe->pipe, &free_list); @@ -135,8 +131,6 @@ rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) int res = -EPIPE; spin_lock(&pipe->lock); - if (pipe->ops == NULL) - goto out; if (pipe->nreaders) { list_add_tail(&msg->list, &pipe->pipe); pipe->pipelen += msg->len; @@ -150,7 +144,6 @@ rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) pipe->pipelen += msg->len; res = 0; } -out: spin_unlock(&pipe->lock); wake_up(&pipe->waitq); return res; @@ -167,27 +160,23 @@ static void rpc_close_pipes(struct inode *inode) { struct rpc_pipe *pipe = RPC_I(inode)->pipe; - const struct rpc_pipe_ops *ops; int need_release; + LIST_HEAD(free_list); mutex_lock(&inode->i_mutex); - ops = pipe->ops; - if (ops != NULL) { - LIST_HEAD(free_list); - spin_lock(&pipe->lock); - need_release = pipe->nreaders != 0 || pipe->nwriters != 0; - pipe->nreaders = 0; - list_splice_init(&pipe->in_upcall, &free_list); - list_splice_init(&pipe->pipe, &free_list); - pipe->pipelen = 0; - pipe->ops = NULL; - spin_unlock(&pipe->lock); - rpc_purge_list(pipe, &free_list, ops->destroy_msg, -EPIPE); - pipe->nwriters = 0; - if (need_release && ops->release_pipe) - ops->release_pipe(inode); - cancel_delayed_work_sync(&pipe->queue_timeout); - } + spin_lock(&pipe->lock); + need_release = pipe->nreaders != 0 || pipe->nwriters != 0; + pipe->nreaders = 0; + list_splice_init(&pipe->in_upcall, &free_list); + list_splice_init(&pipe->pipe, &free_list); + pipe->pipelen = 0; + pipe->dentry = NULL; + spin_unlock(&pipe->lock); + rpc_purge_list(pipe, &free_list, pipe->ops->destroy_msg, -EPIPE); + pipe->nwriters = 0; + if (need_release && pipe->ops->release_pipe) + pipe->ops->release_pipe(inode); + cancel_delayed_work_sync(&pipe->queue_timeout); rpc_inode_setowner(inode, NULL); mutex_unlock(&inode->i_mutex); } @@ -223,7 +212,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp) int res = -ENXIO; mutex_lock(&inode->i_mutex); - if (pipe->ops == NULL) + if (pipe->dentry == NULL) goto out; first_open = pipe->nreaders == 0 && pipe->nwriters == 0; if (first_open && pipe->ops->open_pipe) { @@ -249,7 +238,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) int last_close; mutex_lock(&inode->i_mutex); - if (pipe->ops == NULL) + if (pipe->dentry == NULL) goto out; msg = filp->private_data; if (msg != NULL) { @@ -290,7 +279,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) int res = 0; mutex_lock(&inode->i_mutex); - if (pipe->ops == NULL) { + if (pipe->dentry == NULL) { res = -EPIPE; goto out_unlock; } @@ -333,7 +322,7 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of mutex_lock(&inode->i_mutex); res = -EPIPE; - if (pipe->ops != NULL) + if (pipe->dentry != NULL) res = pipe->ops->downcall(filp, buf, len); mutex_unlock(&inode->i_mutex); return res; @@ -348,7 +337,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) poll_wait(filp, &pipe->waitq, wait); mask = POLLOUT | POLLWRNORM; - if (pipe->ops == NULL) + if (pipe->dentry == NULL) mask |= POLLERR | POLLHUP; if (filp->private_data || !list_empty(&pipe->pipe)) mask |= POLLIN | POLLRDNORM; @@ -365,7 +354,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case FIONREAD: spin_lock(&pipe->lock); - if (pipe->ops == NULL) { + if (pipe->dentry == NULL) { spin_unlock(&pipe->lock); return -EPIPE; } From e50a7a1a42335243c94eeea4a8d23413cb02370d Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 16:12:46 +0400 Subject: [PATCH 072/316] NFS: make NFS client allocated per network namespace context This patch adds new net variable to nfs_client structure. This variable is set on NFS client creation and cheched during matching NFS client search. Initially current->nsproxy->net_ns is used as network namespace owner for new NFS client to create. This network namespace pointer is set during mount options parsing and thus can be passed from user-spave utils in future if will be necessary. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 16 +++++++++++++--- fs/nfs/internal.h | 1 + fs/nfs/super.c | 3 +++ include/linux/nfs_fs_sb.h | 1 + 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 31778f74357d..ca016fe44602 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -135,6 +135,7 @@ struct nfs_client_initdata { const struct nfs_rpc_ops *rpc_ops; int proto; u32 minorversion; + struct net *net; }; /* @@ -189,6 +190,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ if (!IS_ERR(cred)) clp->cl_machine_cred = cred; nfs_fscache_get_client_cookie(clp); + clp->net = cl_init->net; return clp; @@ -481,6 +483,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat /* Match the full socket address */ if (!nfs_sockaddr_cmp(sap, clap)) continue; + /* Match network namespace */ + if (clp->net != data->net) + continue; atomic_inc(&clp->cl_count); return clp; @@ -831,6 +836,7 @@ static int nfs_init_server(struct nfs_server *server, .addrlen = data->nfs_server.addrlen, .rpc_ops = &nfs_v2_clientops, .proto = data->nfs_server.protocol, + .net = data->net, }; struct rpc_timeout timeparms; struct nfs_client *clp; @@ -1393,7 +1399,7 @@ static int nfs4_set_client(struct nfs_server *server, const char *ip_addr, rpc_authflavor_t authflavour, int proto, const struct rpc_timeout *timeparms, - u32 minorversion) + u32 minorversion, struct net *net) { struct nfs_client_initdata cl_init = { .hostname = hostname, @@ -1402,6 +1408,7 @@ static int nfs4_set_client(struct nfs_server *server, .rpc_ops = &nfs_v4_clientops, .proto = proto, .minorversion = minorversion, + .net = net, }; struct nfs_client *clp; int error; @@ -1453,6 +1460,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, .rpc_ops = &nfs_v4_clientops, .proto = ds_proto, .minorversion = mds_clp->cl_minorversion, + .net = mds_clp->net, }; struct rpc_timeout ds_timeout = { .to_initval = 15 * HZ, @@ -1580,7 +1588,8 @@ static int nfs4_init_server(struct nfs_server *server, data->auth_flavors[0], data->nfs_server.protocol, &timeparms, - data->minorversion); + data->minorversion, + data->net); if (error < 0) goto error; @@ -1677,7 +1686,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, data->authflavor, parent_server->client->cl_xprt->prot, parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version); + parent_client->cl_mvops->minor_version, + parent_client->net); if (error < 0) goto error; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8102db9b926c..02fb2001a283 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -123,6 +123,7 @@ struct nfs_parsed_mount_data { } nfs_server; struct security_mnt_opts lsm_opts; + struct net *net; }; /* mount_clnt.c */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 3dfa4f112c0a..73aa75649bf8 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -1107,6 +1108,8 @@ static int nfs_parse_mount_options(char *raw, free_secdata(secdata); + mnt->net = current->nsproxy->net_ns; + while ((p = strsep(&raw, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; unsigned long option; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ba4d7656ecfd..e0863d35f753 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -85,6 +85,7 @@ struct nfs_client { #endif struct server_scope *server_scope; /* from exchange_id */ + struct net *net; }; /* From 6d59b8d599d594bc314026c6856424fe49df5513 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 16:12:54 +0400 Subject: [PATCH 073/316] NFS: pass NFS client owner network namespace to RPC client creation routine This patch replaces static "init_net" with nfs_client->net pointer in RPC client creation calls. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- fs/nfs/internal.h | 1 + fs/nfs/mount_clnt.c | 4 ++-- fs/nfs/super.c | 1 + 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ca016fe44602..64815b725409 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -647,7 +647,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { - .net = &init_net, + .net = clp->net, .protocol = clp->cl_proto, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 02fb2001a283..d602188f889f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -138,6 +138,7 @@ struct nfs_mount_request { int noresvport; unsigned int *auth_flav_len; rpc_authflavor_t *auth_flavs; + struct net *net; }; extern int nfs_mount(struct nfs_mount_request *info); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index d4c2d6b7507e..4fbe3a8e5e6b 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -153,7 +153,7 @@ int nfs_mount(struct nfs_mount_request *info) .rpc_resp = &result, }; struct rpc_create_args args = { - .net = &init_net, + .net = info->net, .protocol = info->protocol, .address = info->sap, .addrsize = info->salen, @@ -225,7 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info) .to_retries = 2, }; struct rpc_create_args args = { - .net = &init_net, + .net = info->net, .protocol = IPPROTO_UDP, .address = info->sap, .addrsize = info->salen, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 73aa75649bf8..e45feb0fee59 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1625,6 +1625,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, .noresvport = args->flags & NFS_MOUNT_NORESVPORT, .auth_flav_len = &server_authlist_len, .auth_flavs = server_authlist, + .net = args->net, }; int status; From 7bb782c6ac75898604eb547ed37b05c49b1edf21 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 16:13:03 +0400 Subject: [PATCH 074/316] NFS: create callback transports in parent transport network namespace This patch replaces static "init_net" references with parent transport xprt_net reference. Thus callback transports will be created in the same network namespace as respective NFS mount point was created. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 516f3375e067..d81040a7efc4 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -102,11 +102,11 @@ nfs4_callback_svc(void *vrqstp) * Prepare to bring up the NFSv4 callback service */ struct svc_rqst * -nfs4_callback_up(struct svc_serv *serv) +nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { int ret; - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, + ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; @@ -114,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, + ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nfs_callback_tcpport6 = ret; @@ -183,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) * fore channel connection. * Returns the input port (0) and sets the svc_serv bc_xprt on success */ - ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, + ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0, SVC_SOCK_ANONYMOUS); if (ret < 0) { rqstp = ERR_PTR(ret); @@ -269,7 +269,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) serv, xprt, &rqstp, &callback_svc); if (!minorversion_setup) { /* v4.0 callback setup */ - rqstp = nfs4_callback_up(serv); + rqstp = nfs4_callback_up(serv, xprt); callback_svc = nfs4_callback_svc; } From 4929d1d33fdbe8385cdd49ccd23563e9ff247ff8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 16:13:11 +0400 Subject: [PATCH 075/316] NFS: handle NFS idmap pipe PipeFS dentries by network namespace aware routines This patch makes NFS idmap pipes dentries allocated and destroyed in network namespace context by PipeFS network namespace aware routines. Network namespace context is obtained from nfs_client structure. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 61 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 588d7da5b17e..769274ed51c4 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -431,6 +431,56 @@ static const struct rpc_pipe_ops idmap_upcall_ops = { .destroy_msg = idmap_pipe_destroy_msg, }; +static void __nfs_idmap_unregister(struct rpc_pipe *pipe) +{ + if (pipe->dentry) + rpc_unlink(pipe->dentry); +} + +static int __nfs_idmap_register(struct dentry *dir, + struct idmap *idmap, + struct rpc_pipe *pipe) +{ + struct dentry *dentry; + + dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + pipe->dentry = dentry; + return 0; +} + +static void nfs_idmap_unregister(struct nfs_client *clp, + struct rpc_pipe *pipe) +{ + struct net *net = clp->net; + struct super_block *pipefs_sb; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + __nfs_idmap_unregister(pipe); + rpc_put_sb_net(net); + } +} + +static int nfs_idmap_register(struct nfs_client *clp, + struct idmap *idmap, + struct rpc_pipe *pipe) +{ + struct net *net = clp->net; + struct super_block *pipefs_sb; + int err = 0; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + if (clp->cl_rpcclient->cl_dentry) + err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, + idmap, pipe); + rpc_put_sb_net(net); + } + return err; +} + int nfs_idmap_new(struct nfs_client *clp) { @@ -450,12 +500,8 @@ nfs_idmap_new(struct nfs_client *clp) kfree(idmap); return error; } - - if (clp->cl_rpcclient->cl_dentry) - pipe->dentry = rpc_mkpipe_dentry(clp->cl_rpcclient->cl_dentry, - "idmap", idmap, pipe); - if (IS_ERR(pipe->dentry)) { - error = PTR_ERR(pipe->dentry); + error = nfs_idmap_register(clp, idmap, pipe); + if (error) { rpc_destroy_pipe_data(pipe); kfree(idmap); return error; @@ -478,8 +524,7 @@ nfs_idmap_delete(struct nfs_client *clp) if (!idmap) return; - if (idmap->idmap_pipe->dentry) - rpc_unlink(idmap->idmap_pipe->dentry); + nfs_idmap_unregister(clp, idmap->idmap_pipe); rpc_destroy_pipe_data(idmap->idmap_pipe); clp->cl_idmap = NULL; kfree(idmap); From eee17325f1dfbe004f1475743bab9e3d050d00f5 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 16:13:19 +0400 Subject: [PATCH 076/316] NFS: idmap PipeFS notifier introduced v2: 1) Added "nfs_idmap_init" and "nfs_idmap_quit" definitions for kernels built without CONFIG_NFS_V4 option set. This patch subscribes NFS clients to RPC pipefs notifications. Idmap notifier is registering on NFS module load. This notifier callback is responsible for creation/destruction of PipeFS idmap pipe dentry for NFS4 clients. Since ipdmap pipe is created in rpc client pipefs directory, we have make sure, that this directory has been created already. IOW RPC client notifier callback has been called already. To achive this, PipeFS notifier priorities has been introduced (RPC clients notifier priority is greater than NFS idmap one). But this approach gives another problem: unlink for RPC client directory will be called before NFS idmap pipe unlink on UMOUNT event and will fail, because directory is not empty. The solution, introduced in this patch, is to try to remove client directory once again after idmap pipe was unlinked. This looks like ugly hack, so probably it should be replaced in some more elegant way. Note that no locking required in notifier callback because PipeFS superblock pointer is passed as an argument from it's creation or destruction routine and thus we can be sure about it's validity. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 +- fs/nfs/idmap.c | 75 ++++++++++++++++++++++++++++++ fs/nfs/internal.h | 4 ++ include/linux/nfs_idmap.h | 23 ++++----- include/linux/sunrpc/rpc_pipe_fs.h | 7 +++ net/sunrpc/clnt.c | 1 + net/sunrpc/rpc_pipe.c | 16 +++++++ 7 files changed, 117 insertions(+), 13 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 64815b725409..ca9a4aa38dff 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -52,8 +52,8 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT -static DEFINE_SPINLOCK(nfs_client_lock); -static LIST_HEAD(nfs_client_list); +DEFINE_SPINLOCK(nfs_client_lock); +LIST_HEAD(nfs_client_list); static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 769274ed51c4..ff084d258c41 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -377,6 +377,7 @@ int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, #include #include "nfs4_fs.h" +#include "internal.h" #define IDMAP_HASH_SZ 128 @@ -530,6 +531,80 @@ nfs_idmap_delete(struct nfs_client *clp) kfree(idmap); } +static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, + struct super_block *sb) +{ + int err = 0; + + switch (event) { + case RPC_PIPEFS_MOUNT: + BUG_ON(clp->cl_rpcclient->cl_dentry == NULL); + err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, + clp->cl_idmap, + clp->cl_idmap->idmap_pipe); + break; + case RPC_PIPEFS_UMOUNT: + if (clp->cl_idmap->idmap_pipe) { + struct dentry *parent; + + parent = clp->cl_idmap->idmap_pipe->dentry->d_parent; + __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe); + /* + * Note: This is a dirty hack. SUNRPC hook has been + * called already but simple_rmdir() call for the + * directory returned with error because of idmap pipe + * inside. Thus now we have to remove this directory + * here. + */ + if (rpc_rmdir(parent)) + printk(KERN_ERR "%s: failed to remove clnt dir!\n", __func__); + } + break; + default: + printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event); + return -ENOTSUPP; + } + return err; +} + +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct super_block *sb = ptr; + struct nfs_client *clp; + int error = 0; + + spin_lock(&nfs_client_lock); + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + if (clp->net != sb->s_fs_info) + continue; + if (clp->rpc_ops != &nfs_v4_clientops) + continue; + error = __rpc_pipefs_event(clp, event, sb); + if (error) + break; + } + spin_unlock(&nfs_client_lock); + return error; +} + +#define PIPEFS_NFS_PRIO 1 + +static struct notifier_block nfs_idmap_block = { + .notifier_call = rpc_pipefs_event, + .priority = SUNRPC_PIPEFS_NFS_PRIO, +}; + +int nfs_idmap_init(void) +{ + return rpc_pipefs_notifier_register(&nfs_idmap_block); +} + +void nfs_idmap_quit(void) +{ + rpc_pipefs_notifier_unregister(&nfs_idmap_block); +} + /* * Helper routines for manipulating the hashtable */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d602188f889f..2b9836fe4434 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -182,6 +182,10 @@ static inline void nfs_fs_proc_exit(void) { } #endif +#ifdef CONFIG_NFS_V4 +extern spinlock_t nfs_client_lock; +extern struct list_head nfs_client_list; +#endif /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 308c18877018..3c9eeb7da646 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -69,10 +69,20 @@ struct nfs_server; struct nfs_fattr; struct nfs4_string; -#ifdef CONFIG_NFS_USE_NEW_IDMAPPER - +#ifdef CONFIG_NFS_V4 int nfs_idmap_init(void); void nfs_idmap_quit(void); +#else +static inline int nfs_idmap_init(void) +{ + return 0; +} + +static inline void nfs_idmap_quit(void) +{} +#endif + +#ifdef CONFIG_NFS_USE_NEW_IDMAPPER static inline int nfs_idmap_new(struct nfs_client *clp) { @@ -85,15 +95,6 @@ static inline void nfs_idmap_delete(struct nfs_client *clp) #else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */ -static inline int nfs_idmap_init(void) -{ - return 0; -} - -static inline void nfs_idmap_quit(void) -{ -} - int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 0d1f748f76da..ca32ebd14c18 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -49,6 +49,11 @@ RPC_I(struct inode *inode) return container_of(inode, struct rpc_inode, vfs_inode); } +enum { + SUNRPC_PIPEFS_NFS_PRIO, + SUNRPC_PIPEFS_RPC_PRIO, +}; + extern int rpc_pipefs_notifier_register(struct notifier_block *); extern void rpc_pipefs_notifier_unregister(struct notifier_block *); @@ -78,6 +83,8 @@ extern struct dentry *rpc_create_cache_dir(struct dentry *, struct cache_detail *); extern void rpc_remove_cache_dir(struct dentry *); +extern int rpc_rmdir(struct dentry *dentry); + struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); void rpc_destroy_pipe_data(struct rpc_pipe *pipe); extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ed7c22de9319..4c6848017168 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -222,6 +222,7 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, static struct notifier_block rpc_clients_block = { .notifier_call = rpc_pipefs_event, + .priority = SUNRPC_PIPEFS_RPC_PRIO, }; int rpc_clients_notifier_register(void) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 6b417fcabdbf..bae4e71d8663 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -616,6 +616,22 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) return ret; } +int rpc_rmdir(struct dentry *dentry) +{ + struct dentry *parent; + struct inode *dir; + int error; + + parent = dget_parent(dentry); + dir = parent->d_inode; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + error = __rpc_rmdir(dir, dentry); + mutex_unlock(&dir->i_mutex); + dput(parent); + return error; +} +EXPORT_SYMBOL_GPL(rpc_rmdir); + static int __rpc_unlink(struct inode *dir, struct dentry *dentry) { int ret; From 332dfab6f4e02d3c5897e9470492bee7d14f29cc Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 17:04:16 +0400 Subject: [PATCH 077/316] NFS: handle blocklayout pipe PipeFS dentry by network namespace aware routines This patch makes blocklayout pipe dentry allocated and destroyed in network namespace context by PipeFS network namespace aware routines. Network namespace context is obtained from nfs_client structure. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 61 +++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 848660fd58c4..80b0c4a40485 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -1025,10 +1025,55 @@ static const struct rpc_pipe_ops bl_upcall_ops = { .destroy_msg = bl_pipe_destroy_msg, }; +static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, + struct rpc_pipe *pipe) +{ + struct dentry *dir, *dentry; + + dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); + if (dir == NULL) + return ERR_PTR(-ENOENT); + dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); + dput(dir); + return dentry; +} + +static void nfs4blocklayout_unregister_sb(struct super_block *sb, + struct rpc_pipe *pipe) +{ + if (pipe->dentry) + rpc_unlink(pipe->dentry); +} + +static struct dentry *nfs4blocklayout_register_net(struct net *net, + struct rpc_pipe *pipe) +{ + struct super_block *pipefs_sb; + struct dentry *dentry; + + pipefs_sb = rpc_get_sb_net(net); + if (!pipefs_sb) + return ERR_PTR(-ENOENT); + dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); + rpc_put_sb_net(net); + return dentry; +} + +static void nfs4blocklayout_unregister_net(struct net *net, + struct rpc_pipe *pipe) +{ + struct super_block *pipefs_sb; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + nfs4blocklayout_unregister_sb(pipefs_sb, pipe); + rpc_put_sb_net(net); + } +} + static int __init nfs4blocklayout_init(void) { struct vfsmount *mnt; - struct path path; int ret; dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); @@ -1044,21 +1089,13 @@ static int __init nfs4blocklayout_init(void) ret = PTR_ERR(mnt); goto out_remove; } - - ret = vfs_path_lookup(mnt->mnt_root, - mnt, - NFS_PIPE_DIRNAME, 0, &path); - if (ret) - goto out_putrpc; - bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); - path_put(&path); if (IS_ERR(bl_device_pipe)) { ret = PTR_ERR(bl_device_pipe); goto out_putrpc; } - bl_device_pipe->dentry = rpc_mkpipe_dentry(path.dentry, "blocklayout", - NULL, bl_device_pipe); + bl_device_pipe->dentry = nfs4blocklayout_register_net(&init_net, + bl_device_pipe); if (IS_ERR(bl_device_pipe->dentry)) { ret = PTR_ERR(bl_device_pipe->dentry); goto out_destroy_pipe; @@ -1081,7 +1118,7 @@ static void __exit nfs4blocklayout_exit(void) __func__); pnfs_unregister_layoutdriver(&blocklayout_type); - rpc_unlink(bl_device_pipe->dentry); + nfs4blocklayout_unregister_net(&init_net, bl_device_pipe); rpc_destroy_pipe_data(bl_device_pipe); rpc_put_mount(); } From 9e2e74dba6ddce94da187369b50a27536147d5df Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 17:04:24 +0400 Subject: [PATCH 078/316] NFS: blocklayout pipe creation per network namespace context introduced This patch implements blocklayout pipe creation and registration per each existent network namespace. This was achived by registering NFS per-net operations, responsible for blocklayout pipe allocation/register and unregister/destruction instead of initialization and destruction of static "bl_device_pipe" pipe (this one was removed). Note, than pointer to network blocklayout pipe is stored in per-net "nfs_net" structure, because allocating of one more per-net structure for blocklayout module looks redundant. This patch also changes dev_remove() function prototype (and all it's callers, where it' requied) by adding network namespace pointer parameter, which is used to discover proper blocklayout pipe for rpc_queue_upcall() call. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 54 +++++++++++++++++++---------- fs/nfs/blocklayout/blocklayout.h | 3 +- fs/nfs/blocklayout/blocklayoutdev.c | 5 ++- fs/nfs/blocklayout/blocklayoutdm.c | 7 ++-- fs/nfs/inode.c | 1 + fs/nfs/netns.h | 1 + 6 files changed, 47 insertions(+), 24 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 80b0c4a40485..9da72b8a5542 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -46,7 +46,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andy Adamson "); MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); -struct rpc_pipe *bl_device_pipe; wait_queue_head_t bl_wq; static void print_page(struct page *page) @@ -1071,6 +1070,37 @@ static void nfs4blocklayout_unregister_net(struct net *net, } } +static int nfs4blocklayout_net_init(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct dentry *dentry; + + nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); + if (IS_ERR(nn->bl_device_pipe)) + return PTR_ERR(nn->bl_device_pipe); + dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); + if (IS_ERR(dentry)) { + rpc_destroy_pipe_data(nn->bl_device_pipe); + return PTR_ERR(dentry); + } + nn->bl_device_pipe->dentry = dentry; + return 0; +} + +static void nfs4blocklayout_net_exit(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); + rpc_destroy_pipe_data(nn->bl_device_pipe); + nn->bl_device_pipe = NULL; +} + +static struct pernet_operations nfs4blocklayout_net_ops = { + .init = nfs4blocklayout_net_init, + .exit = nfs4blocklayout_net_exit, +}; + static int __init nfs4blocklayout_init(void) { struct vfsmount *mnt; @@ -1089,24 +1119,12 @@ static int __init nfs4blocklayout_init(void) ret = PTR_ERR(mnt); goto out_remove; } - bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); - if (IS_ERR(bl_device_pipe)) { - ret = PTR_ERR(bl_device_pipe); - goto out_putrpc; - } - bl_device_pipe->dentry = nfs4blocklayout_register_net(&init_net, - bl_device_pipe); - if (IS_ERR(bl_device_pipe->dentry)) { - ret = PTR_ERR(bl_device_pipe->dentry); - goto out_destroy_pipe; - } + ret = register_pernet_subsys(&nfs4blocklayout_net_ops); + if (ret) + goto out_remove; out: return ret; -out_destroy_pipe: - rpc_destroy_pipe_data(bl_device_pipe); -out_putrpc: - rpc_put_mount(); out_remove: pnfs_unregister_layoutdriver(&blocklayout_type); return ret; @@ -1117,10 +1135,8 @@ static void __exit nfs4blocklayout_exit(void) dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", __func__); + unregister_pernet_subsys(&nfs4blocklayout_net_ops); pnfs_unregister_layoutdriver(&blocklayout_type); - nfs4blocklayout_unregister_net(&init_net, bl_device_pipe); - rpc_destroy_pipe_data(bl_device_pipe); - rpc_put_mount(); } MODULE_ALIAS("nfs-layouttype4-3"); diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 49c670b18a9e..0966b39bbcfb 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -37,6 +37,7 @@ #include #include "../pnfs.h" +#include "../netns.h" #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) @@ -50,6 +51,7 @@ struct pnfs_block_dev { struct list_head bm_node; struct nfs4_deviceid bm_mdevid; /* associated devid */ struct block_device *bm_mdev; /* meta device itself */ + struct net *net; }; enum exstate4 { @@ -161,7 +163,6 @@ struct bl_msg_hdr { u16 totallen; /* length of entire message, including hdr itself */ }; -extern struct rpc_pipe *bl_device_pipe; extern wait_queue_head_t bl_wq; #define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index 949b62478799..94ed978860c0 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -120,6 +120,8 @@ nfs4_blk_decode_device(struct nfs_server *server, DECLARE_WAITQUEUE(wq, current); struct bl_dev_msg *reply = &bl_mount_reply; int offset, len, i, rc; + struct net *net = server->nfs_client->net; + struct nfs_net *nn = net_generic(net, nfs_net_id); dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, @@ -146,7 +148,7 @@ nfs4_blk_decode_device(struct nfs_server *server, dprintk("%s CALLING USERSPACE DAEMON\n", __func__); add_wait_queue(&bl_wq, &wq); - rc = rpc_queue_upcall(bl_device_pipe, &msg); + rc = rpc_queue_upcall(nn->bl_device_pipe, &msg); if (rc < 0) { remove_wait_queue(&bl_wq, &wq); rv = ERR_PTR(rc); @@ -181,6 +183,7 @@ nfs4_blk_decode_device(struct nfs_server *server, rv->bm_mdev = bd; memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); + rv->net = net; dprintk("%s Created device %s with bd_block_size %u\n", __func__, bd->bd_disk->disk_name, diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 631f254d12ab..970490f556de 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -38,7 +38,7 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static void dev_remove(dev_t dev) +static void dev_remove(struct net *net, dev_t dev) { struct rpc_pipe_msg msg; struct bl_dev_msg bl_umount_request; @@ -48,6 +48,7 @@ static void dev_remove(dev_t dev) }; uint8_t *dataptr; DECLARE_WAITQUEUE(wq, current); + struct nfs_net *nn = net_generic(net, nfs_net_id); dprintk("Entering %s\n", __func__); @@ -66,7 +67,7 @@ static void dev_remove(dev_t dev) msg.len = sizeof(bl_msg) + bl_msg.totallen; add_wait_queue(&bl_wq, &wq); - if (rpc_queue_upcall(bl_device_pipe, &msg) < 0) { + if (rpc_queue_upcall(nn->bl_device_pipe, &msg) < 0) { remove_wait_queue(&bl_wq, &wq); goto out; } @@ -93,7 +94,7 @@ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n", __func__, rv); - dev_remove(bdev->bm_mdev->bd_dev); + dev_remove(bdev->net, bdev->bm_mdev->bd_dev); } void bl_free_block_dev(struct pnfs_block_dev *bdev) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0335f6e4ff7e..577ad5a72a24 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1554,6 +1554,7 @@ static void nfsiod_stop(void) } int nfs_net_id; +EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 8c1f130d6ca2..39ae4cad5b4b 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -6,6 +6,7 @@ struct nfs_net { struct cache_detail *nfs_dns_resolve; + struct rpc_pipe *bl_device_pipe; }; extern int nfs_net_id; From 627f30668fac12f5bd555a2cc22af2323762fe8d Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 17:04:32 +0400 Subject: [PATCH 079/316] NFS: blocklayout PipeFS notifier introduced This patch subscribes blocklayout pipes to RPC pipefs notifications. Notifier is registering on blocklayout module load. This notifier callback is responsible for creation/destruction of PipeFS blocklayout pipe dentry. Note that no locking required in notifier callback because PipeFS superblock pointer is passed as an argument from it's creation or destruction routine and thus we can be sure about it's validity. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 50 +++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9da72b8a5542..df05b9465146 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -1044,6 +1044,48 @@ static void nfs4blocklayout_unregister_sb(struct super_block *sb, rpc_unlink(pipe->dentry); } +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct super_block *sb = ptr; + struct net *net = sb->s_fs_info; + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct dentry *dentry; + int ret = 0; + + if (!try_module_get(THIS_MODULE)) + return 0; + + if (nn->bl_device_pipe == NULL) { + module_put(THIS_MODULE); + return 0; + } + + switch (event) { + case RPC_PIPEFS_MOUNT: + dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + break; + } + nn->bl_device_pipe->dentry = dentry; + break; + case RPC_PIPEFS_UMOUNT: + if (nn->bl_device_pipe->dentry) + nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); + break; + default: + ret = -ENOTSUPP; + break; + } + module_put(THIS_MODULE); + return ret; +} + +static struct notifier_block nfs4blocklayout_block = { + .notifier_call = rpc_pipefs_event, +}; + static struct dentry *nfs4blocklayout_register_net(struct net *net, struct rpc_pipe *pipe) { @@ -1119,12 +1161,17 @@ static int __init nfs4blocklayout_init(void) ret = PTR_ERR(mnt); goto out_remove; } - ret = register_pernet_subsys(&nfs4blocklayout_net_ops); + ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); if (ret) goto out_remove; + ret = register_pernet_subsys(&nfs4blocklayout_net_ops); + if (ret) + goto out_notifier; out: return ret; +out_notifier: + rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); out_remove: pnfs_unregister_layoutdriver(&blocklayout_type); return ret; @@ -1135,6 +1182,7 @@ static void __exit nfs4blocklayout_exit(void) dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", __func__); + rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); unregister_pernet_subsys(&nfs4blocklayout_net_ops); pnfs_unregister_layoutdriver(&blocklayout_type); } From 2561d618ffb615f92fe17f0cf6b03f8e5cddb2cb Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 17:04:40 +0400 Subject: [PATCH 080/316] NFS: remove RPC PipeFS mount point reference from blocklayout routines This is a cleanup patch. We don't need this reference anymore, because blocklayout pipes dentries now creates and destroys in per-net operations and on PipeFS mount/umount notification. Note that nfs4blocklayout_register_net() now returns 0 instead of -ENOENT in case of PipeFS superblock absence. This is ok, because blocklayout pipe dentry will be created on PipeFS mount event. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index df05b9465146..783ebd51bd5f 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -1094,7 +1094,7 @@ static struct dentry *nfs4blocklayout_register_net(struct net *net, pipefs_sb = rpc_get_sb_net(net); if (!pipefs_sb) - return ERR_PTR(-ENOENT); + return NULL; dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); rpc_put_sb_net(net); return dentry; @@ -1145,7 +1145,6 @@ static struct pernet_operations nfs4blocklayout_net_ops = { static int __init nfs4blocklayout_init(void) { - struct vfsmount *mnt; int ret; dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); @@ -1155,12 +1154,6 @@ static int __init nfs4blocklayout_init(void) goto out; init_waitqueue_head(&bl_wq); - - mnt = rpc_get_mount(); - if (IS_ERR(mnt)) { - ret = PTR_ERR(mnt); - goto out_remove; - } ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); if (ret) goto out_remove; From 12bc372b96b35a2dc9245ec61369028932b82ea8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 17:04:48 +0400 Subject: [PATCH 081/316] SUNRPC: kernel PipeFS mount point creation routines removed This patch removes static rpc_mnt variable and its creation and destruction routines, because they are not used anymore. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 2 -- net/sunrpc/rpc_pipe.c | 21 --------------------- 2 files changed, 23 deletions(-) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index ca32ebd14c18..426ce6eeee66 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -90,8 +90,6 @@ void rpc_destroy_pipe_data(struct rpc_pipe *pipe); extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, struct rpc_pipe *); extern int rpc_unlink(struct dentry *); -extern struct vfsmount *rpc_get_mount(void); -extern void rpc_put_mount(void); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index bae4e71d8663..6873c9b51cc9 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -37,9 +36,6 @@ #define NET_NAME(net) ((net == &init_net) ? " (init_net)" : "") -static struct vfsmount *rpc_mnt __read_mostly; -static int rpc_mount_count; - static struct file_system_type rpc_pipe_fs_type; @@ -449,23 +445,6 @@ struct rpc_filelist { umode_t mode; }; -struct vfsmount *rpc_get_mount(void) -{ - int err; - - err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count); - if (err != 0) - return ERR_PTR(err); - return rpc_mnt; -} -EXPORT_SYMBOL_GPL(rpc_get_mount); - -void rpc_put_mount(void) -{ - simple_release_fs(&rpc_mnt, &rpc_mount_count); -} -EXPORT_SYMBOL_GPL(rpc_put_mount); - static int rpc_delete_dentry(const struct dentry *dentry) { return 1; From 1313e6034a73a55d6293dbdc62b8853dd067771a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:04:24 -0500 Subject: [PATCH 082/316] NFS: Remove unnecessary includes from linux/nfs_fs_i.h Also from linux/nfs_xdr.h. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 1 + include/linux/nfs_fs_i.h | 4 ---- include/linux/nfs_xdr.h | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 5668f7c54c41..77a184e2fe47 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h index 861730275ba0..a5c50d97341e 100644 --- a/include/linux/nfs_fs_i.h +++ b/include/linux/nfs_fs_i.h @@ -1,10 +1,6 @@ #ifndef _NFS_FS_I #define _NFS_FS_I -#include -#include -#include - struct nlm_lockowner; /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a764cef06b73..f5188e10ea0e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -2,7 +2,6 @@ #define _LINUX_NFS_XDR_H #include -#include #include /* From d1e284d50a1506aab8ad7895f31b5f93b5647fc9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:04:24 -0500 Subject: [PATCH 083/316] NFSv4: Clean up nfs4_get_state_owner Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfs4state.c | 24 +++++++++++------------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4d7d0aedc101..654b091644c5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -319,7 +319,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) } #endif /* CONFIG_NFS_V4_1 */ -extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern void nfs4_purge_state_owners(struct nfs_server *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f0c849c98fe4..53ef365f4372 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1754,7 +1754,8 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode /* Protect against reboot recovery conflicts */ status = -ENOMEM; - if (!(sp = nfs4_get_state_owner(server, cred))) { + sp = nfs4_get_state_owner(server, cred, GFP_KERNEL); + if (sp == NULL) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); goto out_err; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a53f33b4ac3a..a8a42a677d8f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -444,13 +444,17 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) * */ static struct nfs4_state_owner * -nfs4_alloc_state_owner(void) +nfs4_alloc_state_owner(struct nfs_server *server, + struct rpc_cred *cred, + gfp_t gfp_flags) { struct nfs4_state_owner *sp; - sp = kzalloc(sizeof(*sp),GFP_NOFS); + sp = kzalloc(sizeof(*sp), gfp_flags); if (!sp) return NULL; + sp->so_server = server; + sp->so_cred = get_rpccred(cred); spin_lock_init(&sp->so_lock); INIT_LIST_HEAD(&sp->so_states); rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); @@ -516,7 +520,8 @@ static void nfs4_gc_state_owners(struct nfs_server *server) * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, - struct rpc_cred *cred) + struct rpc_cred *cred, + gfp_t gfp_flags) { struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; @@ -526,20 +531,13 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, spin_unlock(&clp->cl_lock); if (sp != NULL) goto out; - new = nfs4_alloc_state_owner(); + new = nfs4_alloc_state_owner(server, cred, gfp_flags); if (new == NULL) goto out; - new->so_server = server; - new->so_cred = cred; - spin_lock(&clp->cl_lock); sp = nfs4_insert_state_owner_locked(new); spin_unlock(&clp->cl_lock); - if (sp == new) - get_rpccred(cred); - else { - rpc_destroy_wait_queue(&new->so_sequence.wait); - kfree(new); - } + if (sp != new) + nfs4_free_state_owner(new); out: nfs4_gc_state_owners(server); return sp; From 9157c31dd610a127bc6f01bc1953cf8b80382040 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:04:24 -0500 Subject: [PATCH 084/316] NFSv4: Replace state_owner->so_owner_id with an ida based allocator We're unlikely to ever need more than 2^31 simultaneous open owners, so let's replace the custom allocator with the generic ida allocator. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 4 ++-- fs/nfs/nfs4state.c | 17 ++++++++++++----- include/linux/nfs_fs_sb.h | 3 ++- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ca9a4aa38dff..8d1739d3424d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1092,6 +1092,7 @@ static struct nfs_server *nfs_alloc_server(void) return NULL; } + ida_init(&server->openowner_id); pnfs_init_server(server); return server; @@ -1117,6 +1118,7 @@ void nfs_free_server(struct nfs_server *server) nfs_put_client(server->nfs_client); + ida_destroy(&server->openowner_id); nfs_free_iostats(server->io_stats); bdi_destroy(&server->backing_dev_info); kfree(server); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 654b091644c5..091b679747ed 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -92,7 +92,6 @@ struct nfs_unique_id { * semantics by allowing the server to identify replayed requests. */ struct nfs4_state_owner { - struct nfs_unique_id so_owner_id; struct nfs_server *so_server; struct list_head so_lru; unsigned long so_expires; @@ -106,6 +105,7 @@ struct nfs4_state_owner { struct list_head so_states; struct nfs_seqid_counter so_seqid; struct rpc_sequence so_sequence; + int so_owner_id; }; enum { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 53ef365f4372..1dd2e407a901 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -815,7 +815,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); p->o_arg.clientid = server->nfs_client->cl_clientid; - p->o_arg.id = sp->so_owner_id.id; + p->o_arg.id = sp->so_owner_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; @@ -1440,7 +1440,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_unlock(); } /* Update sequence id. */ - data->o_arg.id = sp->so_owner_id.id; + data->o_arg.id = sp->so_owner_id; data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a8a42a677d8f..8472707286f9 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -405,6 +405,7 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp; + int err; while (*p != NULL) { parent = *p; @@ -421,8 +422,9 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) return sp; } } - nfs_alloc_unique_id_locked(&server->openowner_id, - &new->so_owner_id, 1, 64); + err = ida_get_new(&server->openowner_id, &new->so_owner_id); + if (err) + return ERR_PTR(err); rb_link_node(&new->so_server_node, parent, p); rb_insert_color(&new->so_server_node, &server->state_owners); return new; @@ -435,7 +437,7 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) if (!RB_EMPTY_NODE(&sp->so_server_node)) rb_erase(&sp->so_server_node, &server->state_owners); - nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id); + ida_remove(&server->openowner_id, sp->so_owner_id); } /* @@ -534,8 +536,13 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, new = nfs4_alloc_state_owner(server, cred, gfp_flags); if (new == NULL) goto out; - sp = nfs4_insert_state_owner_locked(new); - spin_unlock(&clp->cl_lock); + do { + if (ida_pre_get(&server->openowner_id, gfp_flags) == 0) + break; + spin_lock(&clp->cl_lock); + sp = nfs4_insert_state_owner_locked(new); + spin_unlock(&clp->cl_lock); + } while (sp == ERR_PTR(-EAGAIN)); if (sp != new) nfs4_free_state_owner(new); out: diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e0863d35f753..645537e18bd0 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -151,9 +152,9 @@ struct nfs_server { /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; - struct rb_root openowner_id; struct rb_root lockowner_id; #endif + struct ida openowner_id; struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; From d2d7ce28a2f8ec6ca2a49145e643d2e3c7d21ba3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:04:25 -0500 Subject: [PATCH 085/316] NFSv4: Replace lock_owner->ld_id with an ida based allocator Again, We're unlikely to ever need more than 2^31 simultaneous lock owners, so let's replace the custom allocator. Now that there are no more users, we can also get rid of the custom allocator code. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/nfs4_fs.h | 7 +--- fs/nfs/nfs4proc.c | 6 ++-- fs/nfs/nfs4state.c | 74 +++++---------------------------------- include/linux/nfs_fs_sb.h | 2 +- 5 files changed, 15 insertions(+), 76 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8d1739d3424d..df60d9971b95 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1093,6 +1093,7 @@ static struct nfs_server *nfs_alloc_server(void) } ida_init(&server->openowner_id); + ida_init(&server->lockowner_id); pnfs_init_server(server); return server; @@ -1118,6 +1119,7 @@ void nfs_free_server(struct nfs_server *server) nfs_put_client(server->nfs_client); + ida_destroy(&server->lockowner_id); ida_destroy(&server->openowner_id); nfs_free_iostats(server->io_stats); bdi_destroy(&server->backing_dev_info); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 091b679747ed..b23cb0cda632 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -81,11 +81,6 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status seqid->flags |= NFS_SEQID_CONFIRMED; } -struct nfs_unique_id { - struct rb_node rb_node; - __u64 id; -}; - /* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only @@ -145,9 +140,9 @@ struct nfs4_lock_state { struct nfs4_state * ls_state; /* Pointer to open state */ #define NFS_LOCK_INITIALIZED 1 int ls_flags; + int ls_id; struct nfs_seqid_counter ls_seqid; struct rpc_sequence ls_sequence; - struct nfs_unique_id ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; struct nfs4_lock_owner ls_owner; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1dd2e407a901..9c77af900960 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4017,7 +4017,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; lsp = request->fl_u.nfs4_fl.owner; - arg.lock_owner.id = lsp->ls_id.id; + arg.lock_owner.id = lsp->ls_id; arg.lock_owner.s_dev = server->s_dev; status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); switch (status) { @@ -4262,7 +4262,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, goto out_free_seqid; p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; - p->arg.lock_owner.id = lsp->ls_id.id; + p->arg.lock_owner.id = lsp->ls_id; p->arg.lock_owner.s_dev = server->s_dev; p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; @@ -4679,7 +4679,7 @@ void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) if (!args) return; args->lock_owner.clientid = server->nfs_client->cl_clientid; - args->lock_owner.id = lsp->ls_id.id; + args->lock_owner.id = lsp->ls_id; args->lock_owner.s_dev = server->s_dev; msg.rpc_argp = args; rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8472707286f9..5abf23615bc5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -317,62 +317,6 @@ out: return cred; } -static void nfs_alloc_unique_id_locked(struct rb_root *root, - struct nfs_unique_id *new, - __u64 minval, int maxbits) -{ - struct rb_node **p, *parent; - struct nfs_unique_id *pos; - __u64 mask = ~0ULL; - - if (maxbits < 64) - mask = (1ULL << maxbits) - 1ULL; - - /* Ensure distribution is more or less flat */ - get_random_bytes(&new->id, sizeof(new->id)); - new->id &= mask; - if (new->id < minval) - new->id += minval; -retry: - p = &root->rb_node; - parent = NULL; - - while (*p != NULL) { - parent = *p; - pos = rb_entry(parent, struct nfs_unique_id, rb_node); - - if (new->id < pos->id) - p = &(*p)->rb_left; - else if (new->id > pos->id) - p = &(*p)->rb_right; - else - goto id_exists; - } - rb_link_node(&new->rb_node, parent, p); - rb_insert_color(&new->rb_node, root); - return; -id_exists: - for (;;) { - new->id++; - if (new->id < minval || (new->id & mask) != new->id) { - new->id = minval; - break; - } - parent = rb_next(parent); - if (parent == NULL) - break; - pos = rb_entry(parent, struct nfs_unique_id, rb_node); - if (new->id < pos->id) - break; - } - goto retry; -} - -static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) -{ - rb_erase(&id->rb_node, root); -} - static struct nfs4_state_owner * nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) { @@ -800,7 +744,6 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f { struct nfs4_lock_state *lsp; struct nfs_server *server = state->owner->so_server; - struct nfs_client *clp = server->nfs_client; lsp = kzalloc(sizeof(*lsp), GFP_NOFS); if (lsp == NULL) @@ -820,24 +763,23 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f lsp->ls_owner.lo_u.posix_owner = fl_owner; break; default: - kfree(lsp); - return NULL; + goto out_free; } - spin_lock(&clp->cl_lock); - nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64); - spin_unlock(&clp->cl_lock); + lsp->ls_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); + if (lsp->ls_id < 0) + goto out_free; INIT_LIST_HEAD(&lsp->ls_locks); return lsp; +out_free: + kfree(lsp); + return NULL; } static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) { struct nfs_server *server = lsp->ls_state->owner->so_server; - struct nfs_client *clp = server->nfs_client; - spin_lock(&clp->cl_lock); - nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id); - spin_unlock(&clp->cl_lock); + ida_simple_remove(&server->lockowner_id, lsp->ls_id); rpc_destroy_wait_queue(&lsp->ls_sequence.wait); kfree(lsp); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 645537e18bd0..c23469308b8e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -152,9 +152,9 @@ struct nfs_server { /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; - struct rb_root lockowner_id; #endif struct ida openowner_id; + struct ida lockowner_id; struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; From 9d12b216aa87f68c96f6dd8eb5d2d0ccc9989b1c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:04:25 -0500 Subject: [PATCH 086/316] NFSv41: Add a new helper nfs4_init_sequence() Clean up Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 6 +-- fs/nfs/nfs4filelayout.c | 4 +- fs/nfs/nfs4proc.c | 93 +++++++++++++++++++++++++---------------- fs/nfs/read.c | 2 +- fs/nfs/unlink.c | 4 +- fs/nfs/write.c | 2 +- 6 files changed, 65 insertions(+), 46 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b23cb0cda632..0924494e10a2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -230,10 +230,10 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser extern int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task); + struct rpc_task *task); extern int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task); + struct rpc_task *task); extern void nfs4_destroy_session(struct nfs4_session *session); extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); extern int nfs4_proc_create_session(struct nfs_client *); @@ -264,7 +264,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser static inline int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task) + struct rpc_task *task) { return 0; } diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 71ec08617e23..b4f8f9624afa 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -173,7 +173,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, - 0, task)) + task)) return; rpc_call_start(task); @@ -254,7 +254,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, - 0, task)) + task)) return; rpc_call_start(task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9c77af900960..8926d33383ed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -534,10 +534,20 @@ out: return ret_id; } +static void nfs41_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) +{ + args->sa_session = NULL; + args->sa_cache_this = 0; + if (cache_reply) + args->sa_cache_this = 1; + res->sr_session = NULL; + res->sr_slot = NULL; +} + int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task) { struct nfs4_slot *slot; @@ -582,7 +592,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, slot = tbl->slots + slotid; args->sa_session = session; args->sa_slotid = slotid; - args->sa_cache_this = cache_reply; dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); @@ -602,24 +611,19 @@ EXPORT_SYMBOL_GPL(nfs41_setup_sequence); int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task) { struct nfs4_session *session = nfs4_get_session(server); int ret = 0; - if (session == NULL) { - args->sa_session = NULL; - res->sr_session = NULL; + if (session == NULL) goto out; - } dprintk("--> %s clp %p session %p sr_slot %td\n", __func__, session->clp, session, res->sr_slot ? res->sr_slot - session->fc_slot_table.slots : -1); - ret = nfs41_setup_sequence(session, args, res, cache_reply, - task); + ret = nfs41_setup_sequence(session, args, res, task); out: dprintk("<-- %s status=%d\n", __func__, ret); return ret; @@ -629,7 +633,6 @@ struct nfs41_call_sync_data { const struct nfs_server *seq_server; struct nfs4_sequence_args *seq_args; struct nfs4_sequence_res *seq_res; - int cache_reply; }; static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) @@ -639,7 +642,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); if (nfs4_setup_sequence(data->seq_server, data->seq_args, - data->seq_res, data->cache_reply, task)) + data->seq_res, task)) return; rpc_call_start(task); } @@ -672,7 +675,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct rpc_message *msg, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, int privileged) { int ret; @@ -681,7 +683,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .seq_server = server, .seq_args = args, .seq_res = res, - .cache_reply = cache_reply, }; struct rpc_task_setup task_setup = { .rpc_client = clnt, @@ -690,7 +691,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .callback_data = &data }; - res->sr_slot = NULL; if (privileged) task_setup.callback_ops = &nfs41_call_priv_sync_ops; task = rpc_run_task(&task_setup); @@ -710,10 +710,17 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0); + nfs41_init_sequence(args, res, cache_reply); + return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0); } #else +static inline +void nfs41_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) +{ +} + static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -728,7 +735,7 @@ int _nfs4_call_sync(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - args->sa_session = res->sr_session = NULL; + nfs41_init_sequence(args, res, cache_reply); return rpc_call_sync(clnt, msg, 0); } @@ -1449,7 +1456,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, - &data->o_res.seq_res, 1, task)) + &data->o_res.seq_res, task)) return; rpc_call_start(task); return; @@ -1551,6 +1558,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) }; int status; + nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; @@ -2030,8 +2038,9 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) nfs_fattr_init(calldata->res.fattr); calldata->timestamp = jiffies; if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), - &calldata->arg.seq_args, &calldata->res.seq_res, - 1, task)) + &calldata->arg.seq_args, + &calldata->res.seq_res, + task)) return; rpc_call_start(task); } @@ -2075,6 +2084,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; + nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); @@ -2713,8 +2723,8 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) args->bitmask = server->cache_consistency_bitmask; res->server = server; - res->seq_res.sr_slot = NULL; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; + nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); } static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) @@ -2739,6 +2749,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; arg->bitmask = server->attr_bitmask; res->server = server; + nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); } static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, @@ -3233,6 +3244,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message data->timestamp = jiffies; data->read_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); } /* Reset the the nfs_read_data to send the read to the MDS. */ @@ -3306,6 +3318,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag data->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -3340,6 +3353,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa data->write_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } struct nfs4_renewdata { @@ -3892,7 +3906,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) if (nfs4_setup_sequence(d_data->res.server, &d_data->args.seq_args, - &d_data->res.seq_res, 1, task)) + &d_data->res.seq_res, task)) return; rpc_call_start(task); } @@ -3926,6 +3940,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data = kzalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; data->args.bitmask = server->attr_bitmask; @@ -4143,7 +4158,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) calldata->timestamp = jiffies; if (nfs4_setup_sequence(calldata->server, &calldata->arg.seq_args, - &calldata->res.seq_res, 1, task)) + &calldata->res.seq_res, task)) return; rpc_call_start(task); } @@ -4183,6 +4198,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } + nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -4298,7 +4314,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, - &data->res.seq_res, 1, task)) + &data->res.seq_res, task)) return; rpc_call_start(task); dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); @@ -4416,6 +4432,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.reclaim = NFS_LOCK_RECLAIM; task_setup_data.callback_ops = &nfs4_recover_lock_ops; } + nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -4931,7 +4948,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, since we're invoked within one */ ret = nfs41_setup_sequence(data->clp->cl_session, &data->args->la_seq_args, - &data->res->lr_seq_res, 0, task); + &data->res->lr_seq_res, task); BUG_ON(ret == -EAGAIN); rpc_call_start(task); @@ -4995,6 +5012,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) }; int status; + nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -5468,7 +5486,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) args = task->tk_msg.rpc_argp; res = task->tk_msg.rpc_resp; - if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task)) + if (nfs41_setup_sequence(clp->cl_session, args, res, task)) return; rpc_call_start(task); } @@ -5500,6 +5518,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ nfs_put_client(clp); return ERR_PTR(-ENOMEM); } + nfs41_init_sequence(&calldata->args, &calldata->res, 0); msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -5561,7 +5580,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); if (nfs41_setup_sequence(calldata->clp->cl_session, &calldata->arg.seq_args, - &calldata->res.seq_res, 0, task)) + &calldata->res.seq_res, task)) return; rpc_call_start(task); @@ -5640,6 +5659,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) calldata->clp = clp; calldata->arg.one_fs = 0; + nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -5671,7 +5691,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) * to be no way to prevent it completely. */ if (nfs4_setup_sequence(server, &lgp->args.seq_args, - &lgp->res.seq_res, 0, task)) + &lgp->res.seq_res, task)) return; if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, NFS_I(lgp->args.inode)->layout, @@ -5746,6 +5766,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; + nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -5766,7 +5787,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) dprintk("--> %s\n", __func__); if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, - &lrp->res.seq_res, 0, task)) + &lrp->res.seq_res, task)) return; rpc_call_start(task); } @@ -5832,6 +5853,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) int status; dprintk("--> %s\n", __func__); + nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -5932,7 +5954,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(data->args.inode); if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, 1, task)) + &data->res.seq_res, task)) return; rpc_call_start(task); } @@ -6019,6 +6041,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) data->args.lastbytewritten, data->args.inode->i_ino); + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -6114,7 +6137,6 @@ out: } static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) { - int status; struct nfs41_test_stateid_args args = { .stateid = &state->stateid, }; @@ -6124,9 +6146,8 @@ static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *sta .rpc_argp = &args, .rpc_resp = &res, }; - args.seq_args.sa_session = res.seq_res.sr_session = NULL; - status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); - return status; + nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); + return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); } static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) @@ -6143,7 +6164,6 @@ static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *stat static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) { - int status; struct nfs41_free_stateid_args args = { .stateid = &state->stateid, }; @@ -6154,9 +6174,8 @@ static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *stat .rpc_resp = &res, }; - args.seq_args.sa_session = res.seq_res.sr_session = NULL; - status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); - return status; + nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); + return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); } static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index cfa175c223dc..3c2540d532c7 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -472,7 +472,7 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata) if (nfs4_setup_sequence(NFS_SERVER(data->inode), &data->args.seq_args, &data->res.seq_res, - 0, task)) + task)) return; rpc_call_start(task); } diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 4f9319a2e567..490613b709b6 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -114,7 +114,7 @@ void nfs_unlink_prepare(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(data->dir); if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, 1, task)) + &data->res.seq_res, task)) return; rpc_call_start(task); } @@ -410,7 +410,7 @@ static void nfs_rename_prepare(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(data->old_dir); if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, 1, task)) + &data->res.seq_res, task)) return; rpc_call_start(task); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 834f0fe96f89..0b1831d95849 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1135,7 +1135,7 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata) if (nfs4_setup_sequence(NFS_SERVER(data->inode), &data->args.seq_args, - &data->res.seq_res, 1, task)) + &data->res.seq_res, task)) return; rpc_call_start(task); } From 7ba127ab9f5f83991df4142d5bc4fc319cd77a54 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:04:25 -0500 Subject: [PATCH 087/316] NFSv4: Move contents of struct rpc_sequence into struct nfs_seqid_counter Clean up. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 16 +++------------- fs/nfs/nfs4state.c | 36 +++++++++++++++++++++++------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0924494e10a2..c4025ae1d071 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -53,21 +53,13 @@ struct nfs4_minor_version_ops { const struct nfs4_state_maintenance_ops *state_renewal_ops; }; -/* - * struct rpc_sequence ensures that RPC calls are sent in the exact - * order that they appear on the list. - */ -struct rpc_sequence { - struct rpc_wait_queue wait; /* RPC call delay queue */ - spinlock_t lock; /* Protects the list */ - struct list_head list; /* Defines sequence of RPC calls */ -}; - #define NFS_SEQID_CONFIRMED 1 struct nfs_seqid_counter { - struct rpc_sequence *sequence; int flags; u32 counter; + spinlock_t lock; /* Protects the list */ + struct list_head list; /* Defines sequence of RPC calls */ + struct rpc_wait_queue wait; /* RPC call delay queue */ }; struct nfs_seqid { @@ -99,7 +91,6 @@ struct nfs4_state_owner { unsigned long so_flags; struct list_head so_states; struct nfs_seqid_counter so_seqid; - struct rpc_sequence so_sequence; int so_owner_id; }; @@ -142,7 +133,6 @@ struct nfs4_lock_state { int ls_flags; int ls_id; struct nfs_seqid_counter ls_seqid; - struct rpc_sequence ls_sequence; nfs4_stateid ls_stateid; atomic_t ls_count; struct nfs4_lock_owner ls_owner; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5abf23615bc5..cf7bc39aa0ee 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -384,6 +384,22 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) ida_remove(&server->openowner_id, sp->so_owner_id); } +static void +nfs4_init_seqid_counter(struct nfs_seqid_counter *sc) +{ + sc->flags = 0; + sc->counter = 0; + spin_lock_init(&sc->lock); + INIT_LIST_HEAD(&sc->list); + rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue"); +} + +static void +nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc) +{ + rpc_destroy_wait_queue(&sc->wait); +} + /* * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to * create a new state_owner. @@ -403,10 +419,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, sp->so_cred = get_rpccred(cred); spin_lock_init(&sp->so_lock); INIT_LIST_HEAD(&sp->so_states); - rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); - sp->so_seqid.sequence = &sp->so_sequence; - spin_lock_init(&sp->so_sequence.lock); - INIT_LIST_HEAD(&sp->so_sequence.list); + nfs4_init_seqid_counter(&sp->so_seqid); atomic_set(&sp->so_count, 1); INIT_LIST_HEAD(&sp->so_lru); return sp; @@ -428,7 +441,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) static void nfs4_free_state_owner(struct nfs4_state_owner *sp) { - rpc_destroy_wait_queue(&sp->so_sequence.wait); + nfs4_destroy_seqid_counter(&sp->so_seqid); put_rpccred(sp->so_cred); kfree(sp); } @@ -748,10 +761,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f lsp = kzalloc(sizeof(*lsp), GFP_NOFS); if (lsp == NULL) return NULL; - rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); - spin_lock_init(&lsp->ls_sequence.lock); - INIT_LIST_HEAD(&lsp->ls_sequence.list); - lsp->ls_seqid.sequence = &lsp->ls_sequence; + nfs4_init_seqid_counter(&lsp->ls_seqid); atomic_set(&lsp->ls_count, 1); lsp->ls_state = state; lsp->ls_owner.lo_type = type; @@ -780,7 +790,7 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) struct nfs_server *server = lsp->ls_state->owner->so_server; ida_simple_remove(&server->lockowner_id, lsp->ls_id); - rpc_destroy_wait_queue(&lsp->ls_sequence.wait); + nfs4_destroy_seqid_counter(&lsp->ls_seqid); kfree(lsp); } @@ -914,7 +924,7 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m void nfs_release_seqid(struct nfs_seqid *seqid) { if (!list_empty(&seqid->list)) { - struct rpc_sequence *sequence = seqid->sequence->sequence; + struct nfs_seqid_counter *sequence = seqid->sequence; spin_lock(&sequence->lock); list_del_init(&seqid->list); @@ -936,7 +946,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid) */ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { - BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid); + BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid); switch (status) { case 0: break; @@ -987,7 +997,7 @@ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) { - struct rpc_sequence *sequence = seqid->sequence->sequence; + struct nfs_seqid_counter *sequence = seqid->sequence; int status = 0; spin_lock(&sequence->lock); From 48c22eb21071a3524f8b6e587371be35b5e86969 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:04:25 -0500 Subject: [PATCH 088/316] NFS: Move struct nfs_unique_id into struct nfs_seqid_counter Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 8 ++++++-- fs/nfs/nfs4proc.c | 10 +++++----- fs/nfs/nfs4state.c | 10 +++++----- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c4025ae1d071..df3d02c3e8cb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -53,8 +53,14 @@ struct nfs4_minor_version_ops { const struct nfs4_state_maintenance_ops *state_renewal_ops; }; +struct nfs_unique_id { + struct rb_node rb_node; + __u64 id; +}; + #define NFS_SEQID_CONFIRMED 1 struct nfs_seqid_counter { + int owner_id; int flags; u32 counter; spinlock_t lock; /* Protects the list */ @@ -91,7 +97,6 @@ struct nfs4_state_owner { unsigned long so_flags; struct list_head so_states; struct nfs_seqid_counter so_seqid; - int so_owner_id; }; enum { @@ -131,7 +136,6 @@ struct nfs4_lock_state { struct nfs4_state * ls_state; /* Pointer to open state */ #define NFS_LOCK_INITIALIZED 1 int ls_flags; - int ls_id; struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; atomic_t ls_count; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8926d33383ed..88a8b6999b4f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -822,7 +822,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); p->o_arg.clientid = server->nfs_client->cl_clientid; - p->o_arg.id = sp->so_owner_id; + p->o_arg.id = sp->so_seqid.owner_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; @@ -1447,7 +1447,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_unlock(); } /* Update sequence id. */ - data->o_arg.id = sp->so_owner_id; + data->o_arg.id = sp->so_seqid.owner_id; data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; @@ -4032,7 +4032,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; lsp = request->fl_u.nfs4_fl.owner; - arg.lock_owner.id = lsp->ls_id; + arg.lock_owner.id = lsp->ls_seqid.owner_id; arg.lock_owner.s_dev = server->s_dev; status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); switch (status) { @@ -4278,7 +4278,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, goto out_free_seqid; p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; - p->arg.lock_owner.id = lsp->ls_id; + p->arg.lock_owner.id = lsp->ls_seqid.owner_id; p->arg.lock_owner.s_dev = server->s_dev; p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; @@ -4696,7 +4696,7 @@ void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) if (!args) return; args->lock_owner.clientid = server->nfs_client->cl_clientid; - args->lock_owner.id = lsp->ls_id; + args->lock_owner.id = lsp->ls_seqid.owner_id; args->lock_owner.s_dev = server->s_dev; msg.rpc_argp = args; rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cf7bc39aa0ee..a42e60d3ee50 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -366,7 +366,7 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) return sp; } } - err = ida_get_new(&server->openowner_id, &new->so_owner_id); + err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id); if (err) return ERR_PTR(err); rb_link_node(&new->so_server_node, parent, p); @@ -381,7 +381,7 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) if (!RB_EMPTY_NODE(&sp->so_server_node)) rb_erase(&sp->so_server_node, &server->state_owners); - ida_remove(&server->openowner_id, sp->so_owner_id); + ida_remove(&server->openowner_id, sp->so_seqid.owner_id); } static void @@ -775,8 +775,8 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f default: goto out_free; } - lsp->ls_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); - if (lsp->ls_id < 0) + lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); + if (lsp->ls_seqid.owner_id < 0) goto out_free; INIT_LIST_HEAD(&lsp->ls_locks); return lsp; @@ -789,7 +789,7 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) { struct nfs_server *server = lsp->ls_state->owner->so_server; - ida_simple_remove(&server->lockowner_id, lsp->ls_id); + ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id); nfs4_destroy_seqid_counter(&lsp->ls_seqid); kfree(lsp); } From 536e43d12b9517bbbf6114cd1a12be27857a4d7a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:04:26 -0500 Subject: [PATCH 089/316] NFS: Optimise away unnecessary setattrs for open(O_TRUNC); Currently, we will correctly optimise away a truncate that doesn't change the file size. However, in the case of open(O_TRUNC), we also want to optimise away the time changes. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 25 +++++++++++++++++++------ fs/nfs/inode.c | 4 ++-- fs/nfs/nfs4proc.c | 10 +++++++--- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index fd9a872fada0..bb132a88f4e8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1429,6 +1429,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } open_flags = nd->intent.open.flags; + attr.ia_valid = 0; ctx = create_nfs_open_context(dentry, open_flags); res = ERR_CAST(ctx); @@ -1437,11 +1438,14 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; - attr.ia_valid = ATTR_MODE; + attr.ia_valid |= ATTR_MODE; attr.ia_mode &= ~current_umask(); - } else { + } else open_flags &= ~(O_EXCL | O_CREAT); - attr.ia_valid = 0; + + if (open_flags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; } /* Open the file on the server */ @@ -1495,6 +1499,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode; struct inode *dir; struct nfs_open_context *ctx; + struct iattr attr; int openflags, ret = 0; if (nd->flags & LOOKUP_RCU) @@ -1523,19 +1528,27 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* We cannot do exclusive creation on a positive dentry */ if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; - /* We can't create new files, or truncate existing ones here */ - openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); + /* We can't create new files here */ + openflags &= ~(O_CREAT|O_EXCL); ctx = create_nfs_open_context(dentry, openflags); ret = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; + + attr.ia_valid = 0; + if (openflags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; + nfs_wb_all(inode); + } + /* * Note: we're not holding inode->i_mutex and so may be racing with * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. */ - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); if (IS_ERR(inode)) { ret = PTR_ERR(inode); switch (ret) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 577ad5a72a24..65486e652943 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -402,7 +402,7 @@ out_no_inode: goto out; } -#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int nfs_setattr(struct dentry *dentry, struct iattr *attr) @@ -424,7 +424,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) /* Optimization: if the end result is no change, don't RPC */ attr->ia_valid &= NFS_VALID_ATTRS; - if ((attr->ia_valid & ~ATTR_FILE) == 0) + if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; /* Write all dirty data */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 88a8b6999b4f..360240cc1e9b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -828,7 +828,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.bitmask = server->attr_bitmask; p->o_arg.dir_bitmask = server->cache_consistency_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; - if (flags & O_CREAT) { + if (attrs != NULL && attrs->ia_valid != 0) { u32 *s; p->o_arg.u.attrs = &p->attrs; @@ -885,7 +885,7 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode { int ret = 0; - if (open_mode & O_EXCL) + if (open_mode & (O_EXCL|O_TRUNC)) goto out; switch (mode & (FMODE_READ|FMODE_WRITE)) { case FMODE_READ: @@ -1033,7 +1033,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) struct nfs4_state *state = opendata->state; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; - int open_mode = opendata->o_arg.open_flags & O_EXCL; + int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC); fmode_t fmode = opendata->o_arg.fmode; nfs4_stateid stateid; int ret = -EAGAIN; @@ -2431,6 +2431,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } } + /* Deal with open(O_TRUNC) */ + if (sattr->ia_valid & ATTR_OPEN) + sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); + status = nfs4_do_setattr(inode, cred, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); From 2aeb98f498ce37742b743080fdc6c8cf64053599 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:04:26 -0500 Subject: [PATCH 090/316] NFS: Ensure that mmapped pages remain stable during writeback Ensure that nfs_vm_page_mkwrite() waits for the page writeback to complete before the application is allowed to modify page contents. The main reason for wanting to do this in NFS is to ensure that the server doesn't get confused if we have to resend the RPC request due to a dropped/missed reply. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c43a452f7da2..4fdaaa63cf1c 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -530,6 +530,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (mapping != dentry->d_inode->i_mapping) goto out_unlock; + wait_on_page_writeback(page); + pagelen = nfs_page_length(page); if (pagelen == 0) goto out_unlock; From 961a828df64979d2a9faeeeee043391670a193b9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jan 2012 22:57:37 -0500 Subject: [PATCH 091/316] SUNRPC: Fix potential races in xprt_lock_write_next() We have to ensure that the wake up from the waitqueue and the assignment of xprt->snd_task are atomic. We can do this by assigning the snd_task while under the waitqueue spinlock. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 13 ++++++---- fs/nfs/nfs4state.c | 17 ++++++------- include/linux/sunrpc/sched.h | 3 +++ net/sunrpc/sched.c | 42 ++++++++++++++++++++++++------- net/sunrpc/xprt.c | 49 +++++++++++++++++++----------------- 6 files changed, 79 insertions(+), 46 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index df3d02c3e8cb..c45c21a5470f 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -222,6 +222,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser return server->nfs_client->cl_session; } +extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy); extern int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 360240cc1e9b..828a76590af9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -385,17 +385,20 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) free_slotid, tbl->highest_used_slotid); } +bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) +{ + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + return true; +} + /* * Signal state manager thread if session fore channel is drained */ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) { - struct rpc_task *task; - if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); - if (task) - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq, + nfs4_set_task_privileged, NULL); return; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a42e60d3ee50..f0e9881c2aa2 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -190,23 +190,22 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; + struct nfs4_slot_table *tbl; int max_slots; if (ses == NULL) return; + tbl = &ses->fc_slot_table; if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - spin_lock(&ses->fc_slot_table.slot_tbl_lock); - max_slots = ses->fc_slot_table.max_slots; + spin_lock(&tbl->slot_tbl_lock); + max_slots = tbl->max_slots; while (max_slots--) { - struct rpc_task *task; - - task = rpc_wake_up_next(&ses->fc_slot_table. - slot_tbl_waitq); - if (!task) + if (rpc_wake_up_first(&tbl->slot_tbl_waitq, + nfs4_set_task_privileged, + NULL) == NULL) break; - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); } - spin_unlock(&ses->fc_slot_table.slot_tbl_lock); + spin_unlock(&tbl->slot_tbl_lock); } } diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index b16243a35f0b..bd337f990a41 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -235,6 +235,9 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); +struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, + bool (*)(struct rpc_task *, void *), + void *); void rpc_wake_up_status(struct rpc_wait_queue *, int); int rpc_queue_empty(struct rpc_wait_queue *); void rpc_delay(struct rpc_task *, unsigned long); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 3341d8962786..f982dfe53993 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -422,7 +422,7 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the next task on a priority queue. */ -static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue) +static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue) { struct list_head *q; struct rpc_task *task; @@ -467,30 +467,54 @@ new_queue: new_owner: rpc_set_waitqueue_owner(queue, task->tk_owner); out: - rpc_wake_up_task_queue_locked(queue, task); return task; } +static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue) +{ + if (RPC_IS_PRIORITY(queue)) + return __rpc_find_next_queued_priority(queue); + if (!list_empty(&queue->tasks[0])) + return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list); + return NULL; +} + /* - * Wake up the next task on the wait queue. + * Wake up the first task on the wait queue. */ -struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) +struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue, + bool (*func)(struct rpc_task *, void *), void *data) { struct rpc_task *task = NULL; - dprintk("RPC: wake_up_next(%p \"%s\")\n", + dprintk("RPC: wake_up_first(%p \"%s\")\n", queue, rpc_qname(queue)); spin_lock_bh(&queue->lock); - if (RPC_IS_PRIORITY(queue)) - task = __rpc_wake_up_next_priority(queue); - else { - task_for_first(task, &queue->tasks[0]) + task = __rpc_find_next_queued(queue); + if (task != NULL) { + if (func(task, data)) rpc_wake_up_task_queue_locked(queue, task); + else + task = NULL; } spin_unlock_bh(&queue->lock); return task; } +EXPORT_SYMBOL_GPL(rpc_wake_up_first); + +static bool rpc_wake_up_next_func(struct rpc_task *task, void *data) +{ + return true; +} + +/* + * Wake up the next task on the wait queue. +*/ +struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue) +{ + return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL); +} EXPORT_SYMBOL_GPL(rpc_wake_up_next); /** diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c64c0ef519b5..839f6ef2326b 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -292,54 +292,57 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) return retval; } -static void __xprt_lock_write_next(struct rpc_xprt *xprt) +static bool __xprt_lock_write_func(struct rpc_task *task, void *data) { - struct rpc_task *task; + struct rpc_xprt *xprt = data; struct rpc_rqst *req; - if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) - return; - - task = rpc_wake_up_next(&xprt->sending); - if (task == NULL) - goto out_unlock; - req = task->tk_rqstp; xprt->snd_task = task; if (req) { req->rq_bytes_sent = 0; req->rq_ntrans++; } - return; + return true; +} -out_unlock: +static void __xprt_lock_write_next(struct rpc_xprt *xprt) +{ + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + + if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_func, xprt)) + return; xprt_clear_locked(xprt); } -static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) +static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data) { - struct rpc_task *task; + struct rpc_xprt *xprt = data; struct rpc_rqst *req; - if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) - return; - if (RPCXPRT_CONGESTED(xprt)) - goto out_unlock; - task = rpc_wake_up_next(&xprt->sending); - if (task == NULL) - goto out_unlock; - req = task->tk_rqstp; if (req == NULL) { xprt->snd_task = task; - return; + return true; } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; req->rq_bytes_sent = 0; req->rq_ntrans++; - return; + return true; } + return false; +} + +static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) +{ + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + if (RPCXPRT_CONGESTED(xprt)) + goto out_unlock; + if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_cong_func, xprt)) + return; out_unlock: xprt_clear_locked(xprt); } From dff02d499c067bdde589b764321b35fe763569f6 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 12:52:10 +0400 Subject: [PATCH 092/316] SUNRPC: move rpcbind internals to sunrpc part of network namespace context This patch makes rpcbind logic works in network namespace context. IOW each network namespace will have it's own unique rpcbind internals (clients and friends) required for registering svc services per network namespace. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/netns.h | 5 ++++ net/sunrpc/rpcb_clnt.c | 64 +++++++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 29 deletions(-) diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 0f3af34fa502..1fdeb1ba84bd 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -15,6 +15,11 @@ struct sunrpc_net { struct list_head all_clients; spinlock_t rpc_client_lock; + + struct rpc_clnt *rpcb_local_clnt; + struct rpc_clnt *rpcb_local_clnt4; + spinlock_t rpcb_clnt_lock; + unsigned int rpcb_users; }; extern int sunrpc_net_id; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 8761bf8e36fc..7d32f19ba868 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -23,12 +23,15 @@ #include #include #include +#include #include #include #include #include +#include "netns.h" + #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_BIND #endif @@ -111,12 +114,6 @@ static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; -static struct rpc_clnt * rpcb_local_clnt; -static struct rpc_clnt * rpcb_local_clnt4; - -DEFINE_SPINLOCK(rpcb_clnt_lock); -unsigned int rpcb_users; - struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -167,29 +164,31 @@ static void rpcb_map_release(void *data) static int rpcb_get_local(void) { int cnt; + struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); - spin_lock(&rpcb_clnt_lock); - if (rpcb_users) - rpcb_users++; - cnt = rpcb_users; - spin_unlock(&rpcb_clnt_lock); + spin_lock(&sn->rpcb_clnt_lock); + if (sn->rpcb_users) + sn->rpcb_users++; + cnt = sn->rpcb_users; + spin_unlock(&sn->rpcb_clnt_lock); return cnt; } void rpcb_put_local(void) { - struct rpc_clnt *clnt = rpcb_local_clnt; - struct rpc_clnt *clnt4 = rpcb_local_clnt4; + struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); + struct rpc_clnt *clnt = sn->rpcb_local_clnt; + struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4; int shutdown; - spin_lock(&rpcb_clnt_lock); - if (--rpcb_users == 0) { - rpcb_local_clnt = NULL; - rpcb_local_clnt4 = NULL; + spin_lock(&sn->rpcb_clnt_lock); + if (--sn->rpcb_users == 0) { + sn->rpcb_local_clnt = NULL; + sn->rpcb_local_clnt4 = NULL; } - shutdown = !rpcb_users; - spin_unlock(&rpcb_clnt_lock); + shutdown = !sn->rpcb_users; + spin_unlock(&sn->rpcb_clnt_lock); if (shutdown) { /* @@ -204,14 +203,16 @@ void rpcb_put_local(void) static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4) { + struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); + /* Protected by rpcb_create_local_mutex */ - rpcb_local_clnt = clnt; - rpcb_local_clnt4 = clnt4; + sn->rpcb_local_clnt = clnt; + sn->rpcb_local_clnt4 = clnt4; smp_wmb(); - rpcb_users = 1; + sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " - "%p, rpcb_local_clnt4: %p)\n", rpcb_local_clnt, - rpcb_local_clnt4); + "%p, rpcb_local_clnt4: %p)\n", sn->rpcb_local_clnt, + sn->rpcb_local_clnt4); } /* @@ -431,6 +432,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) struct rpc_message msg = { .rpc_argp = &map, }; + struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), @@ -440,7 +442,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call(rpcb_local_clnt, &msg); + return rpcb_register_call(sn->rpcb_local_clnt, &msg); } /* @@ -453,6 +455,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); int result; + struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -465,7 +468,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(rpcb_local_clnt4, msg); + result = rpcb_register_call(sn->rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -480,6 +483,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); int result; + struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -492,7 +496,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(rpcb_local_clnt4, msg); + result = rpcb_register_call(sn->rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -500,6 +504,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) { struct rpcbind_args *map = msg->rpc_argp; + struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); dprintk("RPC: unregistering [%u, %u, '%s'] with " "local rpcbind\n", @@ -508,7 +513,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(rpcb_local_clnt4, msg); + return rpcb_register_call(sn->rpcb_local_clnt4, msg); } /** @@ -566,8 +571,9 @@ int rpcb_v4_register(const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; + struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); - if (rpcb_local_clnt4 == NULL) + if (sn->rpcb_local_clnt4 == NULL) return -EPROTONOSUPPORT; if (address == NULL) From 2ea75a10add779b722650aa10836247e7d609fd7 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 12:52:18 +0400 Subject: [PATCH 093/316] SUNRPC: optimize net_ns dereferencing in rpcbind creation calls Static rpcbind creation functions can be parametrized by network namespace pointer, calculated only once, instead of using init_net pointer (or taking it from current when virtualization will be comleted) in many places. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 7d32f19ba868..3df276a74d1b 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -161,10 +161,10 @@ static void rpcb_map_release(void *data) kfree(map); } -static int rpcb_get_local(void) +static int rpcb_get_local(struct net *net) { int cnt; - struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); spin_lock(&sn->rpcb_clnt_lock); if (sn->rpcb_users) @@ -201,9 +201,10 @@ void rpcb_put_local(void) } } -static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4) +static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, + struct rpc_clnt *clnt4) { - struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); /* Protected by rpcb_create_local_mutex */ sn->rpcb_local_clnt = clnt; @@ -211,22 +212,23 @@ static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4) smp_wmb(); sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " - "%p, rpcb_local_clnt4: %p)\n", sn->rpcb_local_clnt, - sn->rpcb_local_clnt4); + "%p, rpcb_local_clnt4: %p) for net %p%s\n", + sn->rpcb_local_clnt, sn->rpcb_local_clnt4, + net, (net == &init_net) ? " (init_net)" : ""); } /* * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local_unix(void) +static int rpcb_create_local_unix(struct net *net) { static const struct sockaddr_un rpcb_localaddr_rpcbind = { .sun_family = AF_LOCAL, .sun_path = RPCBIND_SOCK_PATHNAME, }; struct rpc_create_args args = { - .net = &init_net, + .net = net, .protocol = XPRT_TRANSPORT_LOCAL, .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, .addrsize = sizeof(rpcb_localaddr_rpcbind), @@ -259,7 +261,7 @@ static int rpcb_create_local_unix(void) clnt4 = NULL; } - rpcb_set_local(clnt, clnt4); + rpcb_set_local(net, clnt, clnt4); out: return result; @@ -269,7 +271,7 @@ out: * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local_net(void) +static int rpcb_create_local_net(struct net *net) { static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_family = AF_INET, @@ -277,7 +279,7 @@ static int rpcb_create_local_net(void) .sin_port = htons(RPCBIND_PORT), }; struct rpc_create_args args = { - .net = &init_net, + .net = net, .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *)&rpcb_inaddr_loopback, .addrsize = sizeof(rpcb_inaddr_loopback), @@ -311,7 +313,7 @@ static int rpcb_create_local_net(void) clnt4 = NULL; } - rpcb_set_local(clnt, clnt4); + rpcb_set_local(net, clnt, clnt4); out: return result; @@ -325,16 +327,17 @@ int rpcb_create_local(void) { static DEFINE_MUTEX(rpcb_create_local_mutex); int result = 0; + struct net *net = &init_net; - if (rpcb_get_local()) + if (rpcb_get_local(net)) return result; mutex_lock(&rpcb_create_local_mutex); - if (rpcb_get_local()) + if (rpcb_get_local(net)) goto out; - if (rpcb_create_local_unix() != 0) - result = rpcb_create_local_net(); + if (rpcb_create_local_unix(net) != 0) + result = rpcb_create_local_net(net); out: mutex_unlock(&rpcb_create_local_mutex); From 1a114a66466ba914a41df438e806fc10e7d9fb3b Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 12:52:26 +0400 Subject: [PATCH 094/316] SUNRPC: optimize net_ns dereferencing in rpcbind registering calls Static rpcbind registering functions can be parametrized by network namespace pointer, calculated only once, instead of using init_net pointer (or taking it from current when virtualization will be comleted) in many places. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3df276a74d1b..371d2298c9fc 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -451,14 +451,14 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) /* * Fill in AF_INET family-specific arguments to register */ -static int rpcb_register_inet4(const struct sockaddr *sap, +static int rpcb_register_inet4(struct sunrpc_net *sn, + const struct sockaddr *sap, struct rpc_message *msg) { const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); int result; - struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -479,14 +479,14 @@ static int rpcb_register_inet4(const struct sockaddr *sap, /* * Fill in AF_INET6 family-specific arguments to register */ -static int rpcb_register_inet6(const struct sockaddr *sap, +static int rpcb_register_inet6(struct sunrpc_net *sn, + const struct sockaddr *sap, struct rpc_message *msg) { const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); int result; - struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -504,10 +504,10 @@ static int rpcb_register_inet6(const struct sockaddr *sap, return result; } -static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) +static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, + struct rpc_message *msg) { struct rpcbind_args *map = msg->rpc_argp; - struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); dprintk("RPC: unregistering [%u, %u, '%s'] with " "local rpcbind\n", @@ -580,13 +580,13 @@ int rpcb_v4_register(const u32 program, const u32 version, return -EPROTONOSUPPORT; if (address == NULL) - return rpcb_unregister_all_protofamilies(&msg); + return rpcb_unregister_all_protofamilies(sn, &msg); switch (address->sa_family) { case AF_INET: - return rpcb_register_inet4(address, &msg); + return rpcb_register_inet4(sn, address, &msg); case AF_INET6: - return rpcb_register_inet6(address, &msg); + return rpcb_register_inet6(sn, address, &msg); } return -EAFNOSUPPORT; From c2550e07a61a4528673fb85aaaee16048b7cf6cc Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 12:52:35 +0400 Subject: [PATCH 095/316] SUNRPC: create rpcbind client in passed network namespace context Rpcbind clients are per network namespace. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 371d2298c9fc..0d76c0f09bb8 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -344,11 +344,12 @@ out: return result; } -static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, - size_t salen, int proto, u32 version) +static struct rpc_clnt *rpcb_create(struct net *net, char *hostname, + struct sockaddr *srvaddr, size_t salen, + int proto, u32 version) { struct rpc_create_args args = { - .net = &init_net, + .net = net, .protocol = proto, .address = srvaddr, .addrsize = salen, @@ -708,8 +709,8 @@ void rpcb_getport_async(struct rpc_task *task) dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); - rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, - bind_version); + rpcb_clnt = rpcb_create(xprt->xprt_net, clnt->cl_server, sap, salen, + xprt->prot, bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", From 977ac3157328239a0f4074b13a3d9eb5c832cd6c Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 12:52:43 +0400 Subject: [PATCH 096/316] SUNRPC: register rpcbind programs in passed network namespase context Registering rpcbind program requires rpcbind clients, which are per network namespace context. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 5 +++-- net/sunrpc/rpcb_clnt.c | 8 ++++---- net/sunrpc/svc.c | 10 +++++----- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index bfd61852b718..b0b3e572a619 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -138,8 +138,9 @@ void rpc_task_release_client(struct rpc_task *); int rpcb_create_local(void); void rpcb_put_local(void); -int rpcb_register(u32, u32, int, unsigned short); -int rpcb_v4_register(const u32 program, const u32 version, +int rpcb_register(struct net *, u32, u32, int, unsigned short); +int rpcb_v4_register(struct net *net, const u32 program, + const u32 version, const struct sockaddr *address, const char *netid); void rpcb_getport_async(struct rpc_task *); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 0d76c0f09bb8..a5aa50d0aae4 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -425,7 +425,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 * addresses). */ -int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) +int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short port) { struct rpcbind_args map = { .r_prog = prog, @@ -436,7 +436,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) struct rpc_message msg = { .rpc_argp = &map, }; - struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), @@ -563,7 +563,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, * service on any IPv4 address, but not on IPv6. The latter * advertises the service on all IPv4 and IPv6 addresses. */ -int rpcb_v4_register(const u32 program, const u32 version, +int rpcb_v4_register(struct net *net, const u32 program, const u32 version, const struct sockaddr *address, const char *netid) { struct rpcbind_args map = { @@ -575,7 +575,7 @@ int rpcb_v4_register(const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; - struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); if (sn->rpcb_local_clnt4 == NULL) return -EPROTONOSUPPORT; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e4aabc02368b..889d7b11b9ac 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -818,7 +818,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, return -ENOPROTOOPT; } - error = rpcb_v4_register(program, version, + error = rpcb_v4_register(&init_net, program, version, (const struct sockaddr *)&sin, netid); /* @@ -826,7 +826,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, * registration request with the legacy rpcbind v2 protocol. */ if (error == -EPROTONOSUPPORT) - error = rpcb_register(program, version, protocol, port); + error = rpcb_register(&init_net, program, version, protocol, port); return error; } @@ -865,7 +865,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, return -ENOPROTOOPT; } - error = rpcb_v4_register(program, version, + error = rpcb_v4_register(&init_net, program, version, (const struct sockaddr *)&sin6, netid); /* @@ -968,14 +968,14 @@ static void __svc_unregister(const u32 program, const u32 version, { int error; - error = rpcb_v4_register(program, version, NULL, ""); + error = rpcb_v4_register(&init_net, program, version, NULL, ""); /* * User space didn't support rpcbind v4, so retry this * request with the legacy rpcbind v2 protocol. */ if (error == -EPROTONOSUPPORT) - error = rpcb_register(program, version, 0, 0); + error = rpcb_register(&init_net, program, version, 0, 0); dprintk("svc: %s(%sv%u), error %d\n", __func__, progname, version, error); From f7a30c18e8d673c996095420a026a28433cb4096 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 12:52:51 +0400 Subject: [PATCH 097/316] SUNRPC: parametrize local rpcbind clients creation with net ns These client are per network namespace and thus can be created for different network namespaces. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++-- net/sunrpc/rpcb_clnt.c | 7 +++---- net/sunrpc/svc.c | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b0b3e572a619..e891a8a18cb3 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -136,8 +136,8 @@ void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); void rpc_task_release_client(struct rpc_task *); -int rpcb_create_local(void); -void rpcb_put_local(void); +int rpcb_create_local(struct net *); +void rpcb_put_local(struct net *); int rpcb_register(struct net *, u32, u32, int, unsigned short); int rpcb_v4_register(struct net *net, const u32 program, const u32 version, diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index a5aa50d0aae4..6d6a84f67449 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -175,9 +175,9 @@ static int rpcb_get_local(struct net *net) return cnt; } -void rpcb_put_local(void) +void rpcb_put_local(struct net *net) { - struct sunrpc_net *sn = net_generic(&init_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_clnt *clnt = sn->rpcb_local_clnt; struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4; int shutdown; @@ -323,11 +323,10 @@ out: * Returns zero on success, otherwise a negative errno value * is returned. */ -int rpcb_create_local(void) +int rpcb_create_local(struct net *net) { static DEFINE_MUTEX(rpcb_create_local_mutex); int result = 0; - struct net *net = &init_net; if (rpcb_get_local(net)) return result; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 889d7b11b9ac..0aee925fbd73 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -372,7 +372,7 @@ static int svc_rpcb_setup(struct svc_serv *serv) { int err; - err = rpcb_create_local(); + err = rpcb_create_local(&init_net); if (err) return err; @@ -384,7 +384,7 @@ static int svc_rpcb_setup(struct svc_serv *serv) void svc_rpcb_cleanup(struct svc_serv *serv) { svc_unregister(serv); - rpcb_put_local(); + rpcb_put_local(&init_net); } EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); From 3065f1e29aa2716c8903cfeff368df4b5314040a Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 13:09:03 +0400 Subject: [PATCH 098/316] SUNRPC: parametrize rpc_parse_scope_id() by network context Parametrize rpc_parse_scope_id() by network context and thus force it's caller to pass in network context instead of using hard-coded "init_net". Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index ee77742e0ed6..cc83de1c2224 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -156,8 +156,9 @@ static size_t rpc_pton4(const char *buf, const size_t buflen, } #if IS_ENABLED(CONFIG_IPV6) -static int rpc_parse_scope_id(const char *buf, const size_t buflen, - const char *delim, struct sockaddr_in6 *sin6) +static int rpc_parse_scope_id(struct net *net, const char *buf, + const size_t buflen, const char *delim, + struct sockaddr_in6 *sin6) { char *p; size_t len; @@ -177,7 +178,7 @@ static int rpc_parse_scope_id(const char *buf, const size_t buflen, unsigned long scope_id = 0; struct net_device *dev; - dev = dev_get_by_name(&init_net, p); + dev = dev_get_by_name(net, p); if (dev != NULL) { scope_id = dev->ifindex; dev_put(dev); @@ -213,7 +214,7 @@ static size_t rpc_pton6(const char *buf, const size_t buflen, if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) return 0; - if (!rpc_parse_scope_id(buf, buflen, delim, sin6)) + if (!rpc_parse_scope_id(&init_net, buf, buflen, delim, sin6)) return 0; sin6->sin6_family = AF_INET6; From 8b147f74738d9ab7e76085e5535e0fe8dc8b29f4 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 13:09:11 +0400 Subject: [PATCH 099/316] SUNRPC: parametrize rpc_pton6() by network context Parametrize rpc_pton6() by network context and thus force it's caller to pass in network context instead of using hard-coded "init_net". Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index cc83de1c2224..18c40a2002fb 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -198,7 +198,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, return 0; } -static size_t rpc_pton6(const char *buf, const size_t buflen, +static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, struct sockaddr *sap, const size_t salen) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; @@ -214,14 +214,14 @@ static size_t rpc_pton6(const char *buf, const size_t buflen, if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) return 0; - if (!rpc_parse_scope_id(&init_net, buf, buflen, delim, sin6)) + if (!rpc_parse_scope_id(net, buf, buflen, delim, sin6)) return 0; sin6->sin6_family = AF_INET6; return sizeof(struct sockaddr_in6); } #else -static size_t rpc_pton6(const char *buf, const size_t buflen, +static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, struct sockaddr *sap, const size_t salen) { return 0; @@ -249,7 +249,7 @@ size_t rpc_pton(const char *buf, const size_t buflen, for (i = 0; i < buflen; i++) if (buf[i] == ':') - return rpc_pton6(buf, buflen, sap, salen); + return rpc_pton6(&init_net, buf, buflen, sap, salen); return rpc_pton4(buf, buflen, sap, salen); } EXPORT_SYMBOL_GPL(rpc_pton); From 90100b1766c914c820baa78b5be6845fae1159b8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 13:09:19 +0400 Subject: [PATCH 100/316] SUNRPC: parametrize rpc_pton() by network context Parametrize rpc_pton() by network context and thus force it's callers to pass in network context instead of using hard-coded "init_net". Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/dns_resolve.c | 4 ++-- fs/nfs/nfs4filelayoutdev.c | 2 +- fs/nfs/nfs4namespace.c | 2 +- fs/nfs/super.c | 4 ++-- fs/nfsd/nfsctl.c | 2 +- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/addr.c | 7 ++++--- net/sunrpc/svcauth_unix.c | 2 +- 8 files changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 200eb67c95d9..7edc62a8a64f 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -20,7 +20,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); if (ip_len > 0) - ret = rpc_pton(ip_addr, ip_len, sa, salen); + ret = rpc_pton(&init_net, ip_addr, ip_len, sa, salen); else ret = -ESRCH; kfree(ip_addr); @@ -224,7 +224,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) len = qword_get(&buf, buf1, sizeof(buf1)); if (len <= 0) goto out; - key.addrlen = rpc_pton(buf1, len, + key.addrlen = rpc_pton(&init_net, buf1, len, (struct sockaddr *)&key.addr, sizeof(key.addr)); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 8ae91908f5aa..0d8b9523a3cb 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -457,7 +457,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) INIT_LIST_HEAD(&da->da_node); - if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, + if (!rpc_pton(&init_net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, sizeof(da->da_addr))) { dprintk("%s: error parsing address %s\n", __func__, buf); goto out_free_da; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 919a36935924..48a9acdbaeb6 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -98,7 +98,7 @@ static size_t nfs_parse_server_name(char *string, size_t len, { ssize_t ret; - ret = rpc_pton(string, len, sa, salen); + ret = rpc_pton(&init_net, string, len, sa, salen); if (ret == 0) { ret = nfs_dns_resolve_name(server->client->cl_xprt->xprt_net, string, len, sa, salen); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e45feb0fee59..b79f2a11c29e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1408,7 +1408,7 @@ static int nfs_parse_mount_options(char *raw, if (string == NULL) goto out_nomem; mnt->nfs_server.addrlen = - rpc_pton(string, strlen(string), + rpc_pton(&init_net, string, strlen(string), (struct sockaddr *) &mnt->nfs_server.address, sizeof(mnt->nfs_server.address)); @@ -1430,7 +1430,7 @@ static int nfs_parse_mount_options(char *raw, if (string == NULL) goto out_nomem; mnt->mount_server.addrlen = - rpc_pton(string, strlen(string), + rpc_pton(&init_net, string, strlen(string), (struct sockaddr *) &mnt->mount_server.address, sizeof(mnt->mount_server.address)); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 748eda93ce59..330352d379b6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -223,7 +223,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(fo_path, size, sap, salen) == 0) + if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0) return -EINVAL; return nlmsvc_unlock_all_by_ip(sap); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index e891a8a18cb3..7bd114fc84e3 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -163,7 +163,7 @@ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); size_t rpc_ntop(const struct sockaddr *, char *, const size_t); -size_t rpc_pton(const char *, const size_t, +size_t rpc_pton(struct net *, const char *, const size_t, struct sockaddr *, const size_t); char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); size_t rpc_uaddr2sockaddr(const char *, const size_t, diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 18c40a2002fb..82b06b73c1e1 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -230,6 +230,7 @@ static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, /** * rpc_pton - Construct a sockaddr in @sap + * @net: applicable network namespace * @buf: C string containing presentation format IP address * @buflen: length of presentation address in bytes * @sap: buffer into which to plant socket address @@ -242,14 +243,14 @@ static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, * socket address, if successful. Returns zero if an error * occurred. */ -size_t rpc_pton(const char *buf, const size_t buflen, +size_t rpc_pton(struct net *net, const char *buf, const size_t buflen, struct sockaddr *sap, const size_t salen) { unsigned int i; for (i = 0; i < buflen; i++) if (buf[i] == ':') - return rpc_pton6(&init_net, buf, buflen, sap, salen); + return rpc_pton6(net, buf, buflen, sap, salen); return rpc_pton4(buf, buflen, sap, salen); } EXPORT_SYMBOL_GPL(rpc_pton); @@ -340,7 +341,7 @@ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, port = (unsigned short)((porthi << 8) | portlo); *c = '\0'; - if (rpc_pton(buf, strlen(buf), sap, salen) == 0) + if (rpc_pton(&init_net, buf, strlen(buf), sap, salen) == 0) return 0; switch (sap->sa_family) { diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 01153ead1dba..2f8c426c1384 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -211,7 +211,7 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (rpc_pton(buf, len, &address.sa, sizeof(address)) == 0) + if (rpc_pton(&init_net, buf, len, &address.sa, sizeof(address)) == 0) return -EINVAL; switch (address.sa.sa_family) { case AF_INET: From f2ac4dc911fdbc9b98a6a48b40efc45aa9161775 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 13:09:27 +0400 Subject: [PATCH 101/316] SUNRPC: parametrize rpc_uaddr2sockaddr() by network context Parametrize rpc_uaddr2sockaddr() by network context and thus force it's callers to pass in network context instead of using hard-coded "init_net". Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4state.c | 2 +- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/addr.c | 8 +++++--- net/sunrpc/rpcb_clnt.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e8c98f009670..c5cddd659429 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1308,7 +1308,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r else goto out_err; - conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val, se->se_callback_addr_len, (struct sockaddr *)&conn->cb_addr, sizeof(conn->cb_addr)); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 7bd114fc84e3..9e754e3458fc 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -166,7 +166,7 @@ size_t rpc_ntop(const struct sockaddr *, char *, const size_t); size_t rpc_pton(struct net *, const char *, const size_t, struct sockaddr *, const size_t); char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); -size_t rpc_uaddr2sockaddr(const char *, const size_t, +size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, struct sockaddr *, const size_t); static inline unsigned short rpc_get_port(const struct sockaddr *sap) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 82b06b73c1e1..d11418f97f1f 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -297,6 +297,7 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) /** * rpc_uaddr2sockaddr - convert a universal address to a socket address. + * @net: applicable network namespace * @uaddr: C string containing universal address to convert * @uaddr_len: length of universal address string * @sap: buffer into which to plant socket address @@ -308,8 +309,9 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) * Returns the size of the socket address if successful; otherwise * zero is returned. */ -size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, - struct sockaddr *sap, const size_t salen) +size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr, + const size_t uaddr_len, struct sockaddr *sap, + const size_t salen) { char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')]; unsigned long portlo, porthi; @@ -341,7 +343,7 @@ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, port = (unsigned short)((porthi << 8) | portlo); *c = '\0'; - if (rpc_pton(&init_net, buf, strlen(buf), sap, salen) == 0) + if (rpc_pton(net, buf, strlen(buf), sap, salen) == 0) return 0; switch (sap->sa_family) { diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 6d6a84f67449..7c6fac004447 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -934,7 +934,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, task->tk_msg.rpc_proc->p_name, (char *)p); - if (rpc_uaddr2sockaddr((char *)p, len, sap, sizeof(address)) == 0) + if (rpc_uaddr2sockaddr(&init_net, (char *)p, len, sap, sizeof(address)) == 0) goto out_fail; rpcb->r_port = rpc_get_port(sap); From b030fb0bb113316cc6f56779388b8572ab0699da Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 14:02:40 +0400 Subject: [PATCH 102/316] SUNRPC: use proper network namespace in rpcbind RPCBPROC_GETADDR procedure Pass request socket network namespace to rpc_uaddr2sockaddr() instead of hardcoded "init_net", when decoding address in RPCBPROC_GETADDR procedure. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 7c6fac004447..4ce3a8e02953 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -934,7 +934,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, task->tk_msg.rpc_proc->p_name, (char *)p); - if (rpc_uaddr2sockaddr(&init_net, (char *)p, len, sap, sizeof(address)) == 0) + if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len, + sap, sizeof(address)) == 0) goto out_fail; rpcb->r_port = rpc_get_port(sap); From 5247fab5c82779174d50590e0200bf532248a8a1 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 14:02:48 +0400 Subject: [PATCH 103/316] SUNRPC: pass network namespace to service registering routines Lockd and NFSd services will handle requests from and to many network nsamespaces. And thus have to be registered and unregistered per network namespace. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svc.h | 2 +- net/sunrpc/svc.c | 42 +++++++++++++++++++++----------------- net/sunrpc/svcsock.c | 3 ++- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 35b37b1e9299..d3563c2a5808 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -428,7 +428,7 @@ void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, struct svc_rqst *); -int svc_register(const struct svc_serv *, const int, +int svc_register(const struct svc_serv *, struct net *, const int, const unsigned short, const unsigned short); void svc_wake_up(struct svc_serv *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 0aee925fbd73..a2d3330b70de 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -30,7 +30,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCDSP -static void svc_unregister(const struct svc_serv *serv); +static void svc_unregister(const struct svc_serv *serv, struct net *net); #define svc_serv_is_pooled(serv) ((serv)->sv_function) @@ -377,13 +377,13 @@ static int svc_rpcb_setup(struct svc_serv *serv) return err; /* Remove any stale portmap registrations */ - svc_unregister(serv); + svc_unregister(serv, &init_net); return 0; } void svc_rpcb_cleanup(struct svc_serv *serv) { - svc_unregister(serv); + svc_unregister(serv, &init_net); rpcb_put_local(&init_net); } EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); @@ -795,7 +795,8 @@ EXPORT_SYMBOL_GPL(svc_exit_thread); * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_rpcb_register4(const u32 program, const u32 version, +static int __svc_rpcb_register4(struct net *net, const u32 program, + const u32 version, const unsigned short protocol, const unsigned short port) { @@ -818,7 +819,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, return -ENOPROTOOPT; } - error = rpcb_v4_register(&init_net, program, version, + error = rpcb_v4_register(net, program, version, (const struct sockaddr *)&sin, netid); /* @@ -826,7 +827,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, * registration request with the legacy rpcbind v2 protocol. */ if (error == -EPROTONOSUPPORT) - error = rpcb_register(&init_net, program, version, protocol, port); + error = rpcb_register(net, program, version, protocol, port); return error; } @@ -842,7 +843,8 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_rpcb_register6(const u32 program, const u32 version, +static int __svc_rpcb_register6(struct net *net, const u32 program, + const u32 version, const unsigned short protocol, const unsigned short port) { @@ -865,7 +867,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, return -ENOPROTOOPT; } - error = rpcb_v4_register(&init_net, program, version, + error = rpcb_v4_register(net, program, version, (const struct sockaddr *)&sin6, netid); /* @@ -885,7 +887,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_register(const char *progname, +static int __svc_register(struct net *net, const char *progname, const u32 program, const u32 version, const int family, const unsigned short protocol, @@ -895,12 +897,12 @@ static int __svc_register(const char *progname, switch (family) { case PF_INET: - error = __svc_rpcb_register4(program, version, + error = __svc_rpcb_register4(net, program, version, protocol, port); break; #if IS_ENABLED(CONFIG_IPV6) case PF_INET6: - error = __svc_rpcb_register6(program, version, + error = __svc_rpcb_register6(net, program, version, protocol, port); #endif } @@ -914,14 +916,16 @@ static int __svc_register(const char *progname, /** * svc_register - register an RPC service with the local portmapper * @serv: svc_serv struct for the service to register + * @net: net namespace for the service to register * @family: protocol family of service's listener socket * @proto: transport protocol number to advertise * @port: port to advertise * * Service is registered for any address in the passed-in protocol family */ -int svc_register(const struct svc_serv *serv, const int family, - const unsigned short proto, const unsigned short port) +int svc_register(const struct svc_serv *serv, struct net *net, + const int family, const unsigned short proto, + const unsigned short port) { struct svc_program *progp; unsigned int i; @@ -946,7 +950,7 @@ int svc_register(const struct svc_serv *serv, const int family, if (progp->pg_vers[i]->vs_hidden) continue; - error = __svc_register(progp->pg_name, progp->pg_prog, + error = __svc_register(net, progp->pg_name, progp->pg_prog, i, family, proto, port); if (error < 0) break; @@ -963,19 +967,19 @@ int svc_register(const struct svc_serv *serv, const int family, * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient * in this case to clear all existing entries for [program, version]. */ -static void __svc_unregister(const u32 program, const u32 version, +static void __svc_unregister(struct net *net, const u32 program, const u32 version, const char *progname) { int error; - error = rpcb_v4_register(&init_net, program, version, NULL, ""); + error = rpcb_v4_register(net, program, version, NULL, ""); /* * User space didn't support rpcbind v4, so retry this * request with the legacy rpcbind v2 protocol. */ if (error == -EPROTONOSUPPORT) - error = rpcb_register(&init_net, program, version, 0, 0); + error = rpcb_register(net, program, version, 0, 0); dprintk("svc: %s(%sv%u), error %d\n", __func__, progname, version, error); @@ -989,7 +993,7 @@ static void __svc_unregister(const u32 program, const u32 version, * The result of unregistration is reported via dprintk for those who want * verification of the result, but is otherwise not important. */ -static void svc_unregister(const struct svc_serv *serv) +static void svc_unregister(const struct svc_serv *serv, struct net *net) { struct svc_program *progp; unsigned long flags; @@ -1006,7 +1010,7 @@ static void svc_unregister(const struct svc_serv *serv) dprintk("svc: attempting to unregister %sv%u\n", progp->pg_name, i); - __svc_unregister(progp->pg_prog, i, progp->pg_name); + __svc_unregister(net, progp->pg_prog, i, progp->pg_name); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 464570906f80..e8af0c9e436b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1409,7 +1409,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, + *errp = svc_register(serv, sock->sk->sk_net, inet->sk_family, + inet->sk_protocol, ntohs(inet_sk(inet)->inet_sport)); if (*errp < 0) { From bee42f688c915b510a4aabae4f7a99457137d6f3 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 14:02:56 +0400 Subject: [PATCH 104/316] SUNRPC: register service on creation in current network namespace Service, using rpcbind (Lockd, NFSd) are starting from userspace call and thus we can use current network namespace. There could be a problem with NFSd service, because it's creation can be called through NFSd fs from different network namespace. But this is a part of "NFSd per net ns" task and will be fixed in future. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a2d3330b70de..cb2caaee2af9 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -368,16 +369,16 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) return &serv->sv_pools[pidx % serv->sv_nrpools]; } -static int svc_rpcb_setup(struct svc_serv *serv) +static int svc_rpcb_setup(struct svc_serv *serv, struct net *net) { int err; - err = rpcb_create_local(&init_net); + err = rpcb_create_local(net); if (err) return err; /* Remove any stale portmap registrations */ - svc_unregister(serv, &init_net); + svc_unregister(serv, net); return 0; } @@ -470,7 +471,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, } if (svc_uses_rpcbind(serv)) { - if (svc_rpcb_setup(serv) < 0) { + if (svc_rpcb_setup(serv, current->nsproxy->net_ns) < 0) { kfree(serv->sv_pools); kfree(serv); return NULL; From 5ecebb7c7fd737cf387a552994df319c063973db Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 13 Jan 2012 14:03:04 +0400 Subject: [PATCH 105/316] SUNRPC: unregister service on creation in current network namespace On service shutdown we can be sure, that no more users of it left except current. Thus it looks like using current network namespace context is safe in this case. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfsd/nfssvc.c | 4 ++-- include/linux/sunrpc/svc.h | 9 +++++---- net/sunrpc/svc.c | 14 +++++++------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index eda7d7e55e05..fce472f5f39e 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -251,13 +251,13 @@ static void nfsd_shutdown(void) nfsd_up = false; } -static void nfsd_last_thread(struct svc_serv *serv) +static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { /* When last nfsd thread exits we need to do some clean-up */ nfsd_serv = NULL; nfsd_shutdown(); - svc_rpcb_cleanup(serv); + svc_rpcb_cleanup(serv, net); printk(KERN_WARNING "nfsd: last server has exited, flushing export " "cache\n"); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d3563c2a5808..7b65495aa4ef 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -84,7 +84,8 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - void (*sv_shutdown)(struct svc_serv *serv); + void (*sv_shutdown)(struct svc_serv *serv, + struct net *net); /* Callback to use when last thread * exits. */ @@ -413,14 +414,14 @@ struct svc_procedure { /* * Function prototypes. */ -void svc_rpcb_cleanup(struct svc_serv *serv); +void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *)); + void (*shutdown)(struct svc_serv *, struct net *net)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *), + void (*shutdown)(struct svc_serv *, struct net *net), svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index cb2caaee2af9..a8b49a044619 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -382,10 +382,10 @@ static int svc_rpcb_setup(struct svc_serv *serv, struct net *net) return 0; } -void svc_rpcb_cleanup(struct svc_serv *serv) +void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net) { - svc_unregister(serv, &init_net); - rpcb_put_local(&init_net); + svc_unregister(serv, net); + rpcb_put_local(net); } EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); @@ -411,7 +411,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv, struct net *net)) { struct svc_serv *serv; unsigned int vers; @@ -485,7 +485,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv, struct net *net)) { return __svc_create(prog, bufsize, /*npools*/1, shutdown); } @@ -493,7 +493,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv), + void (*shutdown)(struct svc_serv *serv, struct net *net), svc_thread_fn func, struct module *mod) { struct svc_serv *serv; @@ -542,7 +542,7 @@ svc_destroy(struct svc_serv *serv) svc_close_all(serv); if (serv->sv_shutdown) - serv->sv_shutdown(serv); + serv->sv_shutdown(serv, current->nsproxy->net_ns); cache_clean_deferred(serv); From 0a402d5a653ee2b613aaba3092a87b1e964622ce Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 19 Jan 2012 21:42:21 +0400 Subject: [PATCH 106/316] SUNRPC: cache creation and destruction routines introduced This patch prepares infrastructure for network namespace aware cache detail allocation. One note about adding network namespace link to cache structure. It's going to be used later in NFS DNS cache parsing routine (nfs_dns_parse for rpc_pton() call). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust Acked-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 4 ++++ net/sunrpc/cache.c | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 590a8ab0cec3..259381ca811b 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -117,6 +117,7 @@ struct cache_detail { struct cache_detail_procfs procfs; struct cache_detail_pipefs pipefs; } u; + struct net *net; }; @@ -202,6 +203,9 @@ extern int cache_register_net(struct cache_detail *cd, struct net *net); extern void cache_unregister(struct cache_detail *cd); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); +extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net); +extern void cache_destroy_net(struct cache_detail *cd, struct net *net); + extern void sunrpc_init_cache_detail(struct cache_detail *cd); extern void sunrpc_destroy_cache_detail(struct cache_detail *cd); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index fefe06729f9d..a450b8ac648b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1664,6 +1664,32 @@ void cache_unregister(struct cache_detail *cd) } EXPORT_SYMBOL_GPL(cache_unregister); +struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) +{ + struct cache_detail *cd; + + cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL); + if (cd == NULL) + return ERR_PTR(-ENOMEM); + + cd->hash_table = kzalloc(cd->hash_size * sizeof(struct cache_head *), + GFP_KERNEL); + if (cd->hash_table == NULL) { + kfree(cd); + return ERR_PTR(-ENOMEM); + } + cd->net = net; + return cd; +} +EXPORT_SYMBOL_GPL(cache_create_net); + +void cache_destroy_net(struct cache_detail *cd, struct net *net) +{ + kfree(cd->hash_table); + kfree(cd); +} +EXPORT_SYMBOL_GPL(cache_destroy_net); + static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { From 73393232d6a425b6bb4cee590e3e66fc52532a15 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 19 Jan 2012 21:42:29 +0400 Subject: [PATCH 107/316] SUNRPC: create unix gid cache per network namespace v2: 1) fixed silly usage of template cache as a real one (this code left from static global cache for all) This patch makes unix_gid_cache cache detail allocated and registered per network namespace context. Thus with this patch unix_gid_cache contents for network namespace "X" are controlled from proc file system mount for the same network namespace "X". Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust Acked-by: J. Bruce Fields --- net/sunrpc/netns.h | 1 + net/sunrpc/sunrpc_syms.c | 14 +++++++--- net/sunrpc/svcauth_unix.c | 55 +++++++++++++++++++++++++++++---------- 3 files changed, 52 insertions(+), 18 deletions(-) diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 1fdeb1ba84bd..309f88ddb060 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -9,6 +9,7 @@ struct cache_detail; struct sunrpc_net { struct proc_dir_entry *proc_net_rpc; struct cache_detail *ip_map_cache; + struct cache_detail *unix_gid_cache; struct super_block *pipefs_sb; struct mutex pipefs_sb_lock; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index b4217dc8599c..38a72a1b465b 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -26,6 +26,9 @@ int sunrpc_net_id; +extern int unix_gid_cache_create(struct net *net); +extern int unix_gid_cache_destroy(struct net *net); + static __net_init int sunrpc_init_net(struct net *net) { int err; @@ -39,11 +42,17 @@ static __net_init int sunrpc_init_net(struct net *net) if (err) goto err_ipmap; + err = unix_gid_cache_create(net); + if (err) + goto err_unixgid; + rpc_pipefs_init_net(net); INIT_LIST_HEAD(&sn->all_clients); spin_lock_init(&sn->rpc_client_lock); return 0; +err_unixgid: + ip_map_cache_destroy(net); err_ipmap: rpc_proc_exit(net); err_proc: @@ -52,6 +61,7 @@ err_proc: static __net_exit void sunrpc_exit_net(struct net *net) { + unix_gid_cache_destroy(net); ip_map_cache_destroy(net); rpc_proc_exit(net); } @@ -63,8 +73,6 @@ static struct pernet_operations sunrpc_net_ops = { .size = sizeof(struct sunrpc_net), }; -extern struct cache_detail unix_gid_cache; - static int __init init_sunrpc(void) { @@ -86,7 +94,6 @@ init_sunrpc(void) #ifdef RPC_DEBUG rpc_register_sysctl(); #endif - cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ return 0; @@ -109,7 +116,6 @@ cleanup_sunrpc(void) svc_cleanup_xprt_sock(); unregister_rpc_pipefs(); rpc_destroy_mempool(); - cache_unregister(&unix_gid_cache); unregister_pernet_subsys(&sunrpc_net_ops); #ifdef RPC_DEBUG rpc_unregister_sysctl(); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 2f8c426c1384..a6eef38fb35c 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -436,7 +436,6 @@ struct unix_gid { uid_t uid; struct group_info *gi; }; -static struct cache_head *gid_table[GID_HASHMAX]; static void unix_gid_put(struct kref *kref) { @@ -494,8 +493,7 @@ static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); } -static struct unix_gid *unix_gid_lookup(uid_t uid); -extern struct cache_detail unix_gid_cache; +static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid); static int unix_gid_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -539,19 +537,19 @@ static int unix_gid_parse(struct cache_detail *cd, GROUP_AT(ug.gi, i) = gid; } - ugp = unix_gid_lookup(uid); + ugp = unix_gid_lookup(cd, uid); if (ugp) { struct cache_head *ch; ug.h.flags = 0; ug.h.expiry_time = expiry; - ch = sunrpc_cache_update(&unix_gid_cache, + ch = sunrpc_cache_update(cd, &ug.h, &ugp->h, hash_long(uid, GID_HASHBITS)); if (!ch) err = -ENOMEM; else { err = 0; - cache_put(ch, &unix_gid_cache); + cache_put(ch, cd); } } else err = -ENOMEM; @@ -587,10 +585,9 @@ static int unix_gid_show(struct seq_file *m, return 0; } -struct cache_detail unix_gid_cache = { +static struct cache_detail unix_gid_cache_template = { .owner = THIS_MODULE, .hash_size = GID_HASHMAX, - .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, .cache_upcall = unix_gid_upcall, @@ -602,14 +599,42 @@ struct cache_detail unix_gid_cache = { .alloc = unix_gid_alloc, }; -static struct unix_gid *unix_gid_lookup(uid_t uid) +int unix_gid_cache_create(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd; + int err; + + cd = cache_create_net(&unix_gid_cache_template, net); + if (IS_ERR(cd)) + return PTR_ERR(cd); + err = cache_register_net(cd, net); + if (err) { + cache_destroy_net(cd, net); + return err; + } + sn->unix_gid_cache = cd; + return 0; +} + +void unix_gid_cache_destroy(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd = sn->unix_gid_cache; + + sn->unix_gid_cache = NULL; + cache_purge(cd); + cache_unregister_net(cd, net); + cache_destroy_net(cd, net); +} + +static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid) { struct unix_gid ug; struct cache_head *ch; ug.uid = uid; - ch = sunrpc_cache_lookup(&unix_gid_cache, &ug.h, - hash_long(uid, GID_HASHBITS)); + ch = sunrpc_cache_lookup(cd, &ug.h, hash_long(uid, GID_HASHBITS)); if (ch) return container_of(ch, struct unix_gid, h); else @@ -621,11 +646,13 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) struct unix_gid *ug; struct group_info *gi; int ret; + struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, + sunrpc_net_id); - ug = unix_gid_lookup(uid); + ug = unix_gid_lookup(sn->unix_gid_cache, uid); if (!ug) return ERR_PTR(-EAGAIN); - ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle); + ret = cache_check(sn->unix_gid_cache, &ug->h, &rqstp->rq_chandle); switch (ret) { case -ENOENT: return ERR_PTR(-ENOENT); @@ -633,7 +660,7 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) return ERR_PTR(-ESHUTDOWN); case 0: gi = get_group_info(ug->gi); - cache_put(&ug->h, &unix_gid_cache); + cache_put(&ug->h, sn->unix_gid_cache); return gi; default: return ERR_PTR(-EAGAIN); From a1db410d0bbadc49943f0fcddb21702ceb429396 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 19 Jan 2012 21:42:37 +0400 Subject: [PATCH 108/316] SUNRPC: create GSS auth cache per network namespace This patch makes GSS auth cache details allocated and registered per network namespace context. Thus with this patch rsi_cache and rsc_cache contents for network namespace "X" are controlled from proc file system mount for the same network namespace "X". Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust Acked-by: J. Bruce Fields --- include/linux/sunrpc/svcauth_gss.h | 2 + net/sunrpc/auth_gss/auth_gss.c | 21 ++++ net/sunrpc/auth_gss/svcauth_gss.c | 177 ++++++++++++++++++++--------- net/sunrpc/netns.h | 2 + net/sunrpc/sunrpc_syms.c | 1 + 5 files changed, 147 insertions(+), 56 deletions(-) diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index 83bbee3f089c..7c32daa025eb 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -18,6 +18,8 @@ int gss_svc_init(void); void gss_svc_shutdown(void); +int gss_svc_init_net(struct net *net); +void gss_svc_shutdown_net(struct net *net); int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); u32 svcauth_gss_flavor(struct auth_domain *dom); char *svc_gss_principal(struct svc_rqst *); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5ebb602cabe0..cb2e56452748 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1662,6 +1662,21 @@ static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .release_pipe = gss_pipe_release, }; +static __net_init int rpcsec_gss_init_net(struct net *net) +{ + return gss_svc_init_net(net); +} + +static __net_exit void rpcsec_gss_exit_net(struct net *net) +{ + gss_svc_shutdown_net(net); +} + +static struct pernet_operations rpcsec_gss_net_ops = { + .init = rpcsec_gss_init_net, + .exit = rpcsec_gss_exit_net, +}; + /* * Initialize RPCSEC_GSS module */ @@ -1675,8 +1690,13 @@ static int __init init_rpcsec_gss(void) err = gss_svc_init(); if (err) goto out_unregister; + err = register_pernet_subsys(&rpcsec_gss_net_ops); + if (err) + goto out_svc_exit; rpc_init_wait_queue(&pipe_version_rpc_waitqueue, "gss pipe version"); return 0; +out_svc_exit: + gss_svc_shutdown(); out_unregister: rpcauth_unregister(&authgss_ops); out: @@ -1685,6 +1705,7 @@ out: static void __exit exit_rpcsec_gss(void) { + unregister_pernet_subsys(&rpcsec_gss_net_ops); gss_svc_shutdown(); rpcauth_unregister(&authgss_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 8d0f7d3c71c8..1600cfb1618c 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -48,6 +48,8 @@ #include #include +#include "../netns.h" + #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -75,10 +77,8 @@ struct rsi { int major_status, minor_status; }; -static struct cache_head *rsi_table[RSI_HASHMAX]; -static struct cache_detail rsi_cache; -static struct rsi *rsi_update(struct rsi *new, struct rsi *old); -static struct rsi *rsi_lookup(struct rsi *item); +static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old); +static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item); static void rsi_free(struct rsi *rsii) { @@ -216,7 +216,7 @@ static int rsi_parse(struct cache_detail *cd, if (dup_to_netobj(&rsii.in_token, buf, len)) goto out; - rsip = rsi_lookup(&rsii); + rsip = rsi_lookup(cd, &rsii); if (!rsip) goto out; @@ -258,21 +258,20 @@ static int rsi_parse(struct cache_detail *cd, if (dup_to_netobj(&rsii.out_token, buf, len)) goto out; rsii.h.expiry_time = expiry; - rsip = rsi_update(&rsii, rsip); + rsip = rsi_update(cd, &rsii, rsip); status = 0; out: rsi_free(&rsii); if (rsip) - cache_put(&rsip->h, &rsi_cache); + cache_put(&rsip->h, cd); else status = -ENOMEM; return status; } -static struct cache_detail rsi_cache = { +static struct cache_detail rsi_cache_template = { .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, - .hash_table = rsi_table, .name = "auth.rpcsec.init", .cache_put = rsi_put, .cache_upcall = rsi_upcall, @@ -283,24 +282,24 @@ static struct cache_detail rsi_cache = { .alloc = rsi_alloc, }; -static struct rsi *rsi_lookup(struct rsi *item) +static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item) { struct cache_head *ch; int hash = rsi_hash(item); - ch = sunrpc_cache_lookup(&rsi_cache, &item->h, hash); + ch = sunrpc_cache_lookup(cd, &item->h, hash); if (ch) return container_of(ch, struct rsi, h); else return NULL; } -static struct rsi *rsi_update(struct rsi *new, struct rsi *old) +static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old) { struct cache_head *ch; int hash = rsi_hash(new); - ch = sunrpc_cache_update(&rsi_cache, &new->h, + ch = sunrpc_cache_update(cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct rsi, h); @@ -339,10 +338,8 @@ struct rsc { char *client_name; }; -static struct cache_head *rsc_table[RSC_HASHMAX]; -static struct cache_detail rsc_cache; -static struct rsc *rsc_update(struct rsc *new, struct rsc *old); -static struct rsc *rsc_lookup(struct rsc *item); +static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old); +static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item); static void rsc_free(struct rsc *rsci) { @@ -444,7 +441,7 @@ static int rsc_parse(struct cache_detail *cd, if (expiry == 0) goto out; - rscp = rsc_lookup(&rsci); + rscp = rsc_lookup(cd, &rsci); if (!rscp) goto out; @@ -506,22 +503,21 @@ static int rsc_parse(struct cache_detail *cd, } rsci.h.expiry_time = expiry; - rscp = rsc_update(&rsci, rscp); + rscp = rsc_update(cd, &rsci, rscp); status = 0; out: gss_mech_put(gm); rsc_free(&rsci); if (rscp) - cache_put(&rscp->h, &rsc_cache); + cache_put(&rscp->h, cd); else status = -ENOMEM; return status; } -static struct cache_detail rsc_cache = { +static struct cache_detail rsc_cache_template = { .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, - .hash_table = rsc_table, .name = "auth.rpcsec.context", .cache_put = rsc_put, .cache_parse = rsc_parse, @@ -531,24 +527,24 @@ static struct cache_detail rsc_cache = { .alloc = rsc_alloc, }; -static struct rsc *rsc_lookup(struct rsc *item) +static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item) { struct cache_head *ch; int hash = rsc_hash(item); - ch = sunrpc_cache_lookup(&rsc_cache, &item->h, hash); + ch = sunrpc_cache_lookup(cd, &item->h, hash); if (ch) return container_of(ch, struct rsc, h); else return NULL; } -static struct rsc *rsc_update(struct rsc *new, struct rsc *old) +static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old) { struct cache_head *ch; int hash = rsc_hash(new); - ch = sunrpc_cache_update(&rsc_cache, &new->h, + ch = sunrpc_cache_update(cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct rsc, h); @@ -558,7 +554,7 @@ static struct rsc *rsc_update(struct rsc *new, struct rsc *old) static struct rsc * -gss_svc_searchbyctx(struct xdr_netobj *handle) +gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle) { struct rsc rsci; struct rsc *found; @@ -566,11 +562,11 @@ gss_svc_searchbyctx(struct xdr_netobj *handle) memset(&rsci, 0, sizeof(rsci)); if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) return NULL; - found = rsc_lookup(&rsci); + found = rsc_lookup(cd, &rsci); rsc_free(&rsci); if (!found) return NULL; - if (cache_check(&rsc_cache, &found->h, NULL)) + if (cache_check(cd, &found->h, NULL)) return NULL; return found; } @@ -968,20 +964,20 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) } static inline int -gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) +gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp, struct rsi *rsip) { struct rsc *rsci; int rc; if (rsip->major_status != GSS_S_COMPLETE) return gss_write_null_verf(rqstp); - rsci = gss_svc_searchbyctx(&rsip->out_handle); + rsci = gss_svc_searchbyctx(cd, &rsip->out_handle); if (rsci == NULL) { rsip->major_status = GSS_S_NO_CONTEXT; return gss_write_null_verf(rqstp); } rc = gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN); - cache_put(&rsci->h, &rsc_cache); + cache_put(&rsci->h, cd); return rc; } @@ -1000,6 +996,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, struct xdr_netobj tmpobj; struct rsi *rsip, rsikey; int ret; + struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); /* Read the verifier; should be NULL: */ *authp = rpc_autherr_badverf; @@ -1028,17 +1025,17 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, } /* Perform upcall, or find upcall result: */ - rsip = rsi_lookup(&rsikey); + rsip = rsi_lookup(sn->rsi_cache, &rsikey); rsi_free(&rsikey); if (!rsip) return SVC_CLOSE; - if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0) + if (cache_check(sn->rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0) /* No upcall result: */ return SVC_CLOSE; ret = SVC_CLOSE; /* Got an answer to the upcall; use it: */ - if (gss_write_init_verf(rqstp, rsip)) + if (gss_write_init_verf(sn->rsc_cache, rqstp, rsip)) goto out; if (resv->iov_len + 4 > PAGE_SIZE) goto out; @@ -1055,7 +1052,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, ret = SVC_COMPLETE; out: - cache_put(&rsip->h, &rsi_cache); + cache_put(&rsip->h, sn->rsi_cache); return ret; } @@ -1079,6 +1076,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) __be32 *rpcstart; __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; + struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n", argv->iov_len); @@ -1129,7 +1127,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: /* Look up the context, and check the verifier: */ *authp = rpcsec_gsserr_credproblem; - rsci = gss_svc_searchbyctx(&gc->gc_ctx); + rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx); if (!rsci) goto auth_err; switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) { @@ -1209,7 +1207,7 @@ drop: ret = SVC_DROP; out: if (rsci) - cache_put(&rsci->h, &rsc_cache); + cache_put(&rsci->h, sn->rsc_cache); return ret; } @@ -1362,6 +1360,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &gsd->clcred; struct xdr_buf *resbuf = &rqstp->rq_res; int stat = -EINVAL; + struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; @@ -1404,7 +1403,7 @@ out_err: put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; if (gsd->rsci) - cache_put(&gsd->rsci->h, &rsc_cache); + cache_put(&gsd->rsci->h, sn->rsc_cache); gsd->rsci = NULL; return stat; @@ -1429,30 +1428,96 @@ static struct auth_ops svcauthops_gss = { .set_client = svcauth_gss_set_client, }; +static int rsi_cache_create_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd; + int err; + + cd = cache_create_net(&rsi_cache_template, net); + if (IS_ERR(cd)) + return PTR_ERR(cd); + err = cache_register_net(cd, net); + if (err) { + cache_destroy_net(cd, net); + return err; + } + sn->rsi_cache = cd; + return 0; +} + +static void rsi_cache_destroy_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd = sn->rsi_cache; + + sn->rsi_cache = NULL; + cache_purge(cd); + cache_unregister_net(cd, net); + cache_destroy_net(cd, net); +} + +static int rsc_cache_create_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd; + int err; + + cd = cache_create_net(&rsc_cache_template, net); + if (IS_ERR(cd)) + return PTR_ERR(cd); + err = cache_register_net(cd, net); + if (err) { + cache_destroy_net(cd, net); + return err; + } + sn->rsc_cache = cd; + return 0; +} + +static void rsc_cache_destroy_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd = sn->rsc_cache; + + sn->rsc_cache = NULL; + cache_purge(cd); + cache_unregister_net(cd, net); + cache_destroy_net(cd, net); +} + +int +gss_svc_init_net(struct net *net) +{ + int rv; + + rv = rsc_cache_create_net(net); + if (rv) + return rv; + rv = rsi_cache_create_net(net); + if (rv) + goto out1; + return 0; +out1: + rsc_cache_destroy_net(net); + return rv; +} + +void +gss_svc_shutdown_net(struct net *net) +{ + rsi_cache_destroy_net(net); + rsc_cache_destroy_net(net); +} + int gss_svc_init(void) { - int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); - if (rv) - return rv; - rv = cache_register(&rsc_cache); - if (rv) - goto out1; - rv = cache_register(&rsi_cache); - if (rv) - goto out2; - return 0; -out2: - cache_unregister(&rsc_cache); -out1: - svc_auth_unregister(RPC_AUTH_GSS); - return rv; + return svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); } void gss_svc_shutdown(void) { - cache_unregister(&rsc_cache); - cache_unregister(&rsi_cache); svc_auth_unregister(RPC_AUTH_GSS); } diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 309f88ddb060..ce7bd449173d 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -10,6 +10,8 @@ struct sunrpc_net { struct proc_dir_entry *proc_net_rpc; struct cache_detail *ip_map_cache; struct cache_detail *unix_gid_cache; + struct cache_detail *rsc_cache; + struct cache_detail *rsi_cache; struct super_block *pipefs_sb; struct mutex pipefs_sb_lock; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 38a72a1b465b..d16ac088f6d8 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -25,6 +25,7 @@ #include "netns.h" int sunrpc_net_id; +EXPORT_SYMBOL_GPL(sunrpc_net_id); extern int unix_gid_cache_create(struct net *net); extern int unix_gid_cache_destroy(struct net *net); From d05cc10406893dec65b8e89746e7d4c333935415 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 19 Jan 2012 21:42:45 +0400 Subject: [PATCH 109/316] SUNRPC: ip map cache per network namespace cleanup This patch converts ip_map_cache per network namespace implemenetation to the same view, as other caches done in the series. Besides generalization, code becomes shorter with this patch. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust Acked-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 71 +++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a6eef38fb35c..bcd574f2ac56 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -211,7 +211,7 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (rpc_pton(&init_net, buf, len, &address.sa, sizeof(address)) == 0) + if (rpc_pton(cd->net, buf, len, &address.sa, sizeof(address)) == 0) return -EINVAL; switch (address.sa.sa_family) { case AF_INET: @@ -876,56 +876,45 @@ struct auth_ops svcauth_unix = { .set_client = svcauth_unix_set_client, }; +static struct cache_detail ip_map_cache_template = { + .owner = THIS_MODULE, + .hash_size = IP_HASHMAX, + .name = "auth.unix.ip", + .cache_put = ip_map_put, + .cache_upcall = ip_map_upcall, + .cache_parse = ip_map_parse, + .cache_show = ip_map_show, + .match = ip_map_match, + .init = ip_map_init, + .update = update, + .alloc = ip_map_alloc, +}; + int ip_map_cache_create(struct net *net) { - int err = -ENOMEM; - struct cache_detail *cd; - struct cache_head **tbl; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd; + int err; - cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); - if (cd == NULL) - goto err_cd; - - tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL); - if (tbl == NULL) - goto err_tbl; - - cd->owner = THIS_MODULE, - cd->hash_size = IP_HASHMAX, - cd->hash_table = tbl, - cd->name = "auth.unix.ip", - cd->cache_put = ip_map_put, - cd->cache_upcall = ip_map_upcall, - cd->cache_parse = ip_map_parse, - cd->cache_show = ip_map_show, - cd->match = ip_map_match, - cd->init = ip_map_init, - cd->update = update, - cd->alloc = ip_map_alloc, - + cd = cache_create_net(&ip_map_cache_template, net); + if (IS_ERR(cd)) + return PTR_ERR(cd); err = cache_register_net(cd, net); - if (err) - goto err_reg; - + if (err) { + cache_destroy_net(cd, net); + return err; + } sn->ip_map_cache = cd; return 0; - -err_reg: - kfree(tbl); -err_tbl: - kfree(cd); -err_cd: - return err; } void ip_map_cache_destroy(struct net *net) { - struct sunrpc_net *sn; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd = sn->ip_map_cache; - sn = net_generic(net, sunrpc_net_id); - cache_purge(sn->ip_map_cache); - cache_unregister_net(sn->ip_map_cache, net); - kfree(sn->ip_map_cache->hash_table); - kfree(sn->ip_map_cache); + sn->ip_map_cache = NULL; + cache_purge(cd); + cache_unregister_net(cd, net); + cache_destroy_net(cd, net); } From 2c5f846747526e2b83c5f1b8e69016be0e2e87c0 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 19 Jan 2012 21:42:53 +0400 Subject: [PATCH 110/316] SUNRPC: generic cache register routines removed All cache users now uses network-namespace-aware routines, so generic ones are obsolete. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust Acked-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 2 -- net/sunrpc/cache.c | 12 ------------ 2 files changed, 14 deletions(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 259381ca811b..f5fd6160dbca 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -198,9 +198,7 @@ extern void cache_flush(void); extern void cache_purge(struct cache_detail *detail); #define NEVER (0x7FFFFFFF) extern void __init cache_initialize(void); -extern int cache_register(struct cache_detail *cd); extern int cache_register_net(struct cache_detail *cd, struct net *net); -extern void cache_unregister(struct cache_detail *cd); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index a450b8ac648b..f21ece088764 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1645,12 +1645,6 @@ int cache_register_net(struct cache_detail *cd, struct net *net) } EXPORT_SYMBOL_GPL(cache_register_net); -int cache_register(struct cache_detail *cd) -{ - return cache_register_net(cd, &init_net); -} -EXPORT_SYMBOL_GPL(cache_register); - void cache_unregister_net(struct cache_detail *cd, struct net *net) { remove_cache_proc_entries(cd, net); @@ -1658,12 +1652,6 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) } EXPORT_SYMBOL_GPL(cache_unregister_net); -void cache_unregister(struct cache_detail *cd) -{ - cache_unregister_net(cd, &init_net); -} -EXPORT_SYMBOL_GPL(cache_unregister); - struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) { struct cache_detail *cd; From 599ec129c2f0e4da955bef685880260de1813c85 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 19 Jan 2012 18:51:04 +0400 Subject: [PATCH 111/316] NFS: parse DNS cache in proper network namespace context This patch replaces "init_net" with cache's owner net in rpc_pton() call. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/dns_resolve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 7edc62a8a64f..be9a530987b3 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -224,7 +224,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) len = qword_get(&buf, buf1, sizeof(buf1)); if (len <= 0) goto out; - key.addrlen = rpc_pton(&init_net, buf1, len, + key.addrlen = rpc_pton(cd->net, buf1, len, (struct sockaddr *)&key.addr, sizeof(key.addr)); From 170942726b16a1dfcc605f0b510b9663b66fa7a3 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 19 Jan 2012 19:05:57 +0400 Subject: [PATCH 112/316] NFS: decode destination address in proper network namespace context This patch replaces "init_net" with NFS client's owner net in rpc_pton() call in decode_ds_addr(). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayoutdev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 0d8b9523a3cb..6eb59b044bfc 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -378,7 +378,7 @@ out: * Currently only supports ipv4, ipv6 and one multi-path address. */ static struct nfs4_pnfs_ds_addr * -decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) +decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) { struct nfs4_pnfs_ds_addr *da = NULL; char *buf, *portstr; @@ -457,7 +457,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) INIT_LIST_HEAD(&da->da_node); - if (!rpc_pton(&init_net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, + if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, sizeof(da->da_addr))) { dprintk("%s: error parsing address %s\n", __func__, buf); goto out_free_da; @@ -625,7 +625,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) mp_count = be32_to_cpup(p); /* multipath count */ for (j = 0; j < mp_count; j++) { - da = decode_ds_addr(&stream, gfp_flags); + da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net, + &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } From ec7652aaf261b7dcb368344369df1e99886c7cd2 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 6 Dec 2011 16:42:40 +0300 Subject: [PATCH 113/316] SUNRPC: register RPC stats /proc entries in passed network namespace context This patch makes it possible to create NFS program entry ("/proc/net/rpc/nfs") in passed network namespace context instead of hard-coded "init_net". Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 6 +++--- include/linux/sunrpc/stats.h | 8 ++++---- net/sunrpc/stats.c | 15 ++++++++------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 65486e652943..d2c760e193f4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1625,14 +1625,14 @@ static int __init init_nfs_fs(void) goto out0; #ifdef CONFIG_PROC_FS - rpc_proc_register(&nfs_rpcstat); + rpc_proc_register(&init_net, &nfs_rpcstat); #endif if ((err = register_nfs_fs()) != 0) goto out; return 0; out: #ifdef CONFIG_PROC_FS - rpc_proc_unregister("nfs"); + rpc_proc_unregister(&init_net, "nfs"); #endif nfs_destroy_directcache(); out0: @@ -1671,7 +1671,7 @@ static void __exit exit_nfs_fs(void) nfs_dns_resolver_destroy(); nfs_idmap_quit(); #ifdef CONFIG_PROC_FS - rpc_proc_unregister("nfs"); + rpc_proc_unregister(&init_net, "nfs"); #endif nfs_cleanup_cb_ident_idr(); unregister_nfs_fs(); diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index 680471d1f28a..f625b5746bdc 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -58,8 +58,8 @@ void rpc_modcount(struct inode *, int); #endif #ifdef CONFIG_PROC_FS -struct proc_dir_entry * rpc_proc_register(struct rpc_stat *); -void rpc_proc_unregister(const char *); +struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); +void rpc_proc_unregister(struct net *,const char *); void rpc_proc_zero(struct rpc_program *); struct proc_dir_entry * svc_proc_register(struct svc_stat *, const struct file_operations *); @@ -69,8 +69,8 @@ void svc_seq_show(struct seq_file *, const struct svc_stat *); #else -static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; } -static inline void rpc_proc_unregister(const char *p) {} +static inline struct proc_dir_entry *rpc_proc_register(struct net *, struct rpc_stat *s) { return NULL; } +static inline void rpc_proc_unregisterstruct net *, (const char *p) {} static inline void rpc_proc_zero(struct rpc_program *p) {} static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s, diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 80df89d957ba..f0f6e7ceadd5 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -213,28 +213,29 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats); * Register/unregister RPC proc files */ static inline struct proc_dir_entry * -do_register(const char *name, void *data, const struct file_operations *fops) +do_register(struct net *net, const char *name, void *data, + const struct file_operations *fops) { struct sunrpc_net *sn; dprintk("RPC: registering /proc/net/rpc/%s\n", name); - sn = net_generic(&init_net, sunrpc_net_id); + sn = net_generic(net, sunrpc_net_id); return proc_create_data(name, 0, sn->proc_net_rpc, fops, data); } struct proc_dir_entry * -rpc_proc_register(struct rpc_stat *statp) +rpc_proc_register(struct net *net, struct rpc_stat *statp) { - return do_register(statp->program->name, statp, &rpc_proc_fops); + return do_register(net, statp->program->name, statp, &rpc_proc_fops); } EXPORT_SYMBOL_GPL(rpc_proc_register); void -rpc_proc_unregister(const char *name) +rpc_proc_unregister(struct net *net, const char *name) { struct sunrpc_net *sn; - sn = net_generic(&init_net, sunrpc_net_id); + sn = net_generic(net, sunrpc_net_id); remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(rpc_proc_unregister); @@ -242,7 +243,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister); struct proc_dir_entry * svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) { - return do_register(statp->program->pg_name, statp, fops); + return do_register(&init_net, statp->program->pg_name, statp, fops); } EXPORT_SYMBOL_GPL(svc_proc_register); From cc9f4be5ffb86b4b483eeb15eb08aebc60a1b9a9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 24 Jan 2012 03:28:07 +0000 Subject: [PATCH 114/316] sunrpc: fix stats.h for CONFIG_PROC_FS not enabled Fix build errors in linux/sunrpc/stats.h when CONFIG_PROC_FS is not enabled: - add parameter names to inline functions - fix placement of '(' in rpc_proc_unregister() Fixes these errors: include/linux/sunrpc/stats.h:72:63: error: parameter name omitted include/linux/sunrpc/stats.h:73:46: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'net' Signed-off-by: Randy Dunlap Signed-off-by: Trond Myklebust --- include/linux/sunrpc/stats.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index f625b5746bdc..c5aaf7ddb5cf 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -69,8 +69,8 @@ void svc_seq_show(struct seq_file *, const struct svc_stat *); #else -static inline struct proc_dir_entry *rpc_proc_register(struct net *, struct rpc_stat *s) { return NULL; } -static inline void rpc_proc_unregisterstruct net *, (const char *p) {} +static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } +static inline void rpc_proc_unregister(struct net *net, const char *p) {} static inline void rpc_proc_zero(struct rpc_program *p) {} static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s, From 246590f56c9f281d60b7dd7efa0818307e65600d Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 6 Dec 2011 16:42:49 +0300 Subject: [PATCH 115/316] SUNRPC: register service stats /proc entries in passed network namespace context This patch makes it possible to create NFSd program entry ("/proc/net/rpc/nfsd") in passed network namespace context instead of hard-coded "init_net". Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfsd/stats.c | 5 +++-- include/linux/sunrpc/stats.h | 8 ++++---- net/sunrpc/stats.c | 8 ++++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index a2e2402b2afb..6d4521feb6e3 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "nfsd.h" @@ -94,11 +95,11 @@ static const struct file_operations nfsd_proc_fops = { void nfsd_stat_init(void) { - svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); + svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops); } void nfsd_stat_shutdown(void) { - svc_proc_unregister("nfsd"); + svc_proc_unregister(&init_net, "nfsd"); } diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index c5aaf7ddb5cf..76f3f7cc6e33 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -61,9 +61,9 @@ void rpc_modcount(struct inode *, int); struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); void rpc_proc_unregister(struct net *,const char *); void rpc_proc_zero(struct rpc_program *); -struct proc_dir_entry * svc_proc_register(struct svc_stat *, +struct proc_dir_entry * svc_proc_register(struct net *, struct svc_stat *, const struct file_operations *); -void svc_proc_unregister(const char *); +void svc_proc_unregister(struct net *, const char *); void svc_seq_show(struct seq_file *, const struct svc_stat *); @@ -73,9 +73,9 @@ static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct r static inline void rpc_proc_unregister(struct net *net, const char *p) {} static inline void rpc_proc_zero(struct rpc_program *p) {} -static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s, +static inline struct proc_dir_entry *svc_proc_register(struct net *net, struct svc_stat *s, const struct file_operations *f) { return NULL; } -static inline void svc_proc_unregister(const char *p) {} +static inline void svc_proc_unregister(struct net *net, const char *p) {} static inline void svc_seq_show(struct seq_file *seq, const struct svc_stat *st) {} diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index f0f6e7ceadd5..3c4f6888c891 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -241,18 +241,18 @@ rpc_proc_unregister(struct net *net, const char *name) EXPORT_SYMBOL_GPL(rpc_proc_unregister); struct proc_dir_entry * -svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) +svc_proc_register(struct net *net, struct svc_stat *statp, const struct file_operations *fops) { - return do_register(&init_net, statp->program->pg_name, statp, fops); + return do_register(net, statp->program->pg_name, statp, fops); } EXPORT_SYMBOL_GPL(svc_proc_register); void -svc_proc_unregister(const char *name) +svc_proc_unregister(struct net *net, const char *name) { struct sunrpc_net *sn; - sn = net_generic(&init_net, sunrpc_net_id); + sn = net_generic(net, sunrpc_net_id); remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(svc_proc_unregister); From babea479b75a9ea3d84ace6d880513e18397a8bb Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 20 Jan 2012 17:19:56 +0400 Subject: [PATCH 116/316] NFS: remove unused nfs4_find_client_no_ident function Looks like this function survived after some cleanup patch without a reason. Now it's not called or referenced and I believe, that it can be simply removed. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 27 --------------------------- fs/nfs/internal.h | 1 - 2 files changed, 28 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index df60d9971b95..34c8d1cbf06e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1196,33 +1196,6 @@ error: } #ifdef CONFIG_NFS_V4 -/* - * NFSv4.0 callback thread helper - * - * Find a client by IP address, protocol version, and minorversion - * - * Called from the pg_authenticate method. The callback identifier - * is not used as it has not been decoded. - * - * Returns NULL if no such client - */ -struct nfs_client * -nfs4_find_client_no_ident(const struct sockaddr *addr) -{ - struct nfs_client *clp; - - spin_lock(&nfs_client_lock); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, 0) == false) - continue; - atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); - return clp; - } - spin_unlock(&nfs_client_lock); - return NULL; -} - /* * NFSv4.0 callback thread helper * diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2b9836fe4434..eda4cde40fb2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -149,7 +149,6 @@ extern struct rpc_program nfs_program; extern void nfs_cleanup_cb_ident_idr(void); extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); extern struct nfs_client *nfs4_find_client_ident(int); extern struct nfs_client * nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); From 4cb54ca2069903121e4c03ec427147c47bed5755 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 20 Jan 2012 16:50:53 +0400 Subject: [PATCH 117/316] SUNRPC: search for service transports in network namespace context Service transports are parametrized by network namespace. And thus lookup of transport instance have to take network namespace into account. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust Acked-by: J. Bruce Fields --- fs/lockd/svc.c | 2 +- fs/nfsd/nfsctl.c | 4 ++-- include/linux/sunrpc/svc_xprt.h | 3 ++- net/sunrpc/svc_xprt.c | 6 +++++- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index c061b9aa7ddb..ff379ff7761f 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -193,7 +193,7 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name, { struct svc_xprt *xprt; - xprt = svc_find_xprt(serv, name, family, 0); + xprt = svc_find_xprt(serv, name, &init_net, family, 0); if (xprt == NULL) return svc_create_xprt(serv, name, &init_net, family, port, SVC_SOCK_DEFAULTS); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 330352d379b6..64c24af8d7ea 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -722,7 +722,7 @@ static ssize_t __write_ports_addxprt(char *buf) nfsd_serv->sv_nrthreads--; return 0; out_close: - xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); + xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port); if (xprt != NULL) { svc_close_xprt(xprt); svc_xprt_put(xprt); @@ -748,7 +748,7 @@ static ssize_t __write_ports_delxprt(char *buf) if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) return -EINVAL; - xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); + xprt = svc_find_xprt(nfsd_serv, transport, &init_net, AF_UNSPEC, port); if (xprt == NULL) return -ENOTCONN; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index dfa900948af7..b3f64b12f141 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -121,7 +121,8 @@ void svc_close_xprt(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, - const sa_family_t af, const unsigned short port); + struct net *net, const sa_family_t af, + const unsigned short port); int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); static inline void svc_xprt_get(struct svc_xprt *xprt) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 74cb0d8e9ca1..de1e1a8b526b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1089,6 +1089,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * svc_find_xprt - find an RPC transport instance * @serv: pointer to svc_serv to search * @xcl_name: C string containing transport's class name + * @net: owner net pointer * @af: Address family of transport's local address * @port: transport's IP port number * @@ -1101,7 +1102,8 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * service's list that has a matching class name. */ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, - const sa_family_t af, const unsigned short port) + struct net *net, const sa_family_t af, + const unsigned short port) { struct svc_xprt *xprt; struct svc_xprt *found = NULL; @@ -1112,6 +1114,8 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, spin_lock_bh(&serv->sv_lock); list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { + if (xprt->xpt_net != net) + continue; if (strcmp(xprt->xpt_class->xcl_name, xcl_name)) continue; if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) From 6eac7d3f45a2519283d38bf670cb6968230124f8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jan 2012 13:53:37 -0500 Subject: [PATCH 118/316] SUNRPC: constify rpc_clnt fields cl_server and cl_protname ...and get rid of the superfluous cl_inline_name. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 9 ++++----- net/sunrpc/clnt.c | 29 ++++++++++++----------------- net/sunrpc/rpcb_clnt.c | 2 +- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 9e754e3458fc..db6970ced9bc 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -41,8 +41,8 @@ struct rpc_clnt { cl_vers, /* RPC version number */ cl_maxproc; /* max procedure number */ - char * cl_server; /* server machine name */ - char * cl_protname; /* protocol name */ + const char * cl_server; /* server machine name */ + const char * cl_protname; /* protocol name */ struct rpc_auth * cl_auth; /* authenticator */ struct rpc_stat * cl_stats; /* per-program statistics */ struct rpc_iostats * cl_metrics; /* per-client statistics */ @@ -62,7 +62,6 @@ struct rpc_clnt { struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; struct rpc_program * cl_program; - char cl_inline_name[32]; char *cl_principal; /* target to authenticate to */ }; @@ -97,7 +96,7 @@ struct rpc_procinfo { unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ u32 p_statidx; /* Which procedure to account */ - char * p_name; /* name of procedure */ + const char * p_name; /* name of procedure */ }; #ifdef __KERNEL__ @@ -109,7 +108,7 @@ struct rpc_create_args { size_t addrsize; struct sockaddr *saddress; const struct rpc_timeout *timeout; - char *servername; + const char *servername; struct rpc_program *program; u32 prognumber; /* overrides program->number */ u32 version; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4c6848017168..e9b22e8e16c7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -273,15 +273,9 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_err; clnt->cl_parent = clnt; - clnt->cl_server = clnt->cl_inline_name; - if (len > sizeof(clnt->cl_inline_name)) { - char *buf = kmalloc(len, GFP_KERNEL); - if (buf != NULL) - clnt->cl_server = buf; - else - len = sizeof(clnt->cl_inline_name); - } - strlcpy(clnt->cl_server, args->servername, len); + clnt->cl_server = kstrdup(args->servername, GFP_KERNEL); + if (clnt->cl_server == NULL) + goto out_no_server; clnt->cl_xprt = xprt; clnt->cl_procinfo = version->procs; @@ -346,8 +340,8 @@ out_no_path: out_no_principal: rpc_free_iostats(clnt->cl_metrics); out_no_stats: - if (clnt->cl_server != clnt->cl_inline_name) - kfree(clnt->cl_server); + kfree(clnt->cl_server); +out_no_server: kfree(clnt); out_err: xprt_put(xprt); @@ -470,6 +464,9 @@ rpc_clone_client(struct rpc_clnt *clnt) new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); if (!new) goto out_no_clnt; + new->cl_server = kstrdup(clnt->cl_server, GFP_KERNEL); + if (new->cl_server == NULL) + goto out_no_server; new->cl_parent = clnt; /* Turn off autobind on clones */ new->cl_autobind = 0; @@ -500,6 +497,8 @@ out_no_path: out_no_principal: rpc_free_iostats(new->cl_metrics); out_no_stats: + kfree(new->cl_server); +out_no_server: kfree(new); out_no_clnt: dprintk("RPC: %s: returned error %d\n", __func__, err); @@ -565,13 +564,9 @@ rpc_free_client(struct rpc_clnt *clnt) { dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (clnt->cl_parent != clnt) { + if (clnt->cl_parent != clnt) rpc_release_client(clnt->cl_parent); - goto out_free; - } - if (clnt->cl_server != clnt->cl_inline_name) - kfree(clnt->cl_server); -out_free: + kfree(clnt->cl_server); rpc_unregister_client(clnt); rpc_clnt_remove_pipedir(clnt); rpc_free_iostats(clnt->cl_metrics); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 4ce3a8e02953..d3978017b25d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -343,7 +343,7 @@ out: return result; } -static struct rpc_clnt *rpcb_create(struct net *net, char *hostname, +static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, struct sockaddr *srvaddr, size_t salen, int proto, u32 version) { From 080b794ce5ad318ce34c52abaedf1bc6788a5abb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jan 2012 13:53:56 -0500 Subject: [PATCH 119/316] SUNRPC: constify rpc_program->name Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++-- net/sunrpc/clnt.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index db6970ced9bc..4a46ffd73a04 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -70,12 +70,12 @@ struct rpc_clnt { */ #define RPC_MAXVERSION 4 struct rpc_program { - char * name; /* protocol name */ + const char * name; /* protocol name */ u32 number; /* program number */ unsigned int nrvers; /* number of versions */ struct rpc_version ** version; /* version array */ struct rpc_stat * stats; /* statistics */ - char * pipe_dir_name; /* path to rpc_pipefs dir */ + const char * pipe_dir_name; /* path to rpc_pipefs dir */ }; struct rpc_version { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e9b22e8e16c7..1b2317fa4043 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -118,7 +118,8 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) } static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, - struct rpc_clnt *clnt, char *dir_name) + struct rpc_clnt *clnt, + const char *dir_name) { static uint32_t clntid; char name[15]; @@ -151,7 +152,7 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, } static int -rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) +rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name) { struct super_block *pipefs_sb; struct dentry *dentry; From a613fa168afc19179a7547fbba45644c5b6912bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jan 2012 13:53:56 -0500 Subject: [PATCH 120/316] SUNRPC: constify the rpc_program Signed-off-by: Trond Myklebust --- fs/lockd/clnt4xdr.c | 2 +- fs/lockd/clntxdr.c | 8 ++++---- fs/lockd/mon.c | 8 ++++---- fs/nfs/client.c | 8 ++++---- fs/nfs/internal.h | 2 +- fs/nfs/mount_clnt.c | 10 +++++----- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs3xdr.c | 4 ++-- fs/nfs/nfs4xdr.c | 2 +- fs/nfsd/nfs4callback.c | 6 +++--- include/linux/lockd/lockd.h | 2 +- include/linux/lockd/xdr4.h | 2 +- include/linux/nfs_xdr.h | 10 +++++----- include/linux/sunrpc/clnt.h | 8 ++++---- include/linux/sunrpc/stats.h | 6 +++--- net/sunrpc/clnt.c | 8 ++++---- net/sunrpc/rpcb_clnt.c | 20 ++++++++++---------- 17 files changed, 54 insertions(+), 54 deletions(-) diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index f848b52c67b1..3ddcbb1c0a43 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -598,7 +598,7 @@ static struct rpc_procinfo nlm4_procedures[] = { PROC(GRANTED_RES, res, norep), }; -struct rpc_version nlm_version4 = { +const struct rpc_version nlm_version4 = { .number = 4, .nrprocs = ARRAY_SIZE(nlm4_procedures), .procs = nlm4_procedures, diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 180ac34feb9a..3d35e3e80c1c 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -596,19 +596,19 @@ static struct rpc_procinfo nlm_procedures[] = { PROC(GRANTED_RES, res, norep), }; -static struct rpc_version nlm_version1 = { +static const struct rpc_version nlm_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(nlm_procedures), .procs = nlm_procedures, }; -static struct rpc_version nlm_version3 = { +static const struct rpc_version nlm_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(nlm_procedures), .procs = nlm_procedures, }; -static struct rpc_version *nlm_versions[] = { +static const struct rpc_version *nlm_versions[] = { [1] = &nlm_version1, [3] = &nlm_version3, #ifdef CONFIG_LOCKD_V4 @@ -618,7 +618,7 @@ static struct rpc_version *nlm_versions[] = { static struct rpc_stat nlm_rpc_stats; -struct rpc_program nlm_program = { +const struct rpc_program nlm_program = { .name = "lockd", .number = NLM_PROGRAM, .nrvers = ARRAY_SIZE(nlm_versions), diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 65ba36b80a9e..c196030e530a 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -47,7 +47,7 @@ struct nsm_res { u32 state; }; -static struct rpc_program nsm_program; +static const struct rpc_program nsm_program; static LIST_HEAD(nsm_handles); static DEFINE_SPINLOCK(nsm_lock); @@ -534,19 +534,19 @@ static struct rpc_procinfo nsm_procedures[] = { }, }; -static struct rpc_version nsm_version1 = { +static const struct rpc_version nsm_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(nsm_procedures), .procs = nsm_procedures }; -static struct rpc_version * nsm_version[] = { +static const struct rpc_version *nsm_version[] = { [1] = &nsm_version1, }; static struct rpc_stat nsm_stats; -static struct rpc_program nsm_program = { +static const struct rpc_program nsm_program = { .name = "statd", .number = NSM_PROGRAM, .nrvers = ARRAY_SIZE(nsm_version), diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 34c8d1cbf06e..98af1cb28ee3 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -89,7 +89,7 @@ static bool nfs4_disable_idmapping = true; /* * RPC cruft for NFS */ -static struct rpc_version *nfs_version[5] = { +static const struct rpc_version *nfs_version[5] = { [2] = &nfs_version2, #ifdef CONFIG_NFS_V3 [3] = &nfs_version3, @@ -99,7 +99,7 @@ static struct rpc_version *nfs_version[5] = { #endif }; -struct rpc_program nfs_program = { +const struct rpc_program nfs_program = { .name = "nfs", .number = NFS_PROGRAM, .nrvers = ARRAY_SIZE(nfs_version), @@ -115,11 +115,11 @@ struct rpc_stat nfs_rpcstat = { #ifdef CONFIG_NFS_V3_ACL static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { +static const struct rpc_version *nfsacl_version[] = { [3] = &nfsacl_version3, }; -struct rpc_program nfsacl_program = { +const struct rpc_program nfsacl_program = { .name = "nfsacl", .number = NFS_ACL_PROGRAM, .nrvers = ARRAY_SIZE(nfsacl_version), diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index eda4cde40fb2..cdb121d3c6f4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -145,7 +145,7 @@ extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ -extern struct rpc_program nfs_program; +extern const struct rpc_program nfs_program; extern void nfs_cleanup_cb_ident_idr(void); extern void nfs_put_client(struct nfs_client *); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 4fbe3a8e5e6b..b37ca34af903 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -67,7 +67,7 @@ enum { MOUNTPROC3_EXPORT = 5, }; -static struct rpc_program mnt_program; +static const struct rpc_program mnt_program; /* * Defined by OpenGroup XNFS Version 3W, chapter 8 @@ -488,19 +488,19 @@ static struct rpc_procinfo mnt3_procedures[] = { }; -static struct rpc_version mnt_version1 = { +static const struct rpc_version mnt_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(mnt_procedures), .procs = mnt_procedures, }; -static struct rpc_version mnt_version3 = { +static const struct rpc_version mnt_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(mnt3_procedures), .procs = mnt3_procedures, }; -static struct rpc_version *mnt_version[] = { +static const struct rpc_version *mnt_version[] = { NULL, &mnt_version1, NULL, @@ -509,7 +509,7 @@ static struct rpc_version *mnt_version[] = { static struct rpc_stat mnt_stats; -static struct rpc_program mnt_program = { +static const struct rpc_program mnt_program = { .name = "mount", .number = NFS_MNT_PROGRAM, .nrvers = ARRAY_SIZE(mnt_version), diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 792cb13a4304..1f56000fabbd 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -1150,7 +1150,7 @@ struct rpc_procinfo nfs_procedures[] = { PROC(STATFS, fhandle, statfsres, 0), }; -struct rpc_version nfs_version2 = { +const struct rpc_version nfs_version2 = { .number = 2, .nrprocs = ARRAY_SIZE(nfs_procedures), .procs = nfs_procedures diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 183c6b123d0f..a77cc9a3ce55 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2461,7 +2461,7 @@ struct rpc_procinfo nfs3_procedures[] = { PROC(COMMIT, commit, commit, 5), }; -struct rpc_version nfs_version3 = { +const struct rpc_version nfs_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(nfs3_procedures), .procs = nfs3_procedures @@ -2489,7 +2489,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { }, }; -struct rpc_version nfsacl_version3 = { +const struct rpc_version nfsacl_version3 = { .number = 3, .nrprocs = sizeof(nfs3_acl_procedures)/ sizeof(nfs3_acl_procedures[0]), diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 95e92e438407..4633d405a94c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7109,7 +7109,7 @@ struct rpc_procinfo nfs4_procedures[] = { #endif /* CONFIG_NFS_V4_1 */ }; -struct rpc_version nfs_version4 = { +const struct rpc_version nfs_version4 = { .number = 4, .nrprocs = ARRAY_SIZE(nfs4_procedures), .procs = nfs4_procedures diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 426ccb171650..0e262f32ac41 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -605,18 +605,18 @@ static struct rpc_version nfs_cb_version4 = { .procs = nfs4_cb_procedures }; -static struct rpc_version *nfs_cb_version[] = { +static const struct rpc_version *nfs_cb_version[] = { &nfs_cb_version4, }; -static struct rpc_program cb_program; +static const struct rpc_program cb_program; static struct rpc_stat cb_stats = { .program = &cb_program }; #define NFS4_CALLBACK 0x40000000 -static struct rpc_program cb_program = { +static const struct rpc_program cb_program = { .name = "nfs4_cb", .number = NFS4_CALLBACK, .nrvers = ARRAY_SIZE(nfs_cb_version), diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 88a114fce477..8949167a148d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -188,7 +188,7 @@ struct nlm_block { /* * Global variables */ -extern struct rpc_program nlm_program; +extern const struct rpc_program nlm_program; extern struct svc_procedure nlmsvc_procedures[]; #ifdef CONFIG_LOCKD_V4 extern struct svc_procedure nlmsvc_procedures4[]; diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 7353821341ed..e58c88b52ce1 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -42,6 +42,6 @@ int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_cancargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_unlockargs(struct rpc_rqst *, u32 *, struct nlm_args *); */ -extern struct rpc_version nlm_version4; +extern const struct rpc_version nlm_version4; #endif /* LOCKD_XDR4_H */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f5188e10ea0e..144419a9cbd3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1274,11 +1274,11 @@ struct nfs_rpc_ops { extern const struct nfs_rpc_ops nfs_v2_clientops; extern const struct nfs_rpc_ops nfs_v3_clientops; extern const struct nfs_rpc_ops nfs_v4_clientops; -extern struct rpc_version nfs_version2; -extern struct rpc_version nfs_version3; -extern struct rpc_version nfs_version4; +extern const struct rpc_version nfs_version2; +extern const struct rpc_version nfs_version3; +extern const struct rpc_version nfs_version4; -extern struct rpc_version nfsacl_version3; -extern struct rpc_program nfsacl_program; +extern const struct rpc_version nfsacl_version3; +extern const struct rpc_program nfsacl_program; #endif diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4a46ffd73a04..a4c62e95c720 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -61,7 +61,7 @@ struct rpc_clnt { struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; - struct rpc_program * cl_program; + const struct rpc_program *cl_program; char *cl_principal; /* target to authenticate to */ }; @@ -73,7 +73,7 @@ struct rpc_program { const char * name; /* protocol name */ u32 number; /* program number */ unsigned int nrvers; /* number of versions */ - struct rpc_version ** version; /* version array */ + const struct rpc_version ** version; /* version array */ struct rpc_stat * stats; /* statistics */ const char * pipe_dir_name; /* path to rpc_pipefs dir */ }; @@ -109,7 +109,7 @@ struct rpc_create_args { struct sockaddr *saddress; const struct rpc_timeout *timeout; const char *servername; - struct rpc_program *program; + const struct rpc_program *program; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; @@ -128,7 +128,7 @@ struct rpc_create_args { struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, - struct rpc_program *, u32); + const struct rpc_program *, u32); void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index 76f3f7cc6e33..edc64219f92b 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -12,7 +12,7 @@ #include struct rpc_stat { - struct rpc_program * program; + const struct rpc_program *program; unsigned int netcnt, netudpcnt, @@ -60,7 +60,7 @@ void rpc_modcount(struct inode *, int); #ifdef CONFIG_PROC_FS struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); void rpc_proc_unregister(struct net *,const char *); -void rpc_proc_zero(struct rpc_program *); +void rpc_proc_zero(const struct rpc_program *); struct proc_dir_entry * svc_proc_register(struct net *, struct svc_stat *, const struct file_operations *); void svc_proc_unregister(struct net *, const char *); @@ -71,7 +71,7 @@ void svc_seq_show(struct seq_file *, static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } static inline void rpc_proc_unregister(struct net *net, const char *p) {} -static inline void rpc_proc_zero(struct rpc_program *p) {} +static inline void rpc_proc_zero(const struct rpc_program *p) {} static inline struct proc_dir_entry *svc_proc_register(struct net *net, struct svc_stat *s, const struct file_operations *f) { return NULL; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1b2317fa4043..db7220d87732 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -238,8 +238,8 @@ void rpc_clients_notifier_unregister(void) static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) { - struct rpc_program *program = args->program; - struct rpc_version *version; + const struct rpc_program *program = args->program; + const struct rpc_version *version; struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; @@ -626,11 +626,11 @@ rpc_release_client(struct rpc_clnt *clnt) * The Sun NFSv2/v3 ACL protocol can do this. */ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, - struct rpc_program *program, + const struct rpc_program *program, u32 vers) { struct rpc_clnt *clnt; - struct rpc_version *version; + const struct rpc_version *version; int err; BUG_ON(vers >= program->nrvers || !program->version[vers]); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d3978017b25d..b1f08bd67883 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -112,7 +112,7 @@ enum { static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); -static struct rpc_program rpcb_program; +static const struct rpc_program rpcb_program; struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -137,8 +137,8 @@ struct rpcb_info { struct rpc_procinfo * rpc_proc; }; -static struct rpcb_info rpcb_next_version[]; -static struct rpcb_info rpcb_next_version6[]; +static const struct rpcb_info rpcb_next_version[]; +static const struct rpcb_info rpcb_next_version6[]; static const struct rpc_call_ops rpcb_getport_ops = { .rpc_call_done = rpcb_getport_done, @@ -1051,7 +1051,7 @@ static struct rpc_procinfo rpcb_procedures4[] = { }, }; -static struct rpcb_info rpcb_next_version[] = { +static const struct rpcb_info rpcb_next_version[] = { { .rpc_vers = RPCBVERS_2, .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], @@ -1061,7 +1061,7 @@ static struct rpcb_info rpcb_next_version[] = { }, }; -static struct rpcb_info rpcb_next_version6[] = { +static const struct rpcb_info rpcb_next_version6[] = { { .rpc_vers = RPCBVERS_4, .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], @@ -1075,25 +1075,25 @@ static struct rpcb_info rpcb_next_version6[] = { }, }; -static struct rpc_version rpcb_version2 = { +static const struct rpc_version rpcb_version2 = { .number = RPCBVERS_2, .nrprocs = ARRAY_SIZE(rpcb_procedures2), .procs = rpcb_procedures2 }; -static struct rpc_version rpcb_version3 = { +static const struct rpc_version rpcb_version3 = { .number = RPCBVERS_3, .nrprocs = ARRAY_SIZE(rpcb_procedures3), .procs = rpcb_procedures3 }; -static struct rpc_version rpcb_version4 = { +static const struct rpc_version rpcb_version4 = { .number = RPCBVERS_4, .nrprocs = ARRAY_SIZE(rpcb_procedures4), .procs = rpcb_procedures4 }; -static struct rpc_version *rpcb_version[] = { +static const struct rpc_version *rpcb_version[] = { NULL, NULL, &rpcb_version2, @@ -1103,7 +1103,7 @@ static struct rpc_version *rpcb_version[] = { static struct rpc_stat rpcb_stats; -static struct rpc_program rpcb_program = { +static const struct rpc_program rpcb_program = { .name = "rpcbind", .number = RPCBIND_PROGRAM, .nrvers = ARRAY_SIZE(rpcb_version), From 82b0a4c3c171b180629696e8d1d5f52516f711e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jan 2012 14:52:23 -0500 Subject: [PATCH 121/316] SUNRPC: Add trace events to the sunrpc subsystem Add declarations to allow tracing of RPC call creation, running, sleeping, and destruction. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 2 +- include/trace/events/sunrpc.h | 124 ++++++++++++++++++++++++++++++++++ net/sunrpc/sched.c | 12 ++++ 3 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/sunrpc.h diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index bd337f990a41..f7b2df5252b0 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -271,7 +271,7 @@ static inline int rpc_task_has_priority(struct rpc_task *task, unsigned char pri } #ifdef RPC_DEBUG -static inline const char * rpc_qname(struct rpc_wait_queue *q) +static inline const char * rpc_qname(const struct rpc_wait_queue *q) { return ((q && q->name) ? q->name : "unknown"); } diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h new file mode 100644 index 000000000000..51cc9490919f --- /dev/null +++ b/include/trace/events/sunrpc.h @@ -0,0 +1,124 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sunrpc + +#if !defined(_TRACE_SUNRPC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SUNRPC_H + +#include +#include +#include + +DECLARE_EVENT_CLASS(rpc_task_running, + + TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), + + TP_ARGS(clnt, task, action), + + TP_STRUCT__entry( + __field(const struct rpc_clnt *, clnt) + __field(const struct rpc_task *, task) + __field(const void *, action) + __field(unsigned long, runstate) + __field(int, status) + __field(unsigned short, flags) + ), + + TP_fast_assign( + __entry->clnt = clnt; + __entry->task = task; + __entry->action = action; + __entry->runstate = task->tk_runstate; + __entry->status = task->tk_status; + __entry->flags = task->tk_flags; + ), + + TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d action=%pf", + __entry->task, + __entry->clnt, + __entry->flags, + __entry->runstate, + __entry->status, + __entry->action + ) +); + +DEFINE_EVENT(rpc_task_running, rpc_task_begin, + + TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), + + TP_ARGS(clnt, task, action) + +); + +DEFINE_EVENT(rpc_task_running, rpc_task_run_action, + + TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), + + TP_ARGS(clnt, task, action) + +); + +DEFINE_EVENT(rpc_task_running, rpc_task_complete, + + TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), + + TP_ARGS(clnt, task, action) + +); + +DECLARE_EVENT_CLASS(rpc_task_queued, + + TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), + + TP_ARGS(clnt, task, q), + + TP_STRUCT__entry( + __field(const struct rpc_clnt *, clnt) + __field(const struct rpc_task *, task) + __field(const struct rpc_wait_queue *, queue) + __field(unsigned long, timeout) + __field(unsigned long, runstate) + __field(int, status) + __field(unsigned short, flags) + ), + + TP_fast_assign( + __entry->clnt = clnt; + __entry->task = task; + __entry->queue = q; + __entry->timeout = task->tk_timeout; + __entry->runstate = task->tk_runstate; + __entry->status = task->tk_status; + __entry->flags = task->tk_flags; + ), + + TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s", + __entry->task, + __entry->clnt, + __entry->flags, + __entry->runstate, + __entry->status, + __entry->timeout, + rpc_qname(__entry->queue) + ) +); + +DEFINE_EVENT(rpc_task_queued, rpc_task_sleep, + + TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), + + TP_ARGS(clnt, task, q) + +); + +DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup, + + TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), + + TP_ARGS(clnt, task, q) + +); + +#endif /* _TRACE_SUNRPC_H */ + +#include diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f982dfe53993..d79c63df49b8 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -28,6 +28,9 @@ #define RPCDBG_FACILITY RPCDBG_SCHED #endif +#define CREATE_TRACE_POINTS +#include + /* * RPC slabs and memory pools */ @@ -251,6 +254,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { + trace_rpc_task_begin(task->tk_client, task, NULL); + rpc_task_set_debuginfo(task); set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); } @@ -267,6 +272,8 @@ static int rpc_complete_task(struct rpc_task *task) unsigned long flags; int ret; + trace_rpc_task_complete(task->tk_client, task, NULL); + spin_lock_irqsave(&wq->lock, flags); clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); ret = atomic_dec_and_test(&task->tk_count); @@ -324,6 +331,8 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); + trace_rpc_task_sleep(task->tk_client, task, q); + __rpc_add_wait_queue(q, task, queue_priority); BUG_ON(task->tk_callback != NULL); @@ -378,6 +387,8 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task return; } + trace_rpc_task_wakeup(task->tk_client, task, queue); + __rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -701,6 +712,7 @@ static void __rpc_execute(struct rpc_task *task) if (do_action == NULL) break; } + trace_rpc_task_run_action(task->tk_client, task, task->tk_action); do_action(task); /* From 4601df20fb3bf2b87e248abc622b8a7e4c3059fb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jan 2012 18:47:05 -0500 Subject: [PATCH 122/316] NFSv4: Avoid thundering herd issues with nfs_release_seqid Store a pointer to the rpc_task in struct nfs_seqid so that we can wake up only that request that is able to grab the lock after we've released it. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4state.c | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c45c21a5470f..b133b50dec9a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -71,6 +71,7 @@ struct nfs_seqid_counter { struct nfs_seqid { struct nfs_seqid_counter *sequence; struct list_head list; + struct rpc_task *task; }; static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f0e9881c2aa2..7d098604802c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -916,20 +916,28 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m if (new != NULL) { new->sequence = counter; INIT_LIST_HEAD(&new->list); + new->task = NULL; } return new; } void nfs_release_seqid(struct nfs_seqid *seqid) { - if (!list_empty(&seqid->list)) { - struct nfs_seqid_counter *sequence = seqid->sequence; + struct nfs_seqid_counter *sequence; - spin_lock(&sequence->lock); - list_del_init(&seqid->list); - spin_unlock(&sequence->lock); - rpc_wake_up(&sequence->wait); + if (list_empty(&seqid->list)) + return; + sequence = seqid->sequence; + spin_lock(&sequence->lock); + list_del_init(&seqid->list); + if (!list_empty(&sequence->list)) { + struct nfs_seqid *next; + + next = list_first_entry(&sequence->list, + struct nfs_seqid, list); + rpc_wake_up_queued_task(&sequence->wait, next->task); } + spin_unlock(&sequence->lock); } void nfs_free_seqid(struct nfs_seqid *seqid) @@ -1000,6 +1008,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) int status = 0; spin_lock(&sequence->lock); + seqid->task = task; if (list_empty(&seqid->list)) list_add_tail(&seqid->list, &sequence->list); if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) From 7d9dea915fe333357912bce2d624ee848dfbd890 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jan 2012 18:57:02 -0500 Subject: [PATCH 123/316] NFS: Use kcalloc() when allocating arrays Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extents.c | 2 +- fs/nfs/pnfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 1abac09f7cd5..1f9a6032796b 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -147,7 +147,7 @@ static int _preload_range(struct pnfs_inval_markings *marks, count = (int)(end - start) / (int)tree->mtt_step_size; /* Pre-malloc what memory we might need */ - storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); + storage = kcalloc(count, sizeof(*storage), GFP_NOFS); if (!storage) return -ENOMEM; for (i = 0; i < count; i++) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 17149a490065..92927878c2f8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -590,7 +590,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; max_pages = max_resp_sz >> PAGE_SHIFT; - pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); + pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); if (!pages) goto out_err_free; From d36b7cf7c626749efc75f49fb0468e8c3c0c1bbd Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 Jan 2012 16:03:39 +0200 Subject: [PATCH 124/316] pnfs: clean up initiate_file_draining layout lookup Fixes the following compiler warning: fs/nfs/callback_proc.c: In function 'do_callback_layoutrecall': fs/nfs/callback_proc.c:115:26: warning: 'lo' may be used uninitialized in this function Reported-by: Jim Rees Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 56 ++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 54cea8ad5a76..0e6e63f55db4 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -108,42 +108,62 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf #if defined(CONFIG_NFS_V4_1) -static u32 initiate_file_draining(struct nfs_client *clp, - struct cb_layoutrecallargs *args) +/* + * Lookup a layout by filehandle. + * + * Note: gets a refcount on the layout hdr and on its respective inode. + * Caller must put the layout hdr and the inode. + * + * TODO: keep track of all layouts (and delegations) in a hash table + * hashed by filehandle. + */ +static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh) { struct nfs_server *server; - struct pnfs_layout_hdr *lo; struct inode *ino; - bool found = false; - u32 rv = NFS4ERR_NOMATCHING_LAYOUT; - LIST_HEAD(free_me_list); + struct pnfs_layout_hdr *lo; - spin_lock(&clp->cl_lock); - rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { - if (nfs_compare_fh(&args->cbl_fh, - &NFS_I(lo->plh_inode)->fh)) + if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) continue; ino = igrab(lo->plh_inode); if (!ino) continue; - found = true; - /* Without this, layout can be freed as soon - * as we release cl_lock. - */ get_layout_hdr(lo); - break; + return lo; } - if (found) - break; } + + return NULL; +} + +static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh) +{ + struct pnfs_layout_hdr *lo; + + spin_lock(&clp->cl_lock); + rcu_read_lock(); + lo = get_layout_by_fh_locked(clp, fh); rcu_read_unlock(); spin_unlock(&clp->cl_lock); - if (!found) + return lo; +} + +static u32 initiate_file_draining(struct nfs_client *clp, + struct cb_layoutrecallargs *args) +{ + struct inode *ino; + struct pnfs_layout_hdr *lo; + u32 rv = NFS4ERR_NOMATCHING_LAYOUT; + LIST_HEAD(free_me_list); + + lo = get_layout_by_fh(clp, &args->cbl_fh); + if (!lo) return NFS4ERR_NOMATCHING_LAYOUT; + ino = lo->plh_inode; spin_lock(&ino->i_lock); if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || mark_matching_lsegs_invalid(lo, &free_me_list, From d3b773e4fd80524ac27802fcf11cfd9ed1d5491f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 23 Jan 2012 13:22:45 -0500 Subject: [PATCH 125/316] SUNRPC: fixup for namespace changes Fixes this build error when CONFIG_NET_NS is not set: net/sunrpc/svcsock.c: In function 'svc_setup_socket': net/sunrpc/svcsock.c:1412:40: error: 'struct sock_common' has no member named 'skc_net' Reported-by: Stephen Rothwell Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e8af0c9e436b..e088b1633d36 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1409,7 +1409,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, sock->sk->sk_net, inet->sk_family, + *errp = svc_register(serv, sock_net(sock->sk), inet->sk_family, inet->sk_protocol, ntohs(inet_sk(inet)->inet_sport)); From c15c928f36a2710746c2b945067215f436f45544 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 24 Jan 2012 16:35:00 +0000 Subject: [PATCH 126/316] nfs: remove unneeded NULL pointer check in nfs4_remote_mount "data" is never NULL here. Reported-by: Eric Paris Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b79f2a11c29e..8e210b2c16d7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2667,8 +2667,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ nfs4_fill_super(s); - nfs_fscache_get_super_cookie( - s, data ? data->fscache_uniq : NULL, NULL); + nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); } mntroot = nfs4_get_root(s, mntfh, dev_name); From 8b7e3f49ddda0d43c5bc8de404c1dc7e7a13cc80 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jan 2012 15:43:56 -0500 Subject: [PATCH 127/316] NFSv4: Don't decode fs_locations if we didn't ask for them... Currently, the server can potentially cause us to Oops by returning an fs_locations request that we didn't actually request. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4633d405a94c..ca288d115b54 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3561,6 +3561,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st status = 0; if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) goto out; + status = -EIO; + /* Ignore borken servers that return unrequested attrs */ + if (unlikely(res == NULL)) + goto out; dprintk("%s: fsroot ", __func__); status = decode_pathname(xdr, &res->fs_path); if (unlikely(status != 0)) @@ -4295,6 +4299,7 @@ xdr_error: static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fattr *fattr, struct nfs_fh *fh, + struct nfs4_fs_locations *fs_loc, const struct nfs_server *server) { int status; @@ -4342,9 +4347,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, - struct nfs4_fs_locations, - fattr)); + status = decode_attr_fs_locations(xdr, bitmap, fs_loc); if (status < 0) goto xdr_error; fattr->valid |= status; @@ -4408,7 +4411,8 @@ xdr_error: } static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, - struct nfs_fh *fh, const struct nfs_server *server) + struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, + const struct nfs_server *server) { __be32 *savep; uint32_t attrlen, @@ -4427,7 +4431,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat if (status < 0) goto xdr_error; - status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server); + status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server); if (status < 0) goto xdr_error; @@ -4440,7 +4444,7 @@ xdr_error: static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) { - return decode_getfattr_generic(xdr, fattr, NULL, server); + return decode_getfattr_generic(xdr, fattr, NULL, NULL, server); } /* @@ -6580,8 +6584,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, if (status) goto out; xdr_enter_page(xdr, PAGE_SIZE); - status = decode_getfattr(xdr, &res->fs_locations->fattr, - res->fs_locations->server); + status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, + NULL, res->fs_locations, + res->fs_locations->server); out: return status; } @@ -6961,7 +6966,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, goto out_overflow; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, - entry->server) < 0) + NULL, entry->server) < 0) goto out_overflow; if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) entry->ino = entry->fattr->mounted_on_fileid; From a4980e7840176b4baa60715c32c5994b084ea9a6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jan 2012 15:43:56 -0500 Subject: [PATCH 128/316] NFSv4: ACCESS validation doesn't require a full attribute refresh We only really need to check the change attribute, so let's just use the server->cache_consistency_bitmask. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 828a76590af9..1bb0be36a726 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2512,7 +2512,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry struct nfs_server *server = NFS_SERVER(inode); struct nfs4_accessargs args = { .fh = NFS_FH(inode), - .bitmask = server->attr_bitmask, + .bitmask = server->cache_consistency_bitmask, }; struct nfs4_accessres res = { .server = server, From 7df898b1a70b13c3a8892625f4ead929d9554293 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 31 Jan 2012 09:16:59 +1000 Subject: [PATCH 129/316] drm/nouveau/disp: check that panel power gpio is enabled at init time Reported-by: Yuriy Khomchik Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bios.h | 5 +++-- drivers/gpu/drm/nouveau/nouveau_display.c | 10 ++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h index 1e382ad5a2b8..a37c31e358aa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.h +++ b/drivers/gpu/drm/nouveau/nouveau_bios.h @@ -54,9 +54,10 @@ struct bit_entry { int bit_table(struct drm_device *, u8 id, struct bit_entry *); enum dcb_gpio_tag { - DCB_GPIO_TVDAC0 = 0xc, + DCB_GPIO_PANEL_POWER = 0x01, + DCB_GPIO_TVDAC0 = 0x0c, DCB_GPIO_TVDAC1 = 0x2d, - DCB_GPIO_PWM_FAN = 0x9, + DCB_GPIO_PWM_FAN = 0x09, DCB_GPIO_FAN_SENSE = 0x3d, DCB_GPIO_UNUSED = 0xff }; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 3cb52bc52b21..795a9e3c990a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -219,6 +219,16 @@ nouveau_display_init(struct drm_device *dev) if (ret) return ret; + /* power on internal panel if it's not already. the init tables of + * some vbios default this to off for some reason, causing the + * panel to not work after resume + */ + if (nouveau_gpio_func_get(dev, DCB_GPIO_PANEL_POWER) == 0) { + nouveau_gpio_func_set(dev, DCB_GPIO_PANEL_POWER, true); + msleep(300); + } + + /* enable polling for external displays */ drm_kms_helper_poll_enable(dev); /* enable hotplug interrupts */ From ce2e7895faba8fabaa917f52293126e5f4174fa9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 1 Feb 2012 15:08:59 +1000 Subject: [PATCH 130/316] drm/nouveau/mxm: pretend to succeed, even if we can't shadow the MXM-SIS There's at least one known case where our shadowing code is buggy, and we fail init. Until we can be confident we're doing all this correctly, lets succeed and risk crazy bios tables rather than failing for perfectly valid configs too. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_mxm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_mxm.c b/drivers/gpu/drm/nouveau/nouveau_mxm.c index 8bccddf4eff0..e5a64f0f4cb7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mxm.c +++ b/drivers/gpu/drm/nouveau/nouveau_mxm.c @@ -656,7 +656,16 @@ nouveau_mxm_init(struct drm_device *dev) if (mxm_shadow(dev, mxm[0])) { MXM_MSG(dev, "failed to locate valid SIS\n"); +#if 0 + /* we should, perhaps, fall back to some kind of limited + * mode here if the x86 vbios hasn't already done the + * work for us (so we prevent loading with completely + * whacked vbios tables). + */ return -EINVAL; +#else + return 0; +#endif } MXM_MSG(dev, "MXMS Version %d.%d\n", From 1eb8a619b43c1e99179ebadbc9c614ed37358f2d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jan 2012 16:48:52 +1000 Subject: [PATCH 131/316] drm/nouveau: fix typo on mxmdcb option Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index e4a7cfe7898d..81d7962e7252 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -124,7 +124,7 @@ MODULE_PARM_DESC(ctxfw, "Use external HUB/GPC ucode (fermi)\n"); int nouveau_ctxfw; module_param_named(ctxfw, nouveau_ctxfw, int, 0400); -MODULE_PARM_DESC(ctxfw, "Santise DCB table according to MXM-SIS\n"); +MODULE_PARM_DESC(mxmdcb, "Santise DCB table according to MXM-SIS\n"); int nouveau_mxmdcb = 1; module_param_named(mxmdcb, nouveau_mxmdcb, int, 0400); From 525895ba388c949aa906f26e3ec5cb1ab041f56b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 10 Jan 2012 10:18:28 +1000 Subject: [PATCH 132/316] drm/nouveau/gem: fix fence_sync race / oops Due to a race it was possible for a fence to be destroyed while another thread was trying to synchronise with it. If this happened in the fallback non-semaphore path, it lead to the following oops due to fence->channel being NULL. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] nouveau_fence_update+0xe/0xe0 [nouveau] *pde = a649c067 SMP Modules linked in: fuse nouveau(O) ttm(O) drm_kms_helper(O) drm(O) mxm_wmi video wmi netconsole configfs lockd bnep bluetooth rfkill ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ip6table_filter ip6_tables snd_hda_codec_realtek snd_hda_intel snd_hda_cobinfmt_misc uinput ata_generic pata_acpi pata_aet2c_algo_bit i2c_core [last unloaded: wmi] Pid: 2255, comm: gnome-shell Tainted: G O 3.2.0-0.rc5.git0.1.fc17.i686 #1 System manufacturer System Product Name/M2A-VM EIP: 0060:[] EFLAGS: 00010296 CPU: 1 EIP is at nouveau_fence_update+0xe/0xe0 [nouveau] EAX: 00000000 EBX: ddfc6dd0 ECX: dd111580 EDX: 00000000 ESI: 00003e80 EDI: dd111580 EBP: dd121d00 ESP: dd121ce8 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process gnome-shell (pid: 2255, ti=dd120000 task=dd111580 task.ti=dd120000) Stack: 7dc86c76 00000000 00003e80 ddfc6dd0 00003e80 dd111580 dd121d0c fa96371f 00000000 dd121d3c fa963773 dd111580 01000246 000ec53d 00000000 ddfc6dd0 00001f40 00000000 ddfc6dd0 00000010 dc7df840 dd121d6c fa9639a0 00000000 Call Trace: [] __nouveau_fence_signalled+0x1f/0x30 [nouveau] [] __nouveau_fence_wait+0x43/0xd0 [nouveau] [] nouveau_fence_sync+0x1a0/0x1c0 [nouveau] [] validate_list+0x176/0x300 [nouveau] [] ? ttm_bo_mem_put+0x30/0x30 [ttm] [] nouveau_gem_ioctl_pushbuf+0x48a/0xfd0 [nouveau] [] ? die+0x31/0x80 [] drm_ioctl+0x388/0x490 [drm] [] ? die+0x31/0x80 [] ? nouveau_gem_ioctl_new+0x150/0x150 [nouveau] [] ? file_has_perm+0xcb/0xe0 [] ? drm_copy_field+0x80/0x80 [drm] [] do_vfs_ioctl+0x86/0x5b0 [] ? die+0x31/0x80 [] ? selinux_file_ioctl+0x62/0x130 [] ? fget_light+0x30/0x340 [] sys_ioctl+0x6f/0x80 [] syscall_call+0x7/0xb [] ? die+0x31/0x80 [] ? die+0x31/0x80 Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nouveau_gem.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 5f0bc57fdaab..7ce3fde40743 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -379,6 +379,25 @@ retry: return 0; } +static int +validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo) +{ + struct nouveau_fence *fence = NULL; + int ret = 0; + + spin_lock(&nvbo->bo.bdev->fence_lock); + if (nvbo->bo.sync_obj) + fence = nouveau_fence_ref(nvbo->bo.sync_obj); + spin_unlock(&nvbo->bo.bdev->fence_lock); + + if (fence) { + ret = nouveau_fence_sync(fence, chan); + nouveau_fence_unref(&fence); + } + + return ret; +} + static int validate_list(struct nouveau_channel *chan, struct list_head *list, struct drm_nouveau_gem_pushbuf_bo *pbbo, uint64_t user_pbbo_ptr) @@ -393,7 +412,7 @@ validate_list(struct nouveau_channel *chan, struct list_head *list, list_for_each_entry(nvbo, list, entry) { struct drm_nouveau_gem_pushbuf_bo *b = &pbbo[nvbo->pbbo_index]; - ret = nouveau_fence_sync(nvbo->bo.sync_obj, chan); + ret = validate_sync(chan, nvbo); if (unlikely(ret)) { NV_ERROR(dev, "fail pre-validate sync\n"); return ret; @@ -416,7 +435,7 @@ validate_list(struct nouveau_channel *chan, struct list_head *list, return ret; } - ret = nouveau_fence_sync(nvbo->bo.sync_obj, chan); + ret = validate_sync(chan, nvbo); if (unlikely(ret)) { NV_ERROR(dev, "fail post-validate sync\n"); return ret; From a9d993882008a1ae2c953064f0c2ca7e604b1333 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jan 2012 10:20:47 +0300 Subject: [PATCH 133/316] drm/nv50/pm: signedness bug in nv50_pm_clocks_pre() calc_mclk() returns zero on success and negative on failure but clk is a u32. v2: Martin Peres: - clk should be an int, not a u32 Signed-off-by: Martin Peres Signed-off-by: Dan Carpenter Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_pm.c b/drivers/gpu/drm/nouveau/nv50_pm.c index 03937212e9d8..ec5481dfcd82 100644 --- a/drivers/gpu/drm/nouveau/nv50_pm.c +++ b/drivers/gpu/drm/nouveau/nv50_pm.c @@ -495,9 +495,9 @@ nv50_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) struct drm_nouveau_private *dev_priv = dev->dev_private; struct nv50_pm_state *info; struct pll_lims pll; - int ret = -EINVAL; + int clk, ret = -EINVAL; int N, M, P1, P2; - u32 clk, out; + u32 out; if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac) From f70eecde3bca92630d3886496e73316ff353f185 Mon Sep 17 00:00:00 2001 From: Dylan Reid Date: Tue, 31 Jan 2012 13:04:41 -0800 Subject: [PATCH 134/316] ALSA: hda - Fix calling cs_automic twice for Cirrus codecs. If cs_automic is called twice (like it is during init) while the mic is present, it will over-write the last_input with the new one, causing it to switch back to the automic input when the mic is unplugged. This leaves the driver in a state (cur_input, last_input, and automix_idx the same) where the internal mic can not be selected until it is rebooted without the mic attached. Check that the mic hasn't already been switched to before setting last_input. Signed-off-by: Dylan Reid Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cirrus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 0e99357e822c..bc5a993d1146 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -988,8 +988,10 @@ static void cs_automic(struct hda_codec *codec) change_cur_input(codec, !spec->automic_idx, 0); } else { if (present) { - spec->last_input = spec->cur_input; - spec->cur_input = spec->automic_idx; + if (spec->cur_input != spec->automic_idx) { + spec->last_input = spec->cur_input; + spec->cur_input = spec->automic_idx; + } } else { spec->cur_input = spec->last_input; } From 54c2a89f60fd71b924d0f848ac892442951401a6 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Wed, 1 Feb 2012 12:05:41 +0100 Subject: [PATCH 135/316] ALSA: HDA: Fix duplicated output to more than one codec This typo caused the wrong codec's nid to be checked for wcaps type. As a result, sometimes speakers would duplicate the output sent to HDMI output. Cc: stable@kernel.org BugLink: https://bugs.launchpad.net/bugs/924320 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 4df72c0e8c37..c2c65f63bf06 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1447,7 +1447,7 @@ void snd_hda_codec_setup_stream(struct hda_codec *codec, hda_nid_t nid, for (i = 0; i < c->cvt_setups.used; i++) { p = snd_array_elem(&c->cvt_setups, i); if (!p->active && p->stream_tag == stream_tag && - get_wcaps_type(get_wcaps(codec, p->nid)) == type) + get_wcaps_type(get_wcaps(c, p->nid)) == type) p->dirty = 1; } } From c70c471c585a3fc1a10c792d5121b3803c83dde0 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:17 +0200 Subject: [PATCH 136/316] lib/mpi: added missing NULL check Added missing NULL check after mpi_alloc(). Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/mpi/mpicoder.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 716802b774ea..6116fc4990da 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -79,7 +79,8 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits) } a = mpi_alloc((nframe + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB); - mpi_set_buffer(a, frame, nframe, 0); + if (a) + mpi_set_buffer(a, frame, nframe, 0); kfree(frame); return a; From 3cccd1543ab623a5065335bf08350e06ffc788ab Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:16 +0200 Subject: [PATCH 137/316] lib/mpi: replaced MPI_NULL with normal NULL MPI_NULL is replaced with normal NULL. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/mpi.h | 2 -- lib/mpi/mpicoder.c | 8 ++++---- lib/mpi/mpiutil.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 06f88994ccaa..d02cca6cc8ce 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -57,8 +57,6 @@ struct gcry_mpi { typedef struct gcry_mpi *MPI; -#define MPI_NULL NULL - #define mpi_get_nlimbs(a) ((a)->nlimbs) #define mpi_is_neg(a) ((a)->sign) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 6116fc4990da..d7684aa7f65c 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -34,7 +34,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits) uint8_t *frame, *fr_pt; int i = 0, n; size_t asnlen = DIM(asn); - MPI a = MPI_NULL; + MPI a = NULL; if (SHA1_DIGEST_LENGTH + asnlen + 4 > nframe) pr_info("MPI: can't encode a %d bit MD into a %d bits frame\n", @@ -48,7 +48,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits) */ frame = kmalloc(nframe, GFP_KERNEL); if (!frame) - return MPI_NULL; + return NULL; n = 0; frame[n++] = 0; frame[n++] = 1; /* block type */ @@ -92,7 +92,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) int i, j; unsigned nbits, nbytes, nlimbs, nread = 0; mpi_limb_t a; - MPI val = MPI_NULL; + MPI val = NULL; if (*ret_nread < 2) goto leave; @@ -109,7 +109,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; val = mpi_alloc(nlimbs); if (!val) - return MPI_NULL; + return NULL; i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; i %= BYTES_PER_MPI_LIMB; val->nbits = nbits; diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index eefc55d6b7f5..6bfc41f62b8f 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -135,7 +135,7 @@ int mpi_copy(MPI *copied, const MPI a) size_t i; MPI b; - *copied = MPI_NULL; + *copied = NULL; if (a) { b = mpi_alloc(a->nlimbs); From e87c5e35a92e045de75fb6ae9846a38bdd0f92bd Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:21 +0200 Subject: [PATCH 138/316] lib/mpi: return error code on dividing by zero Definitely better to return error code than to divide by zero. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/mpi/mpi-pow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c index b04a3cf80080..67f3e79af914 100644 --- a/lib/mpi/mpi-pow.c +++ b/lib/mpi/mpi-pow.c @@ -59,7 +59,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) ep = exp->d; if (!msize) - msize = 1 / msize; /* provoke a signal */ + return -EINVAL; if (!esize) { /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 From e2fe85c236736c866481de288f636ab06ef49787 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:22 +0200 Subject: [PATCH 139/316] lib/mpi: checks for zero divisor length Divisor length should not be 0. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/mpi/mpi-div.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c index c3087d1390ce..8a45717bd001 100644 --- a/lib/mpi/mpi-div.c +++ b/lib/mpi/mpi-div.c @@ -149,6 +149,9 @@ int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) mpi_ptr_t marker[5]; int markidx = 0; + if (!dsize) + return -EINVAL; + memset(marker, 0, sizeof(marker)); /* Ensure space is enough for quotient and remainder. From bc95eeadf5c6fd9e9840898a83a93718a0114b6d Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:23 +0200 Subject: [PATCH 140/316] lib/mpi: removed unused functions do_encode_md() and mpi_get_keyid() are not parts of mpi library. They were used early versions of gnupg and in digsig project, but they are not used neither here nor there anymore. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/mpi/mpicoder.c | 88 ---------------------------------------------- 1 file changed, 88 deletions(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index d7684aa7f65c..f26b41fcb48c 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -20,72 +20,8 @@ #include "mpi-internal.h" -#define DIM(v) (sizeof(v)/sizeof((v)[0])) #define MAX_EXTERN_MPI_BITS 16384 -static uint8_t asn[15] = /* Object ID is 1.3.14.3.2.26 */ -{ 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x0e, 0x03, - 0x02, 0x1a, 0x05, 0x00, 0x04, 0x14 -}; - -MPI do_encode_md(const void *sha_buffer, unsigned nbits) -{ - int nframe = (nbits + 7) / 8; - uint8_t *frame, *fr_pt; - int i = 0, n; - size_t asnlen = DIM(asn); - MPI a = NULL; - - if (SHA1_DIGEST_LENGTH + asnlen + 4 > nframe) - pr_info("MPI: can't encode a %d bit MD into a %d bits frame\n", - (int)(SHA1_DIGEST_LENGTH * 8), (int)nbits); - - /* We encode the MD in this way: - * - * 0 A PAD(n bytes) 0 ASN(asnlen bytes) MD(len bytes) - * - * PAD consists of FF bytes. - */ - frame = kmalloc(nframe, GFP_KERNEL); - if (!frame) - return NULL; - n = 0; - frame[n++] = 0; - frame[n++] = 1; /* block type */ - i = nframe - SHA1_DIGEST_LENGTH - asnlen - 3; - - if (i <= 1) { - pr_info("MPI: message digest encoding failed\n"); - kfree(frame); - return a; - } - - memset(frame + n, 0xff, i); - n += i; - frame[n++] = 0; - memcpy(frame + n, &asn, asnlen); - n += asnlen; - memcpy(frame + n, sha_buffer, SHA1_DIGEST_LENGTH); - n += SHA1_DIGEST_LENGTH; - - i = nframe; - fr_pt = frame; - - if (n != nframe) { - printk - ("MPI: message digest encoding failed, frame length is wrong\n"); - kfree(frame); - return a; - } - - a = mpi_alloc((nframe + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB); - if (a) - mpi_set_buffer(a, frame, nframe, 0); - kfree(frame); - - return a; -} - MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) { const uint8_t *buffer = xbuffer; @@ -212,30 +148,6 @@ int mpi_fromstr(MPI val, const char *str) } EXPORT_SYMBOL_GPL(mpi_fromstr); -/**************** - * Special function to get the low 8 bytes from an mpi. - * This can be used as a keyid; KEYID is an 2 element array. - * Return the low 4 bytes. - */ -u32 mpi_get_keyid(const MPI a, u32 *keyid) -{ -#if BYTES_PER_MPI_LIMB == 4 - if (keyid) { - keyid[0] = a->nlimbs >= 2 ? a->d[1] : 0; - keyid[1] = a->nlimbs >= 1 ? a->d[0] : 0; - } - return a->nlimbs >= 1 ? a->d[0] : 0; -#elif BYTES_PER_MPI_LIMB == 8 - if (keyid) { - keyid[0] = a->nlimbs ? (u32) (a->d[0] >> 32) : 0; - keyid[1] = a->nlimbs ? (u32) (a->d[0] & 0xffffffff) : 0; - } - return a->nlimbs ? (u32) (a->d[0] & 0xffffffff) : 0; -#else -#error Make this function work with other LIMB sizes -#endif -} - /**************** * Return an allocated buffer with the MPI (msb first). * NBYTES receives the length of this buffer. Caller must free the From f58a08152ce4198a2a1da162b97ecf8264c24866 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:25 +0200 Subject: [PATCH 141/316] lib/digsig: additional sanity checks against badly formated key payload Added sanity checks for possible wrongly formatted key payload data: - minimum key payload size - zero modulus length - corrected upper key payload boundary. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/digsig.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/digsig.c b/lib/digsig.c index fd2402f67f89..5d840ac64fb1 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -105,6 +105,10 @@ static int digsig_verify_rsa(struct key *key, down_read(&key->sem); ukp = key->payload.data; + + if (ukp->datalen < sizeof(*pkh)) + goto err1; + pkh = (struct pubkey_hdr *)ukp->data; if (pkh->version != 1) @@ -117,7 +121,7 @@ static int digsig_verify_rsa(struct key *key, goto err1; datap = pkh->mpi; - endp = datap + ukp->datalen; + endp = ukp->data + ukp->datalen; for (i = 0; i < pkh->nmpi; i++) { unsigned int remaining = endp - datap; @@ -128,7 +132,8 @@ static int digsig_verify_rsa(struct key *key, mblen = mpi_get_nbits(pkey[0]); mlen = (mblen + 7)/8; - err = -ENOMEM; + if (mlen == 0) + goto err; out1 = kzalloc(mlen, GFP_KERNEL); if (!out1) From b35e286a640f31d619a637332972498b51f3fd90 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:26 +0200 Subject: [PATCH 142/316] lib/digsig: pkcs_1_v1_5_decode_emsa cleanup Removed useless 'is_valid' variable in pkcs_1_v1_5_decode_emsa(), which was inhereted from original code. Client now uses return value to check for an error. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/digsig.c | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/lib/digsig.c b/lib/digsig.c index 5d840ac64fb1..b67e82c024b3 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -34,14 +34,9 @@ static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg, unsigned long msglen, unsigned long modulus_bitlen, unsigned char *out, - unsigned long *outlen, - int *is_valid) + unsigned long *outlen) { unsigned long modulus_len, ps_len, i; - int result; - - /* default to invalid packet */ - *is_valid = 0; modulus_len = (modulus_bitlen >> 3) + (modulus_bitlen & 7 ? 1 : 0); @@ -50,39 +45,30 @@ static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg, return -EINVAL; /* separate encoded message */ - if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1)) { - result = -EINVAL; - goto bail; - } + if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1)) + return -EINVAL; for (i = 2; i < modulus_len - 1; i++) if (msg[i] != 0xFF) break; /* separator check */ - if (msg[i] != 0) { + if (msg[i] != 0) /* There was no octet with hexadecimal value 0x00 to separate ps from m. */ - result = -EINVAL; - goto bail; - } + return -EINVAL; ps_len = i - 2; if (*outlen < (msglen - (2 + ps_len + 1))) { *outlen = msglen - (2 + ps_len + 1); - result = -EOVERFLOW; - goto bail; + return -EOVERFLOW; } *outlen = (msglen - (2 + ps_len + 1)); memcpy(out, &msg[2 + ps_len + 1], *outlen); - /* valid packet */ - *is_valid = 1; - result = 0; -bail: - return result; + return 0; } /* @@ -96,7 +82,7 @@ static int digsig_verify_rsa(struct key *key, unsigned long len; unsigned long mlen, mblen; unsigned nret, l; - int valid, head, i; + int head, i; unsigned char *out1 = NULL, *out2 = NULL; MPI in = NULL, res = NULL, pkey[2]; uint8_t *p, *datap, *endp; @@ -172,10 +158,9 @@ static int digsig_verify_rsa(struct key *key, memset(out1, 0, head); memcpy(out1 + head, p, l); - err = -EINVAL; - pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, &len, &valid); + err = pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, &len); - if (valid && len == hlen) + if (!err && len == hlen) err = memcmp(out2, h, hlen); err: From 4877e056192245b387aae004ab38f7f3899dd57c Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:19 +0200 Subject: [PATCH 143/316] lib/mpi: check for possible zero length Buggy client might pass zero nlimbs which is meaningless. Added check for zero length. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/mpi/mpiutil.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 6bfc41f62b8f..26e4ed31e256 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -58,6 +58,9 @@ mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs) { size_t len = nlimbs * sizeof(mpi_limb_t); + if (!len) + return NULL; + return kmalloc(len, GFP_KERNEL); } From a6d68ecc56ed182644c81ef6917ce1f5404db7dd Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:20 +0200 Subject: [PATCH 144/316] lib/mpi: added comment on divide by 0 case Comment explains that existing clients do not call this function with dsize == 0, which means that 1/0 should not happen. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/mpi/mpih-div.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c index 87ede162dfab..cde1aaec18da 100644 --- a/lib/mpi/mpih-div.c +++ b/lib/mpi/mpih-div.c @@ -217,6 +217,10 @@ mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, case 0: /* We are asked to divide by zero, so go ahead and do it! (To make the compiler not remove this statement, return the value.) */ + /* + * existing clients of this function have been modified + * not to call it with dsize == 0, so this should not happen + */ return 1 / dsize; case 1: From 43b2c0aeaab2237996a72f9b9d7952ba82d56913 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:18 +0200 Subject: [PATCH 145/316] lib/mpi: added missing NULL check Added missing NULL check after mpi_alloc_limb_space(). Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/mpi/mpi-div.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c index 8a45717bd001..f68cbbb4d4a4 100644 --- a/lib/mpi/mpi-div.c +++ b/lib/mpi/mpi-div.c @@ -210,6 +210,8 @@ int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) * numerator would be gradually overwritten by the quotient limbs. */ if (qp == np) { /* Copy NP object to temporary space. */ np = marker[markidx++] = mpi_alloc_limb_space(nsize); + if (!np) + goto nomem; MPN_COPY(np, qp, nsize); } } else /* Put quotient at top of remainder. */ From 86f8bedc9e1a8ddb4f1d9ff1f0c1229cc0797d6d Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:24 +0200 Subject: [PATCH 146/316] lib/digsig: checks for NULL return value mpi_read_from_buffer() return value must not be NULL. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- lib/digsig.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/digsig.c b/lib/digsig.c index b67e82c024b3..286d558033e2 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -109,9 +109,13 @@ static int digsig_verify_rsa(struct key *key, datap = pkh->mpi; endp = ukp->data + ukp->datalen; + err = -ENOMEM; + for (i = 0; i < pkh->nmpi; i++) { unsigned int remaining = endp - datap; pkey[i] = mpi_read_from_buffer(datap, &remaining); + if (!pkey[i]) + goto err; datap += remaining; } @@ -168,8 +172,8 @@ err: mpi_free(res); kfree(out1); kfree(out2); - mpi_free(pkey[0]); - mpi_free(pkey[1]); + while (--i >= 0) + mpi_free(pkey[i]); err1: up_read(&key->sem); From 86698c20f71d488b32c49ed4687fb3cf8a88a5ca Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 31 Jan 2012 19:06:25 -0600 Subject: [PATCH 147/316] drm/radeon/kms: disable output polling when suspended Polling the outputs when the device is suspended can result in erroneous status updates. Disable output polling during suspend to prevent this from happening. Signed-off-by: Seth Forshee Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index cec51a5b69dd..49f7cb7e226b 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -883,6 +883,8 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; + drm_kms_helper_poll_disable(dev); + /* turn off display hw */ list_for_each_entry(connector, &dev->mode_config.connector_list, head) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); @@ -972,6 +974,8 @@ int radeon_resume_kms(struct drm_device *dev) list_for_each_entry(connector, &dev->mode_config.connector_list, head) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } + + drm_kms_helper_poll_enable(dev); return 0; } From 1b61925061660009f5b8047f93c5297e04541273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 1 Feb 2012 12:09:55 +0100 Subject: [PATCH 148/316] drm/radeon: Set DESKTOP_HEIGHT register to the framebuffer (not mode) height. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value of this register is transferred to the V_COUNTER register at the beginning of vertical blank. V_COUNTER is the reference for VLINE waits and goes from VIEWPORT_Y_START to VIEWPORT_Y_START+VIEWPORT_HEIGHT during scanout, so if VIEWPORT_Y_START is not 0, V_COUNTER actually went backwards at the beginning of vertical blank, and VLINE waits excluding the whole scanout area could never finish (possibly only if VIEWPORT_Y_START is larger than the length of vertical blank in scanlines). Setting DESKTOP_HEIGHT to the framebuffer height should prevent this for any kind of VLINE wait. Fixes https://bugs.freedesktop.org/show_bug.cgi?id=45329 . CC: stable@vger.kernel.org Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atombios_crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 891935271d34..742f17f009a9 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1184,7 +1184,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, WREG32(EVERGREEN_GRPH_ENABLE + radeon_crtc->crtc_offset, 1); WREG32(EVERGREEN_DESKTOP_HEIGHT + radeon_crtc->crtc_offset, - crtc->mode.vdisplay); + target_fb->height); x &= ~3; y &= ~1; WREG32(EVERGREEN_VIEWPORT_START + radeon_crtc->crtc_offset, @@ -1353,7 +1353,7 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, WREG32(AVIVO_D1GRPH_ENABLE + radeon_crtc->crtc_offset, 1); WREG32(AVIVO_D1MODE_DESKTOP_HEIGHT + radeon_crtc->crtc_offset, - crtc->mode.vdisplay); + target_fb->height); x &= ~3; y &= ~1; WREG32(AVIVO_D1MODE_VIEWPORT_START + radeon_crtc->crtc_offset, From 3f7e363249ad5f4070025f6c09fd264f93f24eab Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 31 Jan 2012 09:55:21 +0100 Subject: [PATCH 149/316] drm/radeon/kms: Fix device tree linkage of DP i2c buses too Properly set the parent device of DP i2c buses before registering them too. Signed-off-by: Jean Delvare Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index e2a393ff0c44..98a8ad680109 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -958,6 +958,7 @@ struct radeon_i2c_chan *radeon_i2c_create_dp(struct drm_device *dev, i2c->rec = *rec; i2c->adapter.owner = THIS_MODULE; i2c->adapter.class = I2C_CLASS_DDC; + i2c->adapter.dev.parent = &dev->pdev->dev; i2c->dev = dev; snprintf(i2c->adapter.name, sizeof(i2c->adapter.name), "Radeon aux bus %s", name); From 7d731019218e49a9811f6d0adec4b1cfcb752bed Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 1 Feb 2012 11:10:24 -0800 Subject: [PATCH 150/316] mtd: fix merge conflict resolution breakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes merge conflict resolution breakage introduced by merge d3712b9dfcf4 ("Merge tag 'for-linus' of git://github.com/prasad-joshi/logfs_upstream"). The commit changed 'mtd_can_have_bb()' function and made it always return zero, which is incorrect. Instead, we need it to return whether the underlying flash device can have bad eraseblocks or not. UBI needs this information because it affects how it handles the underlying flash. E.g., if the underlying flash is NOR, it cannot have bad blocks and any write or erase error is fatal, and all we can do is to switch to R/O mode. We do not need to reserve a pool of good eraseblocks for bad eraseblocks handling, and so on. This patch also removes 'mtd_can_have_bb()' invocations from Logfs to ensure correct Logfs behavior. I've tested that with this patch UBI works on top of NOR and NAND flashes emulated by mtdram and nandsim correspondingly. This patch is based on patch from Linus Torvalds. Signed-off-by: Artem Bityutskiy Acked-by: Jörn Engel Acked-by: Prasad Joshi Acked-by: Brian Norris Signed-off-by: Linus Torvalds --- fs/logfs/dev_mtd.c | 6 ------ include/linux/mtd/mtd.h | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index e97404d611e0..9c501449450d 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = 0; while (mtd_block_isbad(mtd, *ofs)) { *ofs += mtd->erasesize; @@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = mtd->size - mtd->erasesize; while (mtd_block_isbad(mtd, *ofs)) { *ofs -= mtd->erasesize; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 221295208fd0..887ebe318c75 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -489,7 +489,7 @@ static inline int mtd_has_oob(const struct mtd_info *mtd) static inline int mtd_can_have_bb(const struct mtd_info *mtd) { - return 0; + return !!mtd->block_isbad; } /* Kernel-side ioctl definitions */ From 879a5a001b62a020e074d460b3a7c0fd993f9832 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Tue, 31 Jan 2012 20:02:00 -0800 Subject: [PATCH 151/316] MAINTAINERS: Greg's suse email address is dead My email address has changed, the suse.de one is now dead, so update all of my MAINTAINER entries with the correct one so that patches don't get lost. Also change the status of some of my entries as I'm supposed to be doing this stuff now for real. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- MAINTAINERS | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a1fce9a3ab20..252972b6c4a0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -159,7 +159,7 @@ S: Maintained F: drivers/net/ethernet/realtek/r8169.c 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER -M: Greg Kroah-Hartman +M: Greg Kroah-Hartman L: linux-serial@vger.kernel.org W: http://serial.sourceforge.net S: Maintained @@ -1783,9 +1783,9 @@ X: net/wireless/wext* CHAR and MISC DRIVERS M: Arnd Bergmann -M: Greg Kroah-Hartman +M: Greg Kroah-Hartman T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git -S: Maintained +S: Supported F: drivers/char/* F: drivers/misc/* @@ -2320,7 +2320,7 @@ F: lib/lru_cache.c F: Documentation/blockdev/drbd/ DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS -M: Greg Kroah-Hartman +M: Greg Kroah-Hartman T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core-2.6.git S: Supported F: Documentation/kobject.txt @@ -6276,15 +6276,15 @@ S: Maintained F: arch/alpha/kernel/srm_env.c STABLE BRANCH -M: Greg Kroah-Hartman +M: Greg Kroah-Hartman L: stable@vger.kernel.org -S: Maintained +S: Supported STAGING SUBSYSTEM -M: Greg Kroah-Hartman +M: Greg Kroah-Hartman T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git L: devel@driverdev.osuosl.org -S: Maintained +S: Supported F: drivers/staging/ STAGING - AGERE HERMES II and II.5 WIRELESS DRIVERS @@ -6669,8 +6669,8 @@ S: Maintained K: ^Subject:.*(?i)trivial TTY LAYER -M: Greg Kroah-Hartman -S: Maintained +M: Greg Kroah-Hartman +S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty-2.6.git F: drivers/tty/ F: drivers/tty/serial/serial_core.c @@ -6958,7 +6958,7 @@ S: Maintained F: drivers/usb/serial/digi_acceleport.c USB SERIAL DRIVER -M: Greg Kroah-Hartman +M: Greg Kroah-Hartman L: linux-usb@vger.kernel.org S: Supported F: Documentation/usb/usb-serial.txt @@ -6973,9 +6973,8 @@ S: Maintained F: drivers/usb/serial/empeg.c USB SERIAL KEYSPAN DRIVER -M: Greg Kroah-Hartman +M: Greg Kroah-Hartman L: linux-usb@vger.kernel.org -W: http://www.kroah.com/linux/ S: Maintained F: drivers/usb/serial/*keyspan* @@ -7003,7 +7002,7 @@ F: Documentation/video4linux/sn9c102.txt F: drivers/media/video/sn9c102/ USB SUBSYSTEM -M: Greg Kroah-Hartman +M: Greg Kroah-Hartman L: linux-usb@vger.kernel.org W: http://www.linux-usb.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb-2.6.git @@ -7090,7 +7089,7 @@ F: fs/hppfs/ USERSPACE I/O (UIO) M: "Hans J. Koch" -M: Greg Kroah-Hartman +M: Greg Kroah-Hartman S: Maintained F: Documentation/DocBook/uio-howto.tmpl F: drivers/uio/ From 71879d3cb3dd8f2dfdefb252775c1b3ea04a3dd4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Jan 2012 17:14:38 +0100 Subject: [PATCH 152/316] proc: mem_release() should check mm != NULL mem_release() can hit mm == NULL, add the necessary check. Cc: stable@kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9cde9edf9c4d..c3617ea7830b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -822,8 +822,8 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig) static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; - - mmput(mm); + if (mm) + mmput(mm); return 0; } From 572d34b946bae070debd42db1143034d9687e13f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Jan 2012 17:14:54 +0100 Subject: [PATCH 153/316] proc: unify mem_read() and mem_write() No functional changes, cleanup and preparation. mem_read() and mem_write() are very similar. Move this code into the new common helper, mem_rw(), which takes the additional "int write" argument. Cc: stable@kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 90 ++++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 58 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index c3617ea7830b..be1909041685 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -718,57 +718,13 @@ static int mem_open(struct inode* inode, struct file* file) return 0; } -static ssize_t mem_read(struct file * file, char __user * buf, - size_t count, loff_t *ppos) +static ssize_t mem_rw(struct file *file, char __user *buf, + size_t count, loff_t *ppos, int write) { - int ret; - char *page; - unsigned long src = *ppos; struct mm_struct *mm = file->private_data; - - if (!mm) - return 0; - - page = (char *)__get_free_page(GFP_TEMPORARY); - if (!page) - return -ENOMEM; - - ret = 0; - - while (count > 0) { - int this_len, retval; - - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - retval = access_remote_vm(mm, src, page, this_len, 0); - if (!retval) { - if (!ret) - ret = -EIO; - break; - } - - if (copy_to_user(buf, page, retval)) { - ret = -EFAULT; - break; - } - - ret += retval; - src += retval; - buf += retval; - count -= retval; - } - *ppos = src; - - free_page((unsigned long) page); - return ret; -} - -static ssize_t mem_write(struct file * file, const char __user *buf, - size_t count, loff_t *ppos) -{ - int copied; + unsigned long addr = *ppos; + ssize_t copied; char *page; - unsigned long dst = *ppos; - struct mm_struct *mm = file->private_data; if (!mm) return 0; @@ -779,30 +735,48 @@ static ssize_t mem_write(struct file * file, const char __user *buf, copied = 0; while (count > 0) { - int this_len, retval; + int this_len = min_t(int, count, PAGE_SIZE); - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - if (copy_from_user(page, buf, this_len)) { + if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; break; } - retval = access_remote_vm(mm, dst, page, this_len, 1); - if (!retval) { + + this_len = access_remote_vm(mm, addr, page, this_len, write); + if (!this_len) { if (!copied) copied = -EIO; break; } - copied += retval; - buf += retval; - dst += retval; - count -= retval; + + if (!write && copy_to_user(buf, page, this_len)) { + copied = -EFAULT; + break; + } + + buf += this_len; + addr += this_len; + copied += this_len; + count -= this_len; } - *ppos = dst; + *ppos = addr; free_page((unsigned long) page); return copied; } +static ssize_t mem_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, buf, count, ppos, 0); +} + +static ssize_t mem_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, (char __user*)buf, count, ppos, 1); +} + loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { From 6d08f2c7139790c268820a2e590795cb8333181a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Jan 2012 17:15:11 +0100 Subject: [PATCH 154/316] proc: make sure mem_open() doesn't pin the target's memory Once /proc/pid/mem is opened, the memory can't be released until mem_release() even if its owner exits. Change mem_open() to do atomic_inc(mm_count) + mmput(), this only pins mm_struct. Change mem_rw() to do atomic_inc_not_zero(mm_count) before access_remote_vm(), this verifies that this mm is still alive. I am not sure what should mem_rw() return if atomic_inc_not_zero() fails. With this patch it returns zero to match the "mm == NULL" case, may be it should return -EINVAL like it did before e268337d. Perhaps it makes sense to add the additional fatal_signal_pending() check into the main loop, to ensure we do not hold this memory if the target task was oom-killed. Cc: stable@kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index be1909041685..d9512bd03e6c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -711,6 +711,13 @@ static int mem_open(struct inode* inode, struct file* file) if (IS_ERR(mm)) return PTR_ERR(mm); + if (mm) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } + /* OK to pass negative loff_t, we can catch out-of-range */ file->f_mode |= FMODE_UNSIGNED_OFFSET; file->private_data = mm; @@ -734,6 +741,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf, return -ENOMEM; copied = 0; + if (!atomic_inc_not_zero(&mm->mm_users)) + goto free; + while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); @@ -761,6 +771,8 @@ static ssize_t mem_rw(struct file *file, char __user *buf, } *ppos = addr; + mmput(mm); +free: free_page((unsigned long) page); return copied; } @@ -797,7 +809,7 @@ static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; if (mm) - mmput(mm); + mmdrop(mm); return 0; } From c6df4b17c8539f737a6a2d7b797eac41e8e34cdc Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 2 Feb 2012 00:17:54 +0200 Subject: [PATCH 155/316] lib: Fix multiple definitions of clz_tab Both sparc 32-bit's software divide assembler and MPILIB provide clz_tab[] with identical contents. Break it out into a seperate object file and select it when SPARC32 or MPILIB is set. Reported-by: Al Viro Signed-off-by: David S. Miller Signed-off-by: James Morris --- arch/sparc/Kconfig | 1 + arch/sparc/lib/divdi3.S | 16 +--------------- lib/Kconfig | 4 ++++ lib/Makefile | 2 ++ lib/clz_tab.c | 18 ++++++++++++++++++ lib/mpi/mpi-bit.c | 19 ------------------- 6 files changed, 26 insertions(+), 34 deletions(-) create mode 100644 lib/clz_tab.c diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 96657992a72e..ca5580e4d813 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -33,6 +33,7 @@ config SPARC config SPARC32 def_bool !64BIT select GENERIC_ATOMIC64 + select CLZ_TAB config SPARC64 def_bool 64BIT diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S index 681b3683da9e..d74bc0925f2d 100644 --- a/arch/sparc/lib/divdi3.S +++ b/arch/sparc/lib/divdi3.S @@ -17,23 +17,9 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - .data - .align 8 - .globl __clz_tab -__clz_tab: - .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 - .byte 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 - .byte 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 - .byte 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 - .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 - .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 - .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 - .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 - .size __clz_tab,256 - .global .udiv - .text .align 4 + .global .udiv .globl __divdi3 __divdi3: save %sp,-104,%sp diff --git a/lib/Kconfig b/lib/Kconfig index 169eb7c598e5..d69d321a0997 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -279,6 +279,9 @@ config AVERAGE If unsure, say N. +config CLZ_TAB + bool + config CORDIC tristate "CORDIC algorithm" help @@ -287,6 +290,7 @@ config CORDIC config MPILIB tristate + select CLZ_TAB help Multiprecision maths library from GnuPG. It is used to implement RSA digital signature verification, diff --git a/lib/Makefile b/lib/Makefile index d71aae1b01b3..18515f0267c4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -121,6 +121,8 @@ obj-$(CONFIG_DQL) += dynamic_queue_limits.o obj-$(CONFIG_MPILIB) += mpi/ obj-$(CONFIG_SIGNATURE) += digsig.o +obj-$(CONFIG_CLZ_TAB) += clz_tab.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/clz_tab.c b/lib/clz_tab.c new file mode 100644 index 000000000000..7287b4a991a7 --- /dev/null +++ b/lib/clz_tab.c @@ -0,0 +1,18 @@ +const unsigned char __clz_tab[] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, +}; diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c index 854c9c6da025..2f526627e4f5 100644 --- a/lib/mpi/mpi-bit.c +++ b/lib/mpi/mpi-bit.c @@ -21,25 +21,6 @@ #include "mpi-internal.h" #include "longlong.h" -const unsigned char __clz_tab[] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, -}; - #define A_LIMB_1 ((mpi_limb_t) 1) /**************** From a99e7e5f36c55e561a64280f5099078c31839076 Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 2 Feb 2012 00:17:55 +0200 Subject: [PATCH 156/316] lib: Fix 32-bit sparc udiv_qrnnd() definition in mpilib's longlong.h This copy of longlong.h is extremely dated and results in compile errors on sparc32 when MPILIB is enabled, copy over the more uptodate implementation from arch/sparc/math/sfp-util_32.h Reported-by: Al Viro Signed-off-by: David S. Miller Signed-off-by: James Morris --- lib/mpi/longlong.h | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index b87487b40a8b..29f98624ef93 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -1200,18 +1200,40 @@ do { \ "r" ((USItype)(v)) \ : "%g1", "%g2" __AND_CLOBBER_CC) #define UMUL_TIME 39 /* 39 instructions */ -#endif -#ifndef udiv_qrnnd -#ifndef LONGLONG_STANDALONE +/* It's quite necessary to add this much assembler for the sparc. + The default udiv_qrnnd (in C) is more than 10 times slower! */ #define udiv_qrnnd(q, r, n1, n0, d) \ -do { USItype __r; \ - (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ - (r) = __r; \ -} while (0) - extern USItype __udiv_qrnnd(); -#define UDIV_TIME 140 -#endif /* LONGLONG_STANDALONE */ -#endif /* udiv_qrnnd */ + __asm__ ("! Inlined udiv_qrnnd\n\t" \ + "mov 32,%%g1\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "1: bcs 5f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "addx %1,%1,%1 ! so this can't give carry\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "2: bne 1b\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "bcs 3f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "b 3f\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "4: sub %1,%2,%1\n\t" \ + "5: addxcc %1,%1,%1\n\t" \ + "bcc 2b\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "! Got carry from n. Subtract next step to cancel this carry.\n\t" \ + "bne 4b\n\t" \ + "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \ + "sub %1,%2,%1\n\t" \ + "3: xnor %0,0,%0\n\t" \ + "! End of inline udiv_qrnnd\n" \ + : "=&r" ((USItype)(q)), \ + "=&r" ((USItype)(r)) \ + : "r" ((USItype)(d)), \ + "1" ((USItype)(n1)), \ + "0" ((USItype)(n0)) : "%g1", "cc") +#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ +#endif #endif /* __sparc__ */ /*************************************** From 13405059e8148259a0d5b66f42e93d641a63e66a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 1 Feb 2012 18:15:49 -0800 Subject: [PATCH 157/316] docbook: fix fatal errors in device-drivers docbook and add DMA Management section Fix 2 fatal errors in the device-drivers docbook. Also add some missing files from drivers/base/; since several of these are DMA-related, add a section for DMA Management. docproc: drivers/base/sys.c: No such file or directory docproc: drivers/tty/serial/8250.c: No such file or directory Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/DocBook/device-drivers.tmpl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 2f7fd4360848..9c27e5125dd2 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -102,9 +102,12 @@ X!Iinclude/linux/kobject.h !Iinclude/linux/device.h Device Drivers Base +!Idrivers/base/init.c !Edrivers/base/driver.c !Edrivers/base/core.c +!Edrivers/base/syscore.c !Edrivers/base/class.c +!Idrivers/base/node.c !Edrivers/base/firmware_class.c !Edrivers/base/transport_class.c -!Edrivers/base/sys.c +!Edrivers/base/dd.c !Iinclude/linux/platform_device.h !Edrivers/base/platform.c !Edrivers/base/bus.c + + Device Drivers DMA Management +!Edrivers/base/dma-buf.c +!Edrivers/base/dma-coherent.c +!Edrivers/base/dma-mapping.c Device Drivers Power Management !Edrivers/base/power/main.c @@ -219,7 +227,7 @@ X!Isound/sound_firmware.c 16x50 UART Driver !Edrivers/tty/serial/serial_core.c -!Edrivers/tty/serial/8250.c +!Edrivers/tty/serial/8250/8250.c From da46d7dd530c5af7cff049145f0c088f5e75bdc6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 1 Feb 2012 19:04:02 -0800 Subject: [PATCH 158/316] staging: fix go7007-usb license Add MODULE_LICENSE() as per the license in the comment at the top of the file for this source module to fix build warning: WARNING: modpost: missing MODULE_LICENSE() in drivers/staging/media/go7007/go7007-usb.o see include/linux/module.h for more information Signed-off-by: Randy Dunlap Cc: Ross Cohen Signed-off-by: Linus Torvalds --- drivers/staging/media/go7007/go7007-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/media/go7007/go7007-usb.c b/drivers/staging/media/go7007/go7007-usb.c index 70e006b50f29..5443e25086e9 100644 --- a/drivers/staging/media/go7007/go7007-usb.c +++ b/drivers/staging/media/go7007/go7007-usb.c @@ -1279,3 +1279,4 @@ static struct usb_driver go7007_usb_driver = { }; module_usb_driver(go7007_usb_driver); +MODULE_LICENSE("GPL v2"); From 054d867e032daf55c3343fc6d36c5c5f1e7954db Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Jan 2012 12:25:50 +0100 Subject: [PATCH 159/316] ALSA: hda - Check power-state before changing in patch_via.c Instead of always writing AC_VERB_SET_POWER_STATE, check the current power-state and don't write again if the value is already set. This may reduce the click noise upon the dynamic power-state change (e.g. in analog-input mixer). Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_via.c | 256 ++++++++++++++++---------------------- 1 file changed, 107 insertions(+), 149 deletions(-) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 03e63fed9caf..fb1f0ffc556b 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -687,6 +687,15 @@ static void via_auto_init_analog_input(struct hda_codec *codec) } } +static void update_power_state(struct hda_codec *codec, hda_nid_t nid, + unsigned int parm) +{ + if (snd_hda_codec_read(codec, nid, 0, + AC_VERB_GET_POWER_STATE, 0) == parm) + return; + snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE, parm); +} + static void set_pin_power_state(struct hda_codec *codec, hda_nid_t nid, unsigned int *affected_parm) { @@ -709,7 +718,7 @@ static void set_pin_power_state(struct hda_codec *codec, hda_nid_t nid, } else parm = AC_PWRST_D3; - snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, nid, parm); } static int via_pin_power_ctl_info(struct snd_kcontrol *kcontrol, @@ -2295,10 +2304,7 @@ static int via_mux_enum_put(struct snd_kcontrol *kcontrol, if (mux) { /* switch to D0 beofre change index */ - if (snd_hda_codec_read(codec, mux, 0, - AC_VERB_GET_POWER_STATE, 0x00) != AC_PWRST_D0) - snd_hda_codec_write(codec, mux, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); + update_power_state(codec, mux, AC_PWRST_D0); snd_hda_codec_write(codec, mux, 0, AC_VERB_SET_CONNECT_SEL, spec->inputs[cur].mux_idx); @@ -2922,9 +2928,9 @@ static void set_widgets_power_state_vt1708B(struct hda_codec *codec) if (imux_is_smixer) parm = AC_PWRST_D0; /* SW0 (17h), AIW 0/1 (13h/14h) */ - snd_hda_codec_write(codec, 0x17, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x13, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x14, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x17, parm); + update_power_state(codec, 0x13, parm); + update_power_state(codec, 0x14, parm); /* outputs */ /* PW0 (19h), SW1 (18h), AOW1 (11h) */ @@ -2932,8 +2938,8 @@ static void set_widgets_power_state_vt1708B(struct hda_codec *codec) set_pin_power_state(codec, 0x19, &parm); if (spec->smart51_enabled) set_pin_power_state(codec, 0x1b, &parm); - snd_hda_codec_write(codec, 0x18, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x11, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x18, parm); + update_power_state(codec, 0x11, parm); /* PW6 (22h), SW2 (26h), AOW2 (24h) */ if (is_8ch) { @@ -2941,20 +2947,16 @@ static void set_widgets_power_state_vt1708B(struct hda_codec *codec) set_pin_power_state(codec, 0x22, &parm); if (spec->smart51_enabled) set_pin_power_state(codec, 0x1a, &parm); - snd_hda_codec_write(codec, 0x26, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x24, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x26, parm); + update_power_state(codec, 0x24, parm); } else if (codec->vendor_id == 0x11064397) { /* PW7(23h), SW2(27h), AOW2(25h) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x23, &parm); if (spec->smart51_enabled) set_pin_power_state(codec, 0x1a, &parm); - snd_hda_codec_write(codec, 0x27, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x25, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x27, parm); + update_power_state(codec, 0x25, parm); } /* PW 3/4/7 (1ch/1dh/23h) */ @@ -2966,17 +2968,13 @@ static void set_widgets_power_state_vt1708B(struct hda_codec *codec) set_pin_power_state(codec, 0x23, &parm); /* MW0 (16h), Sw3 (27h), AOW 0/3 (10h/25h) */ - snd_hda_codec_write(codec, 0x16, 0, AC_VERB_SET_POWER_STATE, - imux_is_smixer ? AC_PWRST_D0 : parm); - snd_hda_codec_write(codec, 0x10, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x16, imux_is_smixer ? AC_PWRST_D0 : parm); + update_power_state(codec, 0x10, parm); if (is_8ch) { - snd_hda_codec_write(codec, 0x25, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x27, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x25, parm); + update_power_state(codec, 0x27, parm); } else if (codec->vendor_id == 0x11064397 && spec->hp_independent_mode) - snd_hda_codec_write(codec, 0x25, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x25, parm); } static int patch_vt1708S(struct hda_codec *codec); @@ -3149,10 +3147,10 @@ static void set_widgets_power_state_vt1702(struct hda_codec *codec) if (imux_is_smixer) parm = AC_PWRST_D0; /* SW0 (13h) = stereo mixer (idx 3) */ /* SW0 (13h), AIW 0/1/2 (12h/1fh/20h) */ - snd_hda_codec_write(codec, 0x13, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x12, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x1f, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x20, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x13, parm); + update_power_state(codec, 0x12, parm); + update_power_state(codec, 0x1f, parm); + update_power_state(codec, 0x20, parm); /* outputs */ /* PW 3/4 (16h/17h) */ @@ -3160,10 +3158,9 @@ static void set_widgets_power_state_vt1702(struct hda_codec *codec) set_pin_power_state(codec, 0x17, &parm); set_pin_power_state(codec, 0x16, &parm); /* MW0 (1ah), AOW 0/1 (10h/1dh) */ - snd_hda_codec_write(codec, 0x1a, 0, AC_VERB_SET_POWER_STATE, - imux_is_smixer ? AC_PWRST_D0 : parm); - snd_hda_codec_write(codec, 0x10, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x1d, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x1a, imux_is_smixer ? AC_PWRST_D0 : parm); + update_power_state(codec, 0x10, parm); + update_power_state(codec, 0x1d, parm); } static int patch_vt1702(struct hda_codec *codec) @@ -3228,52 +3225,48 @@ static void set_widgets_power_state_vt1718S(struct hda_codec *codec) if (imux_is_smixer) parm = AC_PWRST_D0; /* MUX6/7 (1eh/1fh), AIW 0/1 (10h/11h) */ - snd_hda_codec_write(codec, 0x1e, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x1f, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x10, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x11, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x1e, parm); + update_power_state(codec, 0x1f, parm); + update_power_state(codec, 0x10, parm); + update_power_state(codec, 0x11, parm); /* outputs */ /* PW3 (27h), MW2 (1ah), AOW3 (bh) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x27, &parm); - snd_hda_codec_write(codec, 0x1a, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0xb, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x1a, parm); + update_power_state(codec, 0xb, parm); /* PW2 (26h), AOW2 (ah) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x26, &parm); if (spec->smart51_enabled) set_pin_power_state(codec, 0x2b, &parm); - snd_hda_codec_write(codec, 0xa, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0xa, parm); /* PW0 (24h), AOW0 (8h) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x24, &parm); if (!spec->hp_independent_mode) /* check for redirected HP */ set_pin_power_state(codec, 0x28, &parm); - snd_hda_codec_write(codec, 0x8, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x8, parm); /* MW9 (21h), Mw2 (1ah), AOW0 (8h) */ - snd_hda_codec_write(codec, 0x21, 0, AC_VERB_SET_POWER_STATE, - imux_is_smixer ? AC_PWRST_D0 : parm); + update_power_state(codec, 0x21, imux_is_smixer ? AC_PWRST_D0 : parm); /* PW1 (25h), AOW1 (9h) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x25, &parm); if (spec->smart51_enabled) set_pin_power_state(codec, 0x2a, &parm); - snd_hda_codec_write(codec, 0x9, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x9, parm); if (spec->hp_independent_mode) { /* PW4 (28h), MW3 (1bh), MUX1(34h), AOW4 (ch) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x28, &parm); - snd_hda_codec_write(codec, 0x1b, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x34, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0xc, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x1b, parm); + update_power_state(codec, 0x34, parm); + update_power_state(codec, 0xc, parm); } } @@ -3433,8 +3426,8 @@ static void set_widgets_power_state_vt1716S(struct hda_codec *codec) if (imux_is_smixer) parm = AC_PWRST_D0; /* SW0 (17h), AIW0(13h) */ - snd_hda_codec_write(codec, 0x17, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x13, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x17, parm); + update_power_state(codec, 0x13, parm); parm = AC_PWRST_D3; set_pin_power_state(codec, 0x1e, &parm); @@ -3442,12 +3435,11 @@ static void set_widgets_power_state_vt1716S(struct hda_codec *codec) if (spec->dmic_enabled) set_pin_power_state(codec, 0x22, &parm); else - snd_hda_codec_write(codec, 0x22, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + update_power_state(codec, 0x22, AC_PWRST_D3); /* SW2(26h), AIW1(14h) */ - snd_hda_codec_write(codec, 0x26, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x14, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x26, parm); + update_power_state(codec, 0x14, parm); /* outputs */ /* PW0 (19h), SW1 (18h), AOW1 (11h) */ @@ -3456,8 +3448,8 @@ static void set_widgets_power_state_vt1716S(struct hda_codec *codec) /* Smart 5.1 PW2(1bh) */ if (spec->smart51_enabled) set_pin_power_state(codec, 0x1b, &parm); - snd_hda_codec_write(codec, 0x18, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x11, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x18, parm); + update_power_state(codec, 0x11, parm); /* PW7 (23h), SW3 (27h), AOW3 (25h) */ parm = AC_PWRST_D3; @@ -3465,12 +3457,12 @@ static void set_widgets_power_state_vt1716S(struct hda_codec *codec) /* Smart 5.1 PW1(1ah) */ if (spec->smart51_enabled) set_pin_power_state(codec, 0x1a, &parm); - snd_hda_codec_write(codec, 0x27, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x27, parm); /* Smart 5.1 PW5(1eh) */ if (spec->smart51_enabled) set_pin_power_state(codec, 0x1e, &parm); - snd_hda_codec_write(codec, 0x25, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x25, parm); /* Mono out */ /* SW4(28h)->MW1(29h)-> PW12 (2ah)*/ @@ -3486,9 +3478,9 @@ static void set_widgets_power_state_vt1716S(struct hda_codec *codec) mono_out = 1; } parm = mono_out ? AC_PWRST_D0 : AC_PWRST_D3; - snd_hda_codec_write(codec, 0x28, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x29, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x2a, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x28, parm); + update_power_state(codec, 0x29, parm); + update_power_state(codec, 0x2a, parm); /* PW 3/4 (1ch/1dh) */ parm = AC_PWRST_D3; @@ -3496,15 +3488,12 @@ static void set_widgets_power_state_vt1716S(struct hda_codec *codec) set_pin_power_state(codec, 0x1d, &parm); /* HP Independent Mode, power on AOW3 */ if (spec->hp_independent_mode) - snd_hda_codec_write(codec, 0x25, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x25, parm); /* force to D0 for internal Speaker */ /* MW0 (16h), AOW0 (10h) */ - snd_hda_codec_write(codec, 0x16, 0, AC_VERB_SET_POWER_STATE, - imux_is_smixer ? AC_PWRST_D0 : parm); - snd_hda_codec_write(codec, 0x10, 0, AC_VERB_SET_POWER_STATE, - mono_out ? AC_PWRST_D0 : parm); + update_power_state(codec, 0x16, imux_is_smixer ? AC_PWRST_D0 : parm); + update_power_state(codec, 0x10, mono_out ? AC_PWRST_D0 : parm); } static int patch_vt1716S(struct hda_codec *codec) @@ -3580,54 +3569,45 @@ static void set_widgets_power_state_vt2002P(struct hda_codec *codec) set_pin_power_state(codec, 0x2b, &parm); parm = AC_PWRST_D0; /* MUX9/10 (1eh/1fh), AIW 0/1 (10h/11h) */ - snd_hda_codec_write(codec, 0x1e, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x1f, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x10, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x11, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x1e, parm); + update_power_state(codec, 0x1f, parm); + update_power_state(codec, 0x10, parm); + update_power_state(codec, 0x11, parm); /* outputs */ /* AOW0 (8h)*/ - snd_hda_codec_write(codec, 0x8, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x8, parm); if (spec->codec_type == VT1802) { /* PW4 (28h), MW4 (18h), MUX4(38h) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x28, &parm); - snd_hda_codec_write(codec, 0x18, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x38, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x18, parm); + update_power_state(codec, 0x38, parm); } else { /* PW4 (26h), MW4 (1ch), MUX4(37h) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x26, &parm); - snd_hda_codec_write(codec, 0x1c, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x37, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x1c, parm); + update_power_state(codec, 0x37, parm); } if (spec->codec_type == VT1802) { /* PW1 (25h), MW1 (15h), MUX1(35h), AOW1 (9h) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x25, &parm); - snd_hda_codec_write(codec, 0x15, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x35, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x15, parm); + update_power_state(codec, 0x35, parm); } else { /* PW1 (25h), MW1 (19h), MUX1(35h), AOW1 (9h) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x25, &parm); - snd_hda_codec_write(codec, 0x19, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x35, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x19, parm); + update_power_state(codec, 0x35, parm); } if (spec->hp_independent_mode) - snd_hda_codec_write(codec, 0x9, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); + update_power_state(codec, 0x9, AC_PWRST_D0); /* Class-D */ /* PW0 (24h), MW0(18h/14h), MUX0(34h) */ @@ -3637,12 +3617,10 @@ static void set_widgets_power_state_vt2002P(struct hda_codec *codec) set_pin_power_state(codec, 0x24, &parm); parm = present ? AC_PWRST_D3 : AC_PWRST_D0; if (spec->codec_type == VT1802) - snd_hda_codec_write(codec, 0x14, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x14, parm); else - snd_hda_codec_write(codec, 0x18, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x34, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x18, parm); + update_power_state(codec, 0x34, parm); /* Mono Out */ present = snd_hda_jack_detect(codec, 0x26); @@ -3650,28 +3628,20 @@ static void set_widgets_power_state_vt2002P(struct hda_codec *codec) parm = present ? AC_PWRST_D3 : AC_PWRST_D0; if (spec->codec_type == VT1802) { /* PW15 (33h), MW8(1ch), MUX8(3ch) */ - snd_hda_codec_write(codec, 0x33, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x1c, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x3c, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x33, parm); + update_power_state(codec, 0x1c, parm); + update_power_state(codec, 0x3c, parm); } else { /* PW15 (31h), MW8(17h), MUX8(3bh) */ - snd_hda_codec_write(codec, 0x31, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x17, 0, - AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x3b, 0, - AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x31, parm); + update_power_state(codec, 0x17, parm); + update_power_state(codec, 0x3b, parm); } /* MW9 (21h) */ if (imux_is_smixer || !is_aa_path_mute(codec)) - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); + update_power_state(codec, 0x21, AC_PWRST_D0); else - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + update_power_state(codec, 0x21, AC_PWRST_D3); } /* patch for vt2002P */ @@ -3731,30 +3701,28 @@ static void set_widgets_power_state_vt1812(struct hda_codec *codec) set_pin_power_state(codec, 0x2b, &parm); parm = AC_PWRST_D0; /* MUX10/11 (1eh/1fh), AIW 0/1 (10h/11h) */ - snd_hda_codec_write(codec, 0x1e, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x1f, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x10, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x11, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x1e, parm); + update_power_state(codec, 0x1f, parm); + update_power_state(codec, 0x10, parm); + update_power_state(codec, 0x11, parm); /* outputs */ /* AOW0 (8h)*/ - snd_hda_codec_write(codec, 0x8, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); + update_power_state(codec, 0x8, AC_PWRST_D0); /* PW4 (28h), MW4 (18h), MUX4(38h) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x28, &parm); - snd_hda_codec_write(codec, 0x18, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x38, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x18, parm); + update_power_state(codec, 0x38, parm); /* PW1 (25h), MW1 (15h), MUX1(35h), AOW1 (9h) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x25, &parm); - snd_hda_codec_write(codec, 0x15, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x35, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x15, parm); + update_power_state(codec, 0x35, parm); if (spec->hp_independent_mode) - snd_hda_codec_write(codec, 0x9, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); + update_power_state(codec, 0x9, AC_PWRST_D0); /* Internal Speaker */ /* PW0 (24h), MW0(14h), MUX0(34h) */ @@ -3763,15 +3731,11 @@ static void set_widgets_power_state_vt1812(struct hda_codec *codec) parm = AC_PWRST_D3; set_pin_power_state(codec, 0x24, &parm); if (present) { - snd_hda_codec_write(codec, 0x14, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); - snd_hda_codec_write(codec, 0x34, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + update_power_state(codec, 0x14, AC_PWRST_D3); + update_power_state(codec, 0x34, AC_PWRST_D3); } else { - snd_hda_codec_write(codec, 0x14, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); - snd_hda_codec_write(codec, 0x34, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); + update_power_state(codec, 0x14, AC_PWRST_D0); + update_power_state(codec, 0x34, AC_PWRST_D0); } @@ -3782,26 +3746,20 @@ static void set_widgets_power_state_vt1812(struct hda_codec *codec) parm = AC_PWRST_D3; set_pin_power_state(codec, 0x31, &parm); if (present) { - snd_hda_codec_write(codec, 0x1c, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); - snd_hda_codec_write(codec, 0x3c, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); - snd_hda_codec_write(codec, 0x3e, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + update_power_state(codec, 0x1c, AC_PWRST_D3); + update_power_state(codec, 0x3c, AC_PWRST_D3); + update_power_state(codec, 0x3e, AC_PWRST_D3); } else { - snd_hda_codec_write(codec, 0x1c, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); - snd_hda_codec_write(codec, 0x3c, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); - snd_hda_codec_write(codec, 0x3e, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D0); + update_power_state(codec, 0x1c, AC_PWRST_D0); + update_power_state(codec, 0x3c, AC_PWRST_D0); + update_power_state(codec, 0x3e, AC_PWRST_D0); } /* PW15 (33h), MW15 (1dh), MUX15(3dh) */ parm = AC_PWRST_D3; set_pin_power_state(codec, 0x33, &parm); - snd_hda_codec_write(codec, 0x1d, 0, AC_VERB_SET_POWER_STATE, parm); - snd_hda_codec_write(codec, 0x3d, 0, AC_VERB_SET_POWER_STATE, parm); + update_power_state(codec, 0x1d, parm); + update_power_state(codec, 0x3d, parm); } From 924339239fd5ba3e505f9420d41f0939196f3530 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Jan 2012 13:58:36 +0100 Subject: [PATCH 160/316] ALSA: hda - Fix the logic to detect VIA analog low-current mode The analog low-current mode must be enabled when the no stream is running but the current detection checks it in a wrong way. Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=741128 Cc: [v3.1+] Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_via.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index fb1f0ffc556b..de43cd92b0a5 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1051,7 +1051,7 @@ static void analog_low_current_mode(struct hda_codec *codec) bool enable; unsigned int verb, parm; - enable = is_aa_path_mute(codec) && (spec->opened_streams != 0); + enable = is_aa_path_mute(codec) && !spec->opened_streams; /* decide low current mode's verb & parameter */ switch (spec->codec_type) { From e9d010c2e8f03952e67a6fd8aed0f0dc92084ccc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 1 Feb 2012 10:33:23 +0100 Subject: [PATCH 161/316] ALSA: hda - Allow analog low-current mode when dynamic power-control is on VIA codecs have several different power-saving features, and one of them is the analog low-current mode. But it turned out that the ALC mode causes pop-noises at each on/off time on some machines. As a quick workaround, disable the ALC when another power-saving feature, the dynamic pin power-control, is turned off, too, since the dynamic power-control is already exposed as a mixer enum element so that user can turn it on/off freely. Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=741128 Cc: [v3.1+] Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_via.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index de43cd92b0a5..79166fb8b074 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -199,6 +199,9 @@ struct via_spec { unsigned int no_pin_power_ctl; enum VIA_HDA_CODEC codec_type; + /* analog low-power control */ + bool alc_mode; + /* smart51 setup */ unsigned int smart51_nums; hda_nid_t smart51_pins[2]; @@ -758,6 +761,7 @@ static int via_pin_power_ctl_put(struct snd_kcontrol *kcontrol, return 0; spec->no_pin_power_ctl = val; set_widgets_power_state(codec); + analog_low_current_mode(codec); return 1; } @@ -1045,13 +1049,19 @@ static bool is_aa_path_mute(struct hda_codec *codec) } /* enter/exit analog low-current mode */ -static void analog_low_current_mode(struct hda_codec *codec) +static void __analog_low_current_mode(struct hda_codec *codec, bool force) { struct via_spec *spec = codec->spec; bool enable; unsigned int verb, parm; - enable = is_aa_path_mute(codec) && !spec->opened_streams; + if (spec->no_pin_power_ctl) + enable = false; + else + enable = is_aa_path_mute(codec) && !spec->opened_streams; + if (enable == spec->alc_mode && !force) + return; + spec->alc_mode = enable; /* decide low current mode's verb & parameter */ switch (spec->codec_type) { @@ -1083,6 +1093,11 @@ static void analog_low_current_mode(struct hda_codec *codec) snd_hda_codec_write(codec, codec->afg, 0, verb, parm); } +static void analog_low_current_mode(struct hda_codec *codec) +{ + return __analog_low_current_mode(codec, false); +} + /* * generic initialization of ADC, input mixers and output mixers */ @@ -1508,10 +1523,6 @@ static int via_build_controls(struct hda_codec *codec) return err; } - /* init power states */ - set_widgets_power_state(codec); - analog_low_current_mode(codec); - via_free_kctls(codec); /* no longer needed */ err = snd_hda_jack_add_kctls(codec, &spec->autocfg); @@ -2782,6 +2793,10 @@ static int via_init(struct hda_codec *codec) for (i = 0; i < spec->num_iverbs; i++) snd_hda_sequence_write(codec, spec->init_verbs[i]); + /* init power states */ + set_widgets_power_state(codec); + __analog_low_current_mode(codec, true); + via_auto_init_multi_out(codec); via_auto_init_hp_out(codec); via_auto_init_speaker_out(codec); From b5bcc189401c815988b7dd37611fc56f40c9139d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 2 Feb 2012 10:30:17 +0100 Subject: [PATCH 162/316] ALSA: hda - Disable dynamic-power control for VIA as default Since the dynamic pin power-control and the analog low-current mode may lead to pop-noise, it's safer to set it off as default. Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=741128 Cc: [v3.1+] Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_via.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 79166fb8b074..284e311040fe 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1470,6 +1470,7 @@ static int via_build_controls(struct hda_codec *codec) struct snd_kcontrol *kctl; int err, i; + spec->no_pin_power_ctl = 1; if (spec->set_widgets_power_state) if (!via_clone_control(spec, &via_pin_power_ctl_enum)) return -ENOMEM; From de47a9cd62771e3e78954d855d2304fbad4c5a44 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 2 Feb 2012 15:25:16 +0000 Subject: [PATCH 163/316] drm/radeon: fix use after free in ATRM bios reading code. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=45503 Reported-and-Debugged-by: mlambda@gmail.com Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 13ac63ba6075..98724fcb0088 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -59,8 +59,9 @@ static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios, obj = (union acpi_object *)buffer.pointer; memcpy(bios+offset, obj->buffer.pointer, obj->buffer.length); + len = obj->buffer.length; kfree(buffer.pointer); - return obj->buffer.length; + return len; } bool radeon_atrm_supported(struct pci_dev *pdev) From 304a48400d9718f74ec35ae46f30868a5f4c4516 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 2 Feb 2012 10:18:00 -0500 Subject: [PATCH 164/316] drm/radeon/kms: fix TRAVIS panel setup Different versions of the DP to LVDS bridge chip need different panel mode settings depending on the chip version used. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=41569 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atombios_dp.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index a71557ce01dc..552b436451fd 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -564,9 +564,21 @@ int radeon_dp_get_panel_mode(struct drm_encoder *encoder, ENCODER_OBJECT_ID_NUTMEG) panel_mode = DP_PANEL_MODE_INTERNAL_DP1_MODE; else if (radeon_connector_encoder_get_dp_bridge_encoder_id(connector) == - ENCODER_OBJECT_ID_TRAVIS) - panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE; - else if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + ENCODER_OBJECT_ID_TRAVIS) { + u8 id[6]; + int i; + for (i = 0; i < 6; i++) + id[i] = radeon_read_dpcd_reg(radeon_connector, 0x503 + i); + if (id[0] == 0x73 && + id[1] == 0x69 && + id[2] == 0x76 && + id[3] == 0x61 && + id[4] == 0x72 && + id[5] == 0x54) + panel_mode = DP_PANEL_MODE_INTERNAL_DP1_MODE; + else + panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE; + } else if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { u8 tmp = radeon_read_dpcd_reg(radeon_connector, DP_EDP_CONFIGURATION_CAP); if (tmp & 1) panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE; From 52b53a0bf8026a322cfa6cfec6a10dd31fef8752 Mon Sep 17 00:00:00 2001 From: Ilija Hadzic Date: Thu, 2 Feb 2012 10:26:24 -0500 Subject: [PATCH 165/316] drm/radeon/kms/blit: fix blit copy for very large buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Evergreen and NI blit copy was broken if the buffer maps to a rectangle whose one dimension is 16384 (max dimension allowed by these chips). In the mainline kernel, the problem is exposed only when buffers are very large (1G), but it's still a problem. The problem could be exposed for smaller buffers if anyone modifies the algorithm for rectangle construction in r600_blit_create_rect() (the reason why someone would modify that algorithm is to tune the performance of buffer moves). The root cause was in i2f() function which only operated on range between 0 and 16383. Fix this by extending the range of i2f() function to 0 to 32767. While at it improve the function so that the range can be easily extended in the future (if it becomes necessary), cleanup lines over 80 characters, and replace in-line comments with one strategic comment that explains the crux of the function. Credits to michel@daenzer.net for pointing out the root cause of the bug. v2: Fix I2F_MAX_INPUT constant definition goof and warn only once if input argument is out of range. Edit the comment a little bit to avoid some linguistic confusion and make it look better in general. Signed-off-by: Ilija Hadzic Reviewed-by: Alex Deucher Reviewed-by: Michel Dänzer Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_blit_kms.c | 35 ++++++++++++++++++-------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index d996f4381130..accc032c103f 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -468,27 +468,42 @@ set_default_state(struct radeon_device *rdev) radeon_ring_write(ring, sq_stack_resource_mgmt_2); } +#define I2F_MAX_BITS 15 +#define I2F_MAX_INPUT ((1 << I2F_MAX_BITS) - 1) +#define I2F_SHIFT (24 - I2F_MAX_BITS) + +/* + * Converts unsigned integer into 32-bit IEEE floating point representation. + * Conversion is not universal and only works for the range from 0 + * to 2^I2F_MAX_BITS-1. Currently we only use it with inputs between + * 0 and 16384 (inclusive), so I2F_MAX_BITS=15 is enough. If necessary, + * I2F_MAX_BITS can be increased, but that will add to the loop iterations + * and slow us down. Conversion is done by shifting the input and counting + * down until the first 1 reaches bit position 23. The resulting counter + * and the shifted input are, respectively, the exponent and the fraction. + * The sign is always zero. + */ static uint32_t i2f(uint32_t input) { u32 result, i, exponent, fraction; - if ((input & 0x3fff) == 0) - result = 0; /* 0 is a special case */ + WARN_ON_ONCE(input > I2F_MAX_INPUT); + + if ((input & I2F_MAX_INPUT) == 0) + result = 0; else { - exponent = 140; /* exponent biased by 127; */ - fraction = (input & 0x3fff) << 10; /* cheat and only - handle numbers below 2^^15 */ - for (i = 0; i < 14; i++) { + exponent = 126 + I2F_MAX_BITS; + fraction = (input & I2F_MAX_INPUT) << I2F_SHIFT; + + for (i = 0; i < I2F_MAX_BITS; i++) { if (fraction & 0x800000) break; else { - fraction = fraction << 1; /* keep - shifting left until top bit = 1 */ + fraction = fraction << 1; exponent = exponent - 1; } } - result = exponent << 23 | (fraction & 0x7fffff); /* mask - off top bit; assumed 1 */ + result = exponent << 23 | (fraction & 0x7fffff); } return result; } From 114fc47492e23d93653e4a16664833e98d62a563 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 11 Jan 2012 17:41:01 -0800 Subject: [PATCH 166/316] ceph: change "ceph.layout" xattr to be "ceph.file.layout" The virtual extended attribute named "ceph.layout" is meaningful only for regular files. Change its name to be "ceph.file.layout" to more directly reflect that in the ceph xattr namespace. Preserve the old "ceph.layout" name for the time being (until we decide it's safe to get rid of it entirely). Add a missing initializer for "readonly" in the terminating entry. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- fs/ceph/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index a5e36e4488a7..9e6734e38c12 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -111,8 +111,10 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, } static struct ceph_vxattr_cb ceph_file_vxattrs[] = { + { true, "ceph.file.layout", ceph_vxattrcb_layout}, + /* The following extended attribute name is deprecated */ { true, "ceph.layout", ceph_vxattrcb_layout}, - { NULL, NULL } + { true, NULL, NULL } }; static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode) From ab434b60ab07f8c44246b6fb0cddee436687a09a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 13 Jan 2012 22:22:03 -0800 Subject: [PATCH 167/316] ceph: initialize client debugfs outside of monc->mutex Initializing debufs under monc->mutex introduces a lock dependency for sb->s_type->i_mutex_key, which (combined with several other dependencies) leads to an annoying lockdep warning. There's no particular reason to do the debugfs setup under this lock, so move it out. It used to be the case that our first monmap could come from the OSD; that is no longer the case with recent servers, so we will reliably set up the client entry during the initial authentication. We don't have to worry about racing with debugfs teardown by ceph_debugfs_client_cleanup() because ceph_destroy_client() calls ceph_msgr_flush() first, which will wait for the message dispatch work to complete (and the debugfs init to complete). Fixes: #1940 Signed-off-by: Sage Weil --- net/ceph/ceph_common.c | 2 -- net/ceph/mon_client.c | 13 ++++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 97f70e50ad3b..761ad9d6cc3b 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -85,8 +85,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) } else { pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); memcpy(&client->fsid, fsid, sizeof(*fsid)); - ceph_debugfs_client_init(client); - client->have_fsid = true; } return 0; } diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 0b62deae42bd..1845cde26227 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -8,8 +8,8 @@ #include #include +#include #include - #include /* @@ -340,8 +340,19 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, client->monc.monmap = monmap; kfree(old); + if (!client->have_fsid) { + client->have_fsid = true; + mutex_unlock(&monc->mutex); + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(client); + goto out_unlocked; + } out: mutex_unlock(&monc->mutex); +out_unlocked: wake_up_all(&client->auth_wq); } From 32852a81bccd9e3d1953b894966393d1b546576d Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sat, 14 Jan 2012 22:20:59 -0500 Subject: [PATCH 168/316] ceph: fix length validation in parse_reply_info() "len" is read from network and thus needs validation. Otherwise, given a bogus "len" value, p+len could be an out-of-bounds pointer, which is used in further parsing. Signed-off-by: Xi Wang Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6203d805eb45..be1415fcaac8 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -262,6 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg, /* trace */ ceph_decode_32_safe(&p, end, len, bad); if (len > 0) { + ceph_decode_need(&p, end, len, bad); err = parse_reply_info_trace(&p, p+len, info, features); if (err < 0) goto out_bad; @@ -270,6 +271,7 @@ static int parse_reply_info(struct ceph_msg *msg, /* extra */ ceph_decode_32_safe(&p, end, len, bad); if (len > 0) { + ceph_decode_need(&p, end, len, bad); err = parse_reply_info_extra(&p, p+len, info, features); if (err < 0) goto out_bad; From d8fb02abdc39f92a1066313e2b17047876afa8f9 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 12 Jan 2012 17:48:10 -0800 Subject: [PATCH 169/316] ceph: create a new session lock to avoid lock inversion Lockdep was reporting a possible circular lock dependency in dentry_lease_is_valid(). That function needs to sample the session's s_cap_gen and and s_cap_ttl fields coherently, but needs to do so while holding a dentry lock. The s_cap_lock field was being used to protect the two fields, but that can't be taken while holding a lock on a dentry within the session. In most cases, the s_cap_gen and s_cap_ttl fields only get operated on separately. But in three cases they need to be updated together. Implement a new lock to protect the spots updating both fields atomically is required. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- fs/ceph/caps.c | 4 ++-- fs/ceph/dir.c | 4 ++-- fs/ceph/mds_client.c | 8 +++++--- fs/ceph/mds_client.h | 7 +++++-- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 8b53193e4f7c..90d789df9ce0 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -641,10 +641,10 @@ static int __cap_is_valid(struct ceph_cap *cap) unsigned long ttl; u32 gen; - spin_lock(&cap->session->s_cap_lock); + spin_lock(&cap->session->s_gen_ttl_lock); gen = cap->session->s_cap_gen; ttl = cap->session->s_cap_ttl; - spin_unlock(&cap->session->s_cap_lock); + spin_unlock(&cap->session->s_gen_ttl_lock); if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { dout("__cap_is_valid %p cap %p issued %s " diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 98954003a8d3..63c52f33361b 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -975,10 +975,10 @@ static int dentry_lease_is_valid(struct dentry *dentry) di = ceph_dentry(dentry); if (di && di->lease_session) { s = di->lease_session; - spin_lock(&s->s_cap_lock); + spin_lock(&s->s_gen_ttl_lock); gen = s->s_cap_gen; ttl = s->s_cap_ttl; - spin_unlock(&s->s_cap_lock); + spin_unlock(&s->s_gen_ttl_lock); if (di->lease_gen == gen && time_before(jiffies, dentry->d_time) && diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index be1415fcaac8..a4fdf9397a90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -400,9 +400,11 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; s->s_con.peer_name.num = cpu_to_le64(mds); - spin_lock_init(&s->s_cap_lock); + spin_lock_init(&s->s_gen_ttl_lock); s->s_cap_gen = 0; s->s_cap_ttl = 0; + + spin_lock_init(&s->s_cap_lock); s->s_renew_requested = 0; s->s_renew_seq = 0; INIT_LIST_HEAD(&s->s_caps); @@ -2328,10 +2330,10 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_STALE: pr_info("mds%d caps went stale, renewing\n", session->s_mds); - spin_lock(&session->s_cap_lock); + spin_lock(&session->s_gen_ttl_lock); session->s_cap_gen++; session->s_cap_ttl = 0; - spin_unlock(&session->s_cap_lock); + spin_unlock(&session->s_gen_ttl_lock); send_renew_caps(mdsc, session); break; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index a50ca0e39475..8c7c04ebb595 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -117,10 +117,13 @@ struct ceph_mds_session { void *s_authorizer_buf, *s_authorizer_reply_buf; size_t s_authorizer_buf_len, s_authorizer_reply_buf_len; - /* protected by s_cap_lock */ - spinlock_t s_cap_lock; + /* protected by s_gen_ttl_lock */ + spinlock_t s_gen_ttl_lock; u32 s_cap_gen; /* inc each time we get mds stale msg */ unsigned long s_cap_ttl; /* when session caps expire */ + + /* protected by s_cap_lock */ + spinlock_t s_cap_lock; struct list_head s_caps; /* all caps issued by this session */ int s_nr_caps, s_trim_caps; int s_num_cap_releases; From 97bb59a03dd6767fcc00be09b0c6d9e5294eeea6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 24 Jan 2012 10:08:36 -0600 Subject: [PATCH 170/316] rbd: fix a memory leak in rbd_get_client() If an existing rbd client is found to be suitable for use in rbd_get_client(), the rbd_options structure is not being freed as it should. Fix that. Signed-off-by: Alex Elder Signed-off-by: Sage Weil --- drivers/block/rbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 148ab944378d..7d8f8ddb3359 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -380,6 +380,7 @@ static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, rbdc = __rbd_client_find(opt); if (rbdc) { ceph_destroy_options(opt); + kfree(rbd_opts); /* using an existing client */ kref_get(&rbdc->kref); From 8cdb878dcb359fd1137e9abdee9322f5e9bcfdf8 Mon Sep 17 00:00:00 2001 From: Christopher Yeoh Date: Thu, 2 Feb 2012 11:34:09 +1030 Subject: [PATCH 171/316] Fix race in process_vm_rw_core This fixes the race in process_vm_core found by Oleg (see http://article.gmane.org/gmane.linux.kernel/1235667/ for details). This has been updated since I last sent it as the creation of the new mm_access() function did almost exactly the same thing as parts of the previous version of this patch did. In order to use mm_access() even when /proc isn't enabled, we move it to kernel/fork.c where other related process mm access functions already are. Signed-off-by: Chris Yeoh Signed-off-by: Linus Torvalds --- fs/proc/base.c | 20 -------------------- include/linux/sched.h | 6 ++++++ kernel/fork.c | 20 ++++++++++++++++++++ mm/process_vm_access.c | 23 +++++++++-------------- 4 files changed, 35 insertions(+), 34 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index d9512bd03e6c..d4548dd49b02 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) -{ - struct mm_struct *mm; - int err; - - err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return ERR_PTR(err); - - mm = get_task_mm(task); - if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { - mmput(mm); - mm = ERR_PTR(-EACCES); - } - mutex_unlock(&task->signal->cred_guard_mutex); - - return mm; -} - struct mm_struct *mm_for_maps(struct task_struct *task) { return mm_access(task, PTRACE_MODE_READ); diff --git a/include/linux/sched.h b/include/linux/sched.h index 2234985a5e65..7d379a6bfd88 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2259,6 +2259,12 @@ static inline void mmdrop(struct mm_struct * mm) extern void mmput(struct mm_struct *); /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); /* Allocate a new mm structure and copy contents from tsk->mm */ diff --git a/kernel/fork.c b/kernel/fork.c index 051f090d40c1..1b2ef3c23ae4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -647,6 +647,26 @@ struct mm_struct *get_task_mm(struct task_struct *task) } EXPORT_SYMBOL_GPL(get_task_mm); +struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) +{ + struct mm_struct *mm; + int err; + + err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return ERR_PTR(err); + + mm = get_task_mm(task); + if (mm && mm != current->mm && + !ptrace_may_access(task, mode)) { + mmput(mm); + mm = ERR_PTR(-EACCES); + } + mutex_unlock(&task->signal->cred_guard_mutex); + + return mm; +} + /* Please note the differences between mmput and mm_release. * mmput is called whenever we stop holding onto a mm_struct, * error success whatever. diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index e920aa3ce104..c20ff48994c2 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -298,22 +298,17 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec, goto free_proc_pages; } - task_lock(task); - if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) { - task_unlock(task); - rc = -EPERM; + mm = mm_access(task, PTRACE_MODE_ATTACH); + if (!mm || IS_ERR(mm)) { + rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; + /* + * Explicitly map EACCES to EPERM as EPERM is a more a + * appropriate error code for process_vw_readv/writev + */ + if (rc == -EACCES) + rc = -EPERM; goto put_task_struct; } - mm = task->mm; - - if (!mm || (task->flags & PF_KTHREAD)) { - task_unlock(task); - rc = -EINVAL; - goto put_task_struct; - } - - atomic_inc(&mm->mm_users); - task_unlock(task); for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) { rc = process_vm_rw_single_vec( From d23a4b3fd6ef70b80411b39b8c8bc548a219ce70 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sun, 29 Jan 2012 13:57:43 -0600 Subject: [PATCH 172/316] rbd: fix safety of rbd_put_client() The rbd_client structure uses a kref to arrange for cleaning up and freeing an instance when its last reference is dropped. The cleanup routine is rbd_client_release(), and one of the things it does is delete the rbd_client from rbd_client_list. It acquires node_lock to do so, but the way it is done is still not safe. The problem is that when attempting to reuse an existing rbd_client, the structure found might already be in the process of getting destroyed and cleaned up. Here's the scenario, with "CLIENT" representing an existing rbd_client that's involved in the race: Thread on CPU A | Thread on CPU B --------------- | --------------- rbd_put_client(CLIENT) | rbd_get_client() kref_put() | (acquires node_lock) kref->refcount becomes 0 | __rbd_client_find() returns CLIENT calls rbd_client_release() | kref_get(&CLIENT->kref); | (releases node_lock) (acquires node_lock) | deletes CLIENT from list | ...and starts using CLIENT... (releases node_lock) | and frees CLIENT | <-- but CLIENT gets freed here Fix this by having rbd_put_client() acquire node_lock. The result could still be improved, but at least it avoids this problem. Signed-off-by: Alex Elder Signed-off-by: Sage Weil --- drivers/block/rbd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7d8f8ddb3359..7f40cb4553c9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -407,15 +407,15 @@ done_err: /* * Destroy ceph client + * + * Caller must hold node_lock. */ static void rbd_client_release(struct kref *kref) { struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); dout("rbd_release_client %p\n", rbdc); - spin_lock(&node_lock); list_del(&rbdc->node); - spin_unlock(&node_lock); ceph_destroy_client(rbdc->client); kfree(rbdc->rbd_opts); @@ -428,7 +428,9 @@ static void rbd_client_release(struct kref *kref) */ static void rbd_put_client(struct rbd_device *rbd_dev) { + spin_lock(&node_lock); kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); + spin_unlock(&node_lock); rbd_dev->rbd_client = NULL; rbd_dev->client = NULL; } From ff05f603c3238010769787f3ba54c48c290ed3e5 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 2 Feb 2012 15:29:08 -0800 Subject: [PATCH 173/316] include/linux/lp8727.h: Remove executable bit Signed-off-by: Josh Triplett Signed-off-by: Linus Torvalds --- include/linux/lp8727.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 include/linux/lp8727.h diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h old mode 100755 new mode 100644 From 331818f1c468a24e581aedcbe52af799366a9dfe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Feb 2012 18:30:53 -0500 Subject: [PATCH 174/316] NFSv4: Fix an Oops in the NFSv4 getacl code Commit bf118a342f10dafe44b14451a1392c3254629a1f (NFSv4: include bitmap in nfsv4 get acl data) introduces the 'acl_scratch' page for the case where we may need to decode multi-page data. However it fails to take into account the fact that the variable may be NULL (for the case where we're not doing multi-page decode), and it also attaches it to the encoding xdr_stream rather than the decoding one. The immediate result is an Oops in nfs4_xdr_enc_getacl due to the call to page_address() with a NULL page pointer. Signed-off-by: Trond Myklebust Cc: Andy Adamson Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/nfs4xdr.c | 5 ++++- include/linux/nfs_xdr.h | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f0c849c98fe4..d202e04aca94 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu } if (npages > 1) { /* for decoding across pages */ - args.acl_scratch = alloc_page(GFP_KERNEL); - if (!args.acl_scratch) + res.acl_scratch = alloc_page(GFP_KERNEL); + if (!res.acl_scratch) goto out_free; } args.acl_len = npages * PAGE_SIZE; @@ -3612,8 +3612,8 @@ out_free: for (i = 0; i < npages; i++) if (pages[i]) __free_page(pages[i]); - if (args.acl_scratch) - __free_page(args.acl_scratch); + if (res.acl_scratch) + __free_page(res.acl_scratch); return ret; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 95e92e438407..33bd8d0f745d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); - xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE); encode_nops(&hdr); } @@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct compound_hdr hdr; int status; + if (res->acl_scratch != NULL) { + void *p = page_address(res->acl_scratch); + xdr_set_scratch_buffer(xdr, p, PAGE_SIZE); + } status = decode_compound_hdr(xdr, &hdr); if (status) goto out; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a764cef06b73..d6ba9a12591e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -614,7 +614,6 @@ struct nfs_getaclargs { size_t acl_len; unsigned int acl_pgbase; struct page ** acl_pages; - struct page * acl_scratch; struct nfs4_sequence_args seq_args; }; @@ -624,6 +623,7 @@ struct nfs_getaclres { size_t acl_len; size_t acl_data_offset; int acl_flags; + struct page * acl_scratch; struct nfs4_sequence_res seq_res; }; From 85c0d24f026ca6935897694be4eb0b5c514b907d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Feb 2012 10:31:35 -0500 Subject: [PATCH 175/316] SUNRPC: Fix up sunrpc trace events The reporting of the RPC queue name needs to use the __string() event interface. Reported-by: Neil Horman Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 51cc9490919f..ec8668d978d9 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -75,21 +75,21 @@ DECLARE_EVENT_CLASS(rpc_task_queued, TP_STRUCT__entry( __field(const struct rpc_clnt *, clnt) __field(const struct rpc_task *, task) - __field(const struct rpc_wait_queue *, queue) __field(unsigned long, timeout) __field(unsigned long, runstate) __field(int, status) __field(unsigned short, flags) + __string(q_name, rpc_qname(q)) ), TP_fast_assign( __entry->clnt = clnt; __entry->task = task; - __entry->queue = q; __entry->timeout = task->tk_timeout; __entry->runstate = task->tk_runstate; __entry->status = task->tk_status; __entry->flags = task->tk_flags; + __assign_str(q_name, rpc_qname(q)); ), TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s", @@ -99,7 +99,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued, __entry->runstate, __entry->status, __entry->timeout, - rpc_qname(__entry->queue) + __get_str(q_name) ) ); From 5753cba17611af108995672c4e2d978014e17a56 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Mon, 6 Feb 2012 10:08:08 -0500 Subject: [PATCH 176/316] SUNRPC: Adding status trace points This patch adds three trace points to the status routines in the sunrpc state machine. The goal of these trace points is to give an Admin the ability to check on binding status or connection status to see if there is a potential problem. Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 53 +++++++++++++++++++++++++++++++++++ net/sunrpc/clnt.c | 4 +++ 2 files changed, 57 insertions(+) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ec8668d978d9..43be87d5dd58 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -8,6 +8,59 @@ #include #include +DECLARE_EVENT_CLASS(rpc_task_status, + + TP_PROTO(struct rpc_task *task), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field(const struct rpc_task *, task) + __field(const struct rpc_clnt *, clnt) + __field(int, status) + ), + + TP_fast_assign( + __entry->task = task; + __entry->clnt = task->tk_client; + __entry->status = task->tk_status; + ), + + TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status) +); + +DEFINE_EVENT(rpc_task_status, rpc_call_status, + TP_PROTO(struct rpc_task *task), + + TP_ARGS(task) +); + +DEFINE_EVENT(rpc_task_status, rpc_bind_status, + TP_PROTO(struct rpc_task *task), + + TP_ARGS(task) +); + +TRACE_EVENT(rpc_connect_status, + TP_PROTO(struct rpc_task *task, int status), + + TP_ARGS(task, status), + + TP_STRUCT__entry( + __field(const struct rpc_task *, task) + __field(const struct rpc_clnt *, clnt) + __field(int, status) + ), + + TP_fast_assign( + __entry->task = task; + __entry->clnt = task->tk_client; + __entry->status = status; + ), + + TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status) +); + DECLARE_EVENT_CLASS(rpc_task_running, TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index db7220d87732..bb7ed2f3aee6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "sunrpc.h" #include "netns.h" @@ -1247,6 +1248,7 @@ call_bind_status(struct rpc_task *task) return; } + trace_rpc_bind_status(task); switch (task->tk_status) { case -ENOMEM: dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); @@ -1346,6 +1348,7 @@ call_connect_status(struct rpc_task *task) return; } + trace_rpc_connect_status(task, status); switch (status) { /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: @@ -1534,6 +1537,7 @@ call_status(struct rpc_task *task) return; } + trace_rpc_call_status(task); task->tk_status = 0; switch(status) { case -EHOSTDOWN: From 883381246c5ac2c29b849fe619f55fa5961ee76d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Feb 2012 15:18:48 -0500 Subject: [PATCH 177/316] SUNRPC: Change the default limit to the number of TCP slots Since the scheme of limiting the number of TCP slots to whatever will fit in the current TCP window seems to be working well (Andy reports getting within 20% of the 'iperf' send performance on a 10GigE link) we should just let that be the default mode of operation. Users may still set their own limits using the tcp_max_slot_table_entries parameter if they need to. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 +- include/linux/sunrpc/xprt.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c23469308b8e..9e101c1b81cf 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -190,7 +190,7 @@ struct nfs_server { /* maximum number of slots to use */ -#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE +#define NFS4_MAX_SLOT_TABLE (128U) #if defined(CONFIG_NFS_V4) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 15518a152ac3..b033f366d5f6 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -21,8 +21,8 @@ #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) -#define RPC_MAX_SLOT_TABLE (128U) #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) +#define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT /* * This describes a timeout strategy From 1cab0652ba985d11b67645bd344c39ebb6cd28a2 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 31 Jan 2012 10:39:29 -0500 Subject: [PATCH 178/316] NFS: Pass a stateid to test_stateid() and free_stateid() This takes the guesswork out of what stateid to use. The caller is expected to figure this out and pass in the correct one. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 37 ++++++++++++++++++++++--------------- fs/nfs/nfs4xdr.c | 3 ++- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1bb0be36a726..8491d775e23c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -82,8 +82,8 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs4_state *state); #ifdef CONFIG_NFS_V4_1 -static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); -static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); +static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *); +static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *); #endif /* Prevent leaks of NFSv4 errors into userland */ static int nfs4_map_errors(int err) @@ -1728,10 +1728,10 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st int status; struct nfs_server *server = NFS_SERVER(state->inode); - status = nfs41_test_stateid(server, state); + status = nfs41_test_stateid(server, &state->stateid); if (status == NFS_OK) return 0; - nfs41_free_stateid(server, state); + nfs41_free_stateid(server, &state->stateid); return nfs4_open_expired(sp, state); } #endif @@ -4509,10 +4509,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques int status; struct nfs_server *server = NFS_SERVER(state->inode); - status = nfs41_test_stateid(server, state); + status = nfs41_test_stateid(server, &state->stateid); if (status == NFS_OK) return 0; - nfs41_free_stateid(server, state); + nfs41_free_stateid(server, &state->stateid); return nfs4_lock_expired(state, request); } #endif @@ -6142,10 +6142,12 @@ out_freepage: out: return err; } -static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) + +static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) { + int status; struct nfs41_test_stateid_args args = { - .stateid = &state->stateid, + .stateid = stateid, }; struct nfs41_test_stateid_res res; struct rpc_message msg = { @@ -6153,26 +6155,31 @@ static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *sta .rpc_argp = &args, .rpc_resp = &res, }; + nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + + if (status == NFS_OK) + return res.status; + return status; } -static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs41_test_stateid(server, state), + _nfs41_test_stateid(server, stateid), &exception); } while (exception.retry); return err; } -static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) +static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs41_free_stateid_args args = { - .stateid = &state->stateid, + .stateid = stateid, }; struct nfs41_free_stateid_res res; struct rpc_message msg = { @@ -6185,13 +6192,13 @@ static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *stat return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); } -static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) +static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs4_free_stateid(server, state), + _nfs4_free_stateid(server, stateid), &exception); } while (exception.retry); return err; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ca288d115b54..5d1caac0656d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5671,7 +5671,8 @@ static int decode_test_stateid(struct xdr_stream *xdr, if (unlikely(!p)) goto out_overflow; res->status = be32_to_cpup(p++); - return res->status; + + return status; out_overflow: print_overflow_msg(__func__, xdr); out: From b01dd1d8fae6178cbec374b90da2e4a3b8dce9ba Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 31 Jan 2012 10:39:30 -0500 Subject: [PATCH 179/316] NFS: Call test_stateid() and free_stateid() with correct stateids I noticed that test_stateid() was always using the same stateid for open and lock recovery. After poking around a bit, I discovered that it was always testing with a delegation stateid (even if there was no delegation present). I figured this wasn't correct, so now delegation and open stateids are tested during open_expired() and lock stateids are tested during lock_expired(). Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 58 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8491d775e23c..aaaf98ba8956 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1723,15 +1723,32 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta } #if defined(CONFIG_NFS_V4_1) -static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags) { - int status; + int status = NFS_OK; struct nfs_server *server = NFS_SERVER(state->inode); - status = nfs41_test_stateid(server, &state->stateid); - if (status == NFS_OK) - return 0; - nfs41_free_stateid(server, &state->stateid); + if (state->flags & flags) { + status = nfs41_test_stateid(server, stateid); + if (status != NFS_OK) { + nfs41_free_stateid(server, stateid); + state->flags &= ~flags; + } + } + return status; +} + +static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + int deleg_status, open_status; + int deleg_flags = 1 << NFS_DELEGATED_STATE; + int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE); + + deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); + open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags); + + if ((deleg_status == NFS_OK) && (open_status == NFS_OK)) + return NFS_OK; return nfs4_open_expired(sp, state); } #endif @@ -4504,15 +4521,34 @@ out: } #if defined(CONFIG_NFS_V4_1) -static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) +static int nfs41_check_expired_locks(struct nfs4_state *state) { - int status; + int status, ret = NFS_OK; + struct nfs4_lock_state *lsp; struct nfs_server *server = NFS_SERVER(state->inode); - status = nfs41_test_stateid(server, &state->stateid); + list_for_each_entry(lsp, &state->lock_states, ls_locks) { + if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { + status = nfs41_test_stateid(server, &lsp->ls_stateid); + if (status != NFS_OK) { + nfs41_free_stateid(server, &lsp->ls_stateid); + lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; + ret = status; + } + } + }; + + return ret; +} + +static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + int status = NFS_OK; + + if (test_bit(LK_STATE_IN_USE, &state->flags)) + status = nfs41_check_expired_locks(state); if (status == NFS_OK) - return 0; - nfs41_free_stateid(server, &state->stateid); + return status; return nfs4_lock_expired(state, request); } #endif From 87e3c0553fcbea79bf9f17fc5694484ecf3ae5e8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 1 Feb 2012 10:46:20 +0300 Subject: [PATCH 180/316] SUNRPC: remove an unneeded NULL check in xprt_connect() We check "task->tk_rqstp" and then we dereference it without checking on the next line. The only caller is call_connect() and that has a check which prevents it from calling xprt_connect() with a NULL. if (task->tk_status < 0) return; If "task->tk_rqstp" were NULL then "tk_status" would be -EAGAIN. Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 839f6ef2326b..efe5495ecf65 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -715,9 +715,7 @@ void xprt_connect(struct rpc_task *task) if (xprt_connected(xprt)) xprt_release_write(xprt, task); else { - if (task->tk_rqstp) - task->tk_rqstp->rq_bytes_sent = 0; - + task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = task->tk_rqstp->rq_timeout; rpc_sleep_on(&xprt->pending, task, xprt_connect_status); From f9fd2d9c1f3b512c9794abbbd76c77a6e6de57aa Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 26 Jan 2012 13:32:22 -0500 Subject: [PATCH 181/316] NFS: printks in fs/nfs/ should start with NFS: Messages like "Got error -10052 from the server on DESTROY_SESSION. Session has been destroyed regardless" can be confusing to users who aren't very familiar with NFS. NOTE: This patch ignores any printks() that start by printing __func__ - that will be in a separate patch. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 3 ++- fs/nfs/nfs4filelayout.c | 2 +- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index ff084d258c41..91b1e2a82146 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -175,7 +175,8 @@ int nfs_idmap_init(void) struct key *keyring; int ret = 0; - printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name); + printk(KERN_NOTICE "NFS: Registering the %s key type\n", + key_type_id_resolver.name); cred = prepare_kernel_cred(NULL); if (!cred) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index b4f8f9624afa..9a058b8c2888 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -575,7 +575,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err_free; fl->fh_array[i]->size = be32_to_cpup(p++); if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { - printk(KERN_ERR "Too big fh %d received %d\n", + printk(KERN_ERR "NFS: Too big fh %d received %d\n", i, fl->fh_array[i]->size); goto out_err_free; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aaaf98ba8956..34e525549f85 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5406,7 +5406,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session) if (status) printk(KERN_WARNING - "Got error %d from the server on DESTROY_SESSION. " + "NFS: Got error %d from the server on DESTROY_SESSION. " "Session has been destroyed regardless...\n", status); dprintk("<-- nfs4_proc_destroy_session\n"); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7d098604802c..b43a65d7faca 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1764,7 +1764,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } while (atomic_read(&clp->cl_count) > 1); return; out_error: - printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" + printk(KERN_WARNING "NFS: state manager failed on NFSv4 server %s" " with error %d\n", clp->cl_hostname, -status); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5d1caac0656d..2adcc979e5df 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1023,7 +1023,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const * Now we backfill the bitmap and the attribute buffer length. */ if (len != ((char *)p - (char *)q) + 4) { - printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", + printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n", len, ((char *)p - (char *)q) + 4); BUG(); } From a030889a01d1bea921e1a7501010b7b891d2abd2 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 26 Jan 2012 13:32:23 -0500 Subject: [PATCH 182/316] NFS: start printks w/ NFS: even if __func__ shown This patch addresses printks that have some context to show that they are from fs/nfs/, but for the sake of consistency now start with NFS: Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayoutdev.c | 2 +- fs/nfs/blocklayout/blocklayoutdm.c | 2 +- fs/nfs/callback.c | 2 +- fs/nfs/callback_xdr.c | 6 +++--- fs/nfs/idmap.c | 6 ++++-- fs/nfs/inode.c | 2 +- fs/nfs/nfs4filelayout.c | 6 ++++-- fs/nfs/nfs4filelayoutdev.c | 10 +++++----- fs/nfs/nfs4proc.c | 7 ++++--- fs/nfs/nfs4state.c | 12 ++++++------ fs/nfs/nfs4xdr.c | 10 +++++----- fs/nfs/objlayout/objio_osd.c | 6 +++--- fs/nfs/objlayout/objlayout.c | 6 +++--- fs/nfs/pnfs.c | 14 +++++++------- 14 files changed, 48 insertions(+), 43 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index 94ed978860c0..b48f782a94ad 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -46,7 +46,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) *rp = xdr_decode_hyper(*rp, &s); if (s & 0x1ff) { - printk(KERN_WARNING "%s: sector not aligned\n", __func__); + printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); return -1; } *sp = s >> SECTOR_SHIFT; diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 970490f556de..a0f588fa49c1 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -91,7 +91,7 @@ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) dprintk("%s Releasing\n", __func__); rv = nfs4_blkdev_put(bdev->bm_mdev); if (rv) - printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n", + printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n", __func__, rv); dev_remove(bdev->net, bdev->bm_mdev->bd_dev); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index d81040a7efc4..4a122ae71762 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -85,7 +85,7 @@ nfs4_callback_svc(void *vrqstp) } if (err < 0) { if (err != preverr) { - printk(KERN_WARNING "%s: unexpected error " + printk(KERN_WARNING "NFS: %s: unexpected error " "from svc_recv (%d)\n", __func__, err); preverr = err; } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d50b2742f23b..2f45aa717423 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -73,7 +73,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes) p = xdr_inline_decode(xdr, nbytes); if (unlikely(p == NULL)) - printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); + printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n"); return p; } @@ -155,7 +155,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound return status; /* We do not like overly long tags! */ if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { - printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", + printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", __func__, hdr->taglen); return htonl(NFS4ERR_RESOURCE); } @@ -167,7 +167,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (hdr->minorversion <= 1) { hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ } else { - printk(KERN_WARNING "%s: NFSv4 server callback with " + printk(KERN_WARNING "NFS: %s: NFSv4 server callback with " "illegal minor version %u!\n", __func__, hdr->minorversion); return htonl(NFS4ERR_MINOR_VERS_MISMATCH); diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 91b1e2a82146..62264e0b1ddb 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -558,11 +558,13 @@ static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, * here. */ if (rpc_rmdir(parent)) - printk(KERN_ERR "%s: failed to remove clnt dir!\n", __func__); + printk(KERN_ERR "NFS: %s: failed to remove " + "clnt dir!\n", __func__); } break; default: - printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event); + printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__, + event); return -ENOTSUPP; } return err; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d2c760e193f4..028464bcbe0e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1407,7 +1407,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* * Big trouble! The inode has become a different object. */ - printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", + printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", __func__, inode->i_ino, inode->i_mode, fattr->mode); out_err: /* diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 9a058b8c2888..79be7acc9bae 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -367,7 +367,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) idx = nfs4_fl_calc_ds_index(lseg, j); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", + __func__); set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; @@ -797,7 +798,8 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", + __func__); set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); prepare_to_resend_writes(data); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 6eb59b044bfc..80fce8dade2e 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -554,7 +554,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) cnt = be32_to_cpup(p); dprintk("%s stripe count %d\n", __func__, cnt); if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { - printk(KERN_WARNING "%s: stripe count %d greater than " + printk(KERN_WARNING "NFS: %s: stripe count %d greater than " "supported maximum %d\n", __func__, cnt, NFS4_PNFS_MAX_STRIPE_CNT); goto out_err_free_scratch; @@ -585,7 +585,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) num = be32_to_cpup(p); dprintk("%s ds_num %u\n", __func__, num); if (num > NFS4_PNFS_MAX_MULTI_CNT) { - printk(KERN_WARNING "%s: multipath count %d greater than " + printk(KERN_WARNING "NFS: %s: multipath count %d greater than " "supported maximum %d\n", __func__, num, NFS4_PNFS_MAX_MULTI_CNT); goto out_err_free_stripe_indices; @@ -593,7 +593,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) /* validate stripe indices are all < num */ if (max_stripe_index >= num) { - printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n", + printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", __func__, max_stripe_index, num); goto out_err_free_stripe_indices; } @@ -687,7 +687,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl new = decode_device(inode, dev, gfp_flags); if (!new) { - printk(KERN_WARNING "%s: Could not decode or add device\n", + printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", __func__); return NULL; } @@ -836,7 +836,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; if (ds == NULL) { - printk(KERN_ERR "%s: No data server for offset index %d\n", + printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", __func__, ds_idx); return NULL; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 34e525549f85..482ed97189c9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4584,7 +4584,8 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Note: we always want to sleep here! */ request->fl_flags = fl_flags | FL_SLEEP; if (do_vfs_lock(request->fl_file, request) < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); + printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock " + "manager!\n", __func__); out_unlock: up_read(&nfsi->rwsem); out: @@ -4664,8 +4665,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); switch (err) { default: - printk(KERN_ERR "%s: unhandled error %d.\n", - __func__, err); + printk(KERN_ERR "NFS: %s: unhandled error " + "%d.\n", __func__, err); case 0: case -ESTALE: goto out; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b43a65d7faca..4e37818a34ef 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1138,8 +1138,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: goto out; default: - printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", - __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d. " + "Zeroing state\n", __func__, status); case -ENOMEM: case -NFS4ERR_DENIED: case -NFS4ERR_RECLAIM_BAD: @@ -1185,8 +1185,8 @@ restart: spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) - printk("%s: Lock reclaim failed!\n", - __func__); + printk("NFS: %s: Lock reclaim " + "failed!\n", __func__); } spin_unlock(&state->state_lock); nfs4_put_open_state(state); @@ -1195,8 +1195,8 @@ restart: } switch (status) { default: - printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", - __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d. " + "Zeroing state\n", __func__, status); case -ENOENT: case -ENOMEM: case -ESTALE: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 2adcc979e5df..ae7834366712 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4468,8 +4468,8 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, return 0; } if (num > 1) - printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " - "per filesystem not supported\n", __func__); + printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout " + "drivers per filesystem not supported\n", __func__); /* Decode and set first layout type, move xdr->p past unused types */ p = xdr_inline_decode(xdr, num * 4); @@ -5290,8 +5290,8 @@ static int decode_chan_attrs(struct xdr_stream *xdr, attrs->max_reqs = be32_to_cpup(p++); nr_attrs = be32_to_cpup(p); if (unlikely(nr_attrs > 1)) { - printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", - __func__, nr_attrs); + printk(KERN_WARNING "NFS: %s: Invalid rdma channel attrs " + "count %u\n", __func__, nr_attrs); return -EINVAL; } if (nr_attrs == 1) { @@ -5448,7 +5448,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr, dprintk("%s: num_dev %d\n", __func__, res->num_devs); if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { - printk(KERN_ERR "%s too many result dev_num %u\n", + printk(KERN_ERR "NFS: %s too many result dev_num %u\n", __func__, res->num_devs); return -EIO; } diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 55d01280a609..405a62bdb9b4 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -582,10 +582,10 @@ objlayout_init(void) if (ret) printk(KERN_INFO - "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", + "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n", __func__, ret); else - printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", + printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n", __func__); return ret; } @@ -594,7 +594,7 @@ static void __exit objlayout_exit(void) { pnfs_unregister_layoutdriver(&objlayout_type); - printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", + printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n", __func__); } diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index b3c29039f5b8..2bd185277adb 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -490,9 +490,9 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) if (!ioerr->oer_errno) continue; - printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " - "dev(%llx:%llx) par=0x%llx obj=0x%llx " - "offset=0x%llx length=0x%llx\n", + printk(KERN_ERR "NFS: %s: err[%d]: errno=%d " + "is_write=%d dev(%llx:%llx) par=0x%llx " + "obj=0x%llx offset=0x%llx length=0x%llx\n", __func__, i, ioerr->oer_errno, ioerr->oer_iswrite, _DEVID_LO(&ioerr->oer_component.oid_device_id), diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 92927878c2f8..a53421604bc4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -101,8 +101,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, goto out_no_driver; if (!(server->nfs_client->cl_exchange_flags & (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { - printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, - id, server->nfs_client->cl_exchange_flags); + printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n", + __func__, id, server->nfs_client->cl_exchange_flags); goto out_no_driver; } ld_type = find_pnfs_driver(id); @@ -122,8 +122,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, server->pnfs_curr_ld = ld_type; if (ld_type->set_layoutdriver && ld_type->set_layoutdriver(server, mntfh)) { - printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n", - __func__, id); + printk(KERN_ERR "NFS: %s: Error initializing pNFS layout " + "driver %u.\n", __func__, id); module_put(ld_type->owner); goto out_no_driver; } @@ -143,11 +143,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) struct pnfs_layoutdriver_type *tmp; if (ld_type->id == 0) { - printk(KERN_ERR "%s id 0 is reserved\n", __func__); + printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__); return status; } if (!ld_type->alloc_lseg || !ld_type->free_lseg) { - printk(KERN_ERR "%s Layout driver must provide " + printk(KERN_ERR "NFS: %s Layout driver must provide " "alloc_lseg and free_lseg.\n", __func__); return status; } @@ -160,7 +160,7 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, ld_type->name); } else { - printk(KERN_ERR "%s Module with id %d already loaded!\n", + printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n", __func__, ld_type->id); } spin_unlock(&pnfs_spinlock); From 2d3fe01c36a9b881fae89c5bdf4085a4d7d53ae1 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 3 Feb 2012 15:45:40 -0500 Subject: [PATCH 183/316] NFS: Fix comparison between DS address lists data_server_cache entries should only be treated as the same if the address list hasn't changed. A new entry will be made when an MDS changes an address list (as seen by GETDEVINFO). The old entry will be freed once all references are gone. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayoutdev.c | 71 +++++++++++++------------------------- 1 file changed, 24 insertions(+), 47 deletions(-) diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 80fce8dade2e..41677f0bf792 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -108,58 +108,40 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) return false; } -/* - * Lookup DS by addresses. The first matching address returns true. - * nfs4_ds_cache_lock is held - */ -static struct nfs4_pnfs_ds * -_data_server_lookup_locked(struct list_head *dsaddrs) +bool +_same_data_server_addrs_locked(const struct list_head *dsaddrs1, + const struct list_head *dsaddrs2) { - struct nfs4_pnfs_ds *ds; struct nfs4_pnfs_ds_addr *da1, *da2; - list_for_each_entry(da1, dsaddrs, da_node) { - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { - list_for_each_entry(da2, &ds->ds_addrs, da_node) { - if (same_sockaddr( - (struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) - return ds; - } - } + /* step through both lists, comparing as we go */ + for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), + da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); + da1 != NULL && da2 != NULL; + da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), + da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { + if (!same_sockaddr((struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) + return false; } - return NULL; + if (da1 == NULL && da2 == NULL) + return true; + + return false; } /* - * Compare two lists of addresses. + * Lookup DS by addresses. nfs4_ds_cache_lock is held */ -static bool -_data_server_match_all_addrs_locked(struct list_head *dsaddrs1, - struct list_head *dsaddrs2) +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(const struct list_head *dsaddrs) { - struct nfs4_pnfs_ds_addr *da1, *da2; - size_t count1 = 0, - count2 = 0; + struct nfs4_pnfs_ds *ds; - list_for_each_entry(da1, dsaddrs1, da_node) - count1++; - - list_for_each_entry(da2, dsaddrs2, da_node) { - bool found = false; - count2++; - list_for_each_entry(da1, dsaddrs1, da_node) { - if (same_sockaddr((struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) { - found = true; - break; - } - } - if (!found) - return false; - } - - return (count1 == count2); + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) + if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) + return ds; + return NULL; } /* @@ -356,11 +338,6 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) dprintk("%s add new data server %s\n", __func__, ds->ds_remotestr); } else { - if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, - dsaddrs)) { - dprintk("%s: multipath address mismatch: %s != %s", - __func__, tmp_ds->ds_remotestr, remotestr); - } kfree(remotestr); kfree(ds); atomic_inc(&tmp_ds->ds_count); From e6499c6f4b5f56a16f8b8ef60529c1da28b13aea Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 26 Jan 2012 16:54:23 -0500 Subject: [PATCH 184/316] NFS: Fall back on old idmapper if request_key() fails This patch removes the CONFIG_NFS_USE_NEW_IDMAPPER compile option. First, the idmapper will attempt to map the id using /sbin/request-key and nfsidmap. If this fails (if /etc/request-key.conf is not configured properly) then the idmapper will call the legacy code to perform the mapping. I left a comment stating where the legacy code begins to make it easier for somebody to remove in the future. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 11 ----- fs/nfs/idmap.c | 91 ++++++++++++++++----------------------- fs/nfs/sysctl.c | 2 - include/linux/nfs_idmap.h | 15 ------- 4 files changed, 37 insertions(+), 82 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index dbcd82126aed..021d2cf6938a 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -132,14 +132,3 @@ config NFS_USE_KERNEL_DNS select DNS_RESOLVER select KEYS default y - -config NFS_USE_NEW_IDMAPPER - bool "Use the new idmapper upcall routine" - depends on NFS_V4 && KEYS - help - Say Y here if you want NFS to use the new idmapper upcall functions. - You will need /sbin/request-key (usually provided by the keyutils - package). For details, read - . - - If you are unsure, say N. diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 62264e0b1ddb..e0ecd5a7e19a 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -142,8 +142,6 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) return snprintf(buf, buflen, "%u", id); } -#ifdef CONFIG_NFS_USE_NEW_IDMAPPER - #include #include #include @@ -169,7 +167,7 @@ struct key_type key_type_id_resolver = { .read = user_read, }; -int nfs_idmap_init(void) +static int nfs_idmap_init_keyring(void) { struct cred *cred; struct key *keyring; @@ -211,7 +209,7 @@ failed_put_cred: return ret; } -void nfs_idmap_quit(void) +static void nfs_idmap_quit_keyring(void) { key_revoke(id_resolver_cache->thread_keyring); unregister_key_type(&key_type_id_resolver); @@ -328,43 +326,7 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, return ret; } -int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) -{ - if (nfs_map_string_to_numeric(name, namelen, uid)) - return 0; - return nfs_idmap_lookup_id(name, namelen, "uid", uid); -} - -int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) -{ - if (nfs_map_string_to_numeric(name, namelen, gid)) - return 0; - return nfs_idmap_lookup_id(name, namelen, "gid", gid); -} - -int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) -{ - int ret = -EINVAL; - - if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); - if (ret < 0) - ret = nfs_map_numeric_to_string(uid, buf, buflen); - return ret; -} -int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) -{ - int ret = -EINVAL; - - if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); - if (ret < 0) - ret = nfs_map_numeric_to_string(gid, buf, buflen); - return ret; -} - -#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ - +/* idmap classic begins here */ #include #include #include @@ -600,12 +562,21 @@ static struct notifier_block nfs_idmap_block = { int nfs_idmap_init(void) { - return rpc_pipefs_notifier_register(&nfs_idmap_block); + int ret; + ret = nfs_idmap_init_keyring(); + if (ret != 0) + goto out; + ret = rpc_pipefs_notifier_register(&nfs_idmap_block); + if (ret != 0) + nfs_idmap_quit_keyring(); +out: + return ret; } void nfs_idmap_quit(void) { rpc_pipefs_notifier_unregister(&nfs_idmap_block); + nfs_idmap_quit_keyring(); } /* @@ -930,19 +901,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen) int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = server->nfs_client->cl_idmap; + int ret = -EINVAL; if (nfs_map_string_to_numeric(name, namelen, uid)) return 0; - return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); + ret = nfs_idmap_lookup_id(name, namelen, "uid", uid); + if (ret < 0) + ret = nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); + return ret; } -int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) +int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) { struct idmap *idmap = server->nfs_client->cl_idmap; + int ret = -EINVAL; - if (nfs_map_string_to_numeric(name, namelen, uid)) + if (nfs_map_string_to_numeric(name, namelen, gid)) return 0; - return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); + ret = nfs_idmap_lookup_id(name, namelen, "gid", gid); + if (ret < 0) + ret = nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, gid); + return ret; } int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) @@ -950,22 +929,26 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s struct idmap *idmap = server->nfs_client->cl_idmap; int ret = -EINVAL; - if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) { + ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); + if (ret < 0) + ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); + } if (ret < 0) ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; } -int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) +int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) { struct idmap *idmap = server->nfs_client->cl_idmap; int ret = -EINVAL; - if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) { + ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); + if (ret < 0) + ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, gid, buf); + } if (ret < 0) - ret = nfs_map_numeric_to_string(uid, buf, buflen); + ret = nfs_map_numeric_to_string(gid, buf, buflen); return ret; } - -#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 978aaeb8a093..ad4d2e787b20 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = { .extra1 = (int *)&nfs_set_port_min, .extra2 = (int *)&nfs_set_port_max, }, -#ifndef CONFIG_NFS_USE_NEW_IDMAPPER { .procname = "idmap_cache_timeout", .data = &nfs_idmap_cache_timeout, @@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, -#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ #endif { .procname = "nfs_mountpoint_timeout", diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 3c9eeb7da646..7eed2012d288 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -82,24 +82,9 @@ static inline void nfs_idmap_quit(void) {} #endif -#ifdef CONFIG_NFS_USE_NEW_IDMAPPER - -static inline int nfs_idmap_new(struct nfs_client *clp) -{ - return 0; -} - -static inline void nfs_idmap_delete(struct nfs_client *clp) -{ -} - -#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */ - int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); -#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ - void nfs_fattr_init_names(struct nfs_fattr *fattr, struct nfs4_string *owner_name, struct nfs4_string *group_name); From 3cd0f37a2cc9e4d6188df10041a2441eaa41d991 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 26 Jan 2012 16:54:24 -0500 Subject: [PATCH 185/316] NFS: Keep idmapper include files in one place Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 66 ++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index e0ecd5a7e19a..83f7d42d5c76 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -39,6 +39,37 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* include files needed by legacy idmapper */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nfs4_fs.h" +#include "internal.h" + +#define NFS_UINT_MAXLEN 11 +#define IDMAP_HASH_SZ 128 + +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600 * HZ; +const struct cred *id_resolver_cache; + /** * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields @@ -142,21 +173,6 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) return snprintf(buf, buflen, "%u", id); } -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define NFS_UINT_MAXLEN 11 - -const struct cred *id_resolver_cache; - struct key_type key_type_id_resolver = { .name = "id_resolver", .instantiate = user_instantiate, @@ -327,26 +343,6 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, } /* idmap classic begins here */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "nfs4_fs.h" -#include "internal.h" - -#define IDMAP_HASH_SZ 128 - -/* Default cache timeout is 10 minutes */ -unsigned int nfs_idmap_cache_timeout = 600 * HZ; - static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) { char *endp; From a602bea3e7ccc5ce3da61d2c18245c4058983926 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 26 Jan 2012 16:54:25 -0500 Subject: [PATCH 186/316] NFS: Update idmapper documentation Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- Documentation/filesystems/nfs/idmapper.txt | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt index 120fd3cf7fd9..fe03d10bb79a 100644 --- a/Documentation/filesystems/nfs/idmapper.txt +++ b/Documentation/filesystems/nfs/idmapper.txt @@ -4,13 +4,21 @@ ID Mapper ========= Id mapper is used by NFS to translate user and group ids into names, and to translate user and group names into ids. Part of this translation involves -performing an upcall to userspace to request the information. Id mapper will -user request-key to perform this upcall and cache the result. The program -/usr/sbin/nfs.idmap should be called by request-key, and will perform the -translation and initialize a key with the resulting information. +performing an upcall to userspace to request the information. There are two +ways NFS could obtain this information: placing a call to /sbin/request-key +or by placing a call to the rpc.idmap daemon. + +NFS will attempt to call /sbin/request-key first. If this succeeds, the +result will be cached using the generic request-key cache. This call should +only fail if /etc/request-key.conf is not configured for the id_resolver key +type, see the "Configuring" section below if you wish to use the request-key +method. + +If the call to /sbin/request-key fails (if /etc/request-key.conf is not +configured with the id_resolver key type), then the idmapper will ask the +legacy rpc.idmap daemon for the id mapping. This result will be stored +in a custom NFS idmap cache. - NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this - feature. =========== Configuring From 6b13168b36b6a7f603d962c232f1f2f325705832 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 23 Jan 2012 17:26:05 +0000 Subject: [PATCH 187/316] NFS: make nfs_client_list per net ns This patch splits global list of NFS clients into per-net-ns array of lists. This looks more strict and clearer. BTW, this patch also makes "/proc/fs/nfsfs/servers" entry content depends on /proc mount owner pid namespace. See below for details. NOTE: few words about how was /proc/fs/nfsfs/ entries content show per network namespace done. This is a little bit tricky and not the best is could be. But it's cheap (proper fix for /proc conteinerization is a hard nut to crack). The idea is simple: take proper network namespace from pid namespace child reaper nsproxy of /proc/ mount creator. This actually means, that if there are 2 containers with different net namespace sharing pid namespace, then read of /proc/fs/nfsfs/ entries will always return content, taken from net namespace of pid namespace creator task (and thus second namespace set wil be unvisible). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 38 +++++++++++++++++++++++++++----------- fs/nfs/idmap.c | 5 ++--- fs/nfs/inode.c | 1 + fs/nfs/internal.h | 2 +- fs/nfs/netns.h | 1 + 5 files changed, 32 insertions(+), 15 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 98af1cb28ee3..058eb9bcfa9d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include @@ -49,11 +51,11 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT DEFINE_SPINLOCK(nfs_client_lock); -LIST_HEAD(nfs_client_list); static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 @@ -464,8 +466,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat { struct nfs_client *clp; const struct sockaddr *sap = data->addr; + struct nfs_net *nn = net_generic(data->net, nfs_net_id); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ if (clp->cl_cons_state < 0) @@ -483,9 +486,6 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat /* Match the full socket address */ if (!nfs_sockaddr_cmp(sap, clap)) continue; - /* Match network namespace */ - if (clp->net != data->net) - continue; atomic_inc(&clp->cl_count); return clp; @@ -506,6 +506,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, { struct nfs_client *clp, *new = NULL; int error; + struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); dprintk("--> nfs_get_client(%s,v%u)\n", cl_init->hostname ?: "", cl_init->rpc_ops->version); @@ -531,7 +532,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, /* install a new client and return with it unready */ install_client: clp = new; - list_add(&clp->cl_share_link, &nfs_client_list); + list_add(&clp->cl_share_link, &nn->nfs_client_list); spin_unlock(&nfs_client_lock); error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, @@ -1227,9 +1228,10 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, struct nfs4_sessionid *sid) { struct nfs_client *clp; + struct nfs_net *nn = net_generic(&init_net, nfs_net_id); spin_lock(&nfs_client_lock); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (nfs4_cb_match_client(addr, clp, 1) == false) continue; @@ -1757,6 +1759,13 @@ out_free_server: return ERR_PTR(error); } +void nfs_clients_init(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + INIT_LIST_HEAD(&nn->nfs_client_list); +} + #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_fs_nfs; @@ -1810,13 +1819,15 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) { struct seq_file *m; int ret; + struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; + struct net *net = pid_ns->child_reaper->nsproxy->net_ns; ret = seq_open(file, &nfs_server_list_ops); if (ret < 0) return ret; m = file->private_data; - m->private = PDE(inode)->data; + m->private = net; return 0; } @@ -1826,9 +1837,11 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) */ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) { + struct nfs_net *nn = net_generic(m->private, nfs_net_id); + /* lock the list against modification */ spin_lock(&nfs_client_lock); - return seq_list_start_head(&nfs_client_list, *_pos); + return seq_list_start_head(&nn->nfs_client_list, *_pos); } /* @@ -1836,7 +1849,9 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) { - return seq_list_next(v, &nfs_client_list, pos); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + return seq_list_next(v, &nn->nfs_client_list, pos); } /* @@ -1853,9 +1868,10 @@ static void nfs_server_list_stop(struct seq_file *p, void *v) static int nfs_server_list_show(struct seq_file *m, void *v) { struct nfs_client *clp; + struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* display header on line 1 */ - if (v == &nfs_client_list) { + if (v == &nn->nfs_client_list) { seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); return 0; } diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 83f7d42d5c76..2f78f0ce2664 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -532,13 +532,12 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct super_block *sb = ptr; + struct nfs_net *nn = net_generic(sb->s_fs_info, nfs_net_id); struct nfs_client *clp; int error = 0; spin_lock(&nfs_client_lock); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { - if (clp->net != sb->s_fs_info) - continue; + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (clp->rpc_ops != &nfs_v4_clientops) continue; error = __rpc_pipefs_event(clp, event, sb); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 028464bcbe0e..0365b84cc2c7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1558,6 +1558,7 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { + nfs_clients_init(net); return nfs_dns_resolver_cache_init(net); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cdb121d3c6f4..a9ae8069fff9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -146,6 +146,7 @@ extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; +extern void nfs_clients_init(struct net *net); extern void nfs_cleanup_cb_ident_idr(void); extern void nfs_put_client(struct nfs_client *); @@ -183,7 +184,6 @@ static inline void nfs_fs_proc_exit(void) #endif #ifdef CONFIG_NFS_V4 extern spinlock_t nfs_client_lock; -extern struct list_head nfs_client_list; #endif /* nfs4namespace.c */ diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 39ae4cad5b4b..feb33c3f9a56 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -7,6 +7,7 @@ struct nfs_net { struct cache_detail *nfs_dns_resolve; struct rpc_pipe *bl_device_pipe; + struct list_head nfs_client_list; }; extern int nfs_net_id; From c25d32b26361ce0814fef2281f164866c18c8692 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 23 Jan 2012 17:26:14 +0000 Subject: [PATCH 188/316] NFS: make nfs_volume_list per net ns This patch splits global list of NFS servers into per-net-ns array of lists. This looks more strict and clearer. BTW, this patch also makes "/proc/fs/nfsfs/volumes" content depends on /proc mount owner pid namespace. See below for details. NOTE: few words about how was /proc/fs/nfsfs/ entries content show per network namespace done. This is a little bit tricky and not the best is could be. But it's cheap (proper fix for /proc conteinerization is a hard nut to crack). The idea is simple: take proper network namespace from pid namespace child reaper nsproxy of /proc/ mount creator. This actually means, that if there are 2 containers with different net namespace sharing pid namespace, then read of /proc/fs/nfsfs/ entries will always return content, taken from net namespace of pid namespace creator task (and thus second namespace set wil be unvisible). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 20 ++++++++++++++------ fs/nfs/netns.h | 1 + 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 058eb9bcfa9d..d58e8386e6bc 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -56,7 +56,6 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT DEFINE_SPINLOCK(nfs_client_lock); -static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ @@ -1036,10 +1035,11 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve static void nfs_server_insert_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); spin_lock(&nfs_client_lock); list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); - list_add_tail(&server->master_link, &nfs_volume_list); + list_add_tail(&server->master_link, &nn->nfs_volume_list); clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); spin_unlock(&nfs_client_lock); @@ -1764,6 +1764,7 @@ void nfs_clients_init(struct net *net) struct nfs_net *nn = net_generic(net, nfs_net_id); INIT_LIST_HEAD(&nn->nfs_client_list); + INIT_LIST_HEAD(&nn->nfs_volume_list); } #ifdef CONFIG_PROC_FS @@ -1900,13 +1901,15 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) { struct seq_file *m; int ret; + struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; + struct net *net = pid_ns->child_reaper->nsproxy->net_ns; ret = seq_open(file, &nfs_volume_list_ops); if (ret < 0) return ret; m = file->private_data; - m->private = PDE(inode)->data; + m->private = net; return 0; } @@ -1916,9 +1919,11 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) */ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) { + struct nfs_net *nn = net_generic(m->private, nfs_net_id); + /* lock the list against modification */ spin_lock(&nfs_client_lock); - return seq_list_start_head(&nfs_volume_list, *_pos); + return seq_list_start_head(&nn->nfs_volume_list, *_pos); } /* @@ -1926,7 +1931,9 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) { - return seq_list_next(v, &nfs_volume_list, pos); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + return seq_list_next(v, &nn->nfs_volume_list, pos); } /* @@ -1945,9 +1952,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) struct nfs_server *server; struct nfs_client *clp; char dev[8], fsid[17]; + struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* display header on line 1 */ - if (v == &nfs_volume_list) { + if (v == &nn->nfs_volume_list) { seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); return 0; } diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index feb33c3f9a56..0fbd4e017d27 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -8,6 +8,7 @@ struct nfs_net { struct cache_detail *nfs_dns_resolve; struct rpc_pipe *bl_device_pipe; struct list_head nfs_client_list; + struct list_head nfs_volume_list; }; extern int nfs_net_id; From 28cd1b3f262dba56b5e335ba668e342d530f6129 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 23 Jan 2012 17:26:22 +0000 Subject: [PATCH 189/316] NFS: make cb_ident_idr per net ns This patch makes ID's infrastructure network namespace aware. This was done mainly because of nfs_client_lock, which is desired to be per network namespace, but protects NFS clients ID's. NOTE: NFS client's net pointer have to be set prior to ID initialization, proper assignment was moved. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 2 +- fs/nfs/client.c | 28 ++++++++++++++++++---------- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 4 ++-- fs/nfs/netns.h | 3 +++ 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 2f45aa717423..e14af46bd2c6 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -876,7 +876,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_garbage_args; if (hdr_arg.minorversion == 0) { - cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); + cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident); if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) return rpc_drop_reply; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d58e8386e6bc..f51b2795ce07 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -58,7 +58,6 @@ DEFINE_SPINLOCK(nfs_client_lock); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 -static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ /* * Get a unique NFSv4.0 callback identifier which will be used @@ -67,14 +66,15 @@ static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) { int ret = 0; + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); if (clp->rpc_ops->version != 4 || minorversion != 0) return ret; retry: - if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL)) + if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) return -ENOMEM; spin_lock(&nfs_client_lock); - ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident); + ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); spin_unlock(&nfs_client_lock); if (ret == -EAGAIN) goto retry; @@ -173,6 +173,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_proto = cl_init->proto; + clp->net = cl_init->net; #ifdef CONFIG_NFS_V4 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); @@ -191,7 +192,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ if (!IS_ERR(cred)) clp->cl_machine_cred = cred; nfs_fscache_get_client_cookie(clp); - clp->net = cl_init->net; return clp; @@ -236,16 +236,20 @@ static void nfs4_shutdown_client(struct nfs_client *clp) } /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ -void nfs_cleanup_cb_ident_idr(void) +void nfs_cleanup_cb_ident_idr(struct net *net) { - idr_destroy(&cb_ident_idr); + struct nfs_net *nn = net_generic(net, nfs_net_id); + + idr_destroy(&nn->cb_ident_idr); } /* nfs_client_lock held */ static void nfs_cb_idr_remove_locked(struct nfs_client *clp) { + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); + if (clp->cl_cb_ident) - idr_remove(&cb_ident_idr, clp->cl_cb_ident); + idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); } static void pnfs_init_server(struct nfs_server *server) @@ -263,7 +267,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) { } -void nfs_cleanup_cb_ident_idr(void) +void nfs_cleanup_cb_ident_idr(struct net *net) { } @@ -1203,12 +1207,13 @@ error: * Find a client by callback identifier */ struct nfs_client * -nfs4_find_client_ident(int cb_ident) +nfs4_find_client_ident(struct net *net, int cb_ident) { struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); spin_lock(&nfs_client_lock); - clp = idr_find(&cb_ident_idr, cb_ident); + clp = idr_find(&nn->cb_ident_idr, cb_ident); if (clp) atomic_inc(&clp->cl_count); spin_unlock(&nfs_client_lock); @@ -1765,6 +1770,9 @@ void nfs_clients_init(struct net *net) INIT_LIST_HEAD(&nn->nfs_client_list); INIT_LIST_HEAD(&nn->nfs_volume_list); +#ifdef CONFIG_NFS_V4 + idr_init(&nn->cb_ident_idr); +#endif } #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0365b84cc2c7..6c662598f885 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1565,6 +1565,7 @@ static int nfs_net_init(struct net *net) static void nfs_net_exit(struct net *net) { nfs_dns_resolver_cache_destroy(net); + nfs_cleanup_cb_ident_idr(net); } static struct pernet_operations nfs_net_ops = { @@ -1674,7 +1675,6 @@ static void __exit exit_nfs_fs(void) #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif - nfs_cleanup_cb_ident_idr(); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a9ae8069fff9..958fff2927c0 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -148,9 +148,9 @@ extern void nfs_umount(const struct nfs_mount_request *info); extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); -extern void nfs_cleanup_cb_ident_idr(void); +extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs4_find_client_ident(int); +extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 0fbd4e017d27..547cc9525ba2 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -9,6 +9,9 @@ struct nfs_net { struct rpc_pipe *bl_device_pipe; struct list_head nfs_client_list; struct list_head nfs_volume_list; +#ifdef CONFIG_NFS_V4 + struct idr cb_ident_idr; /* Protected by nfs_client_lock */ +#endif }; extern int nfs_net_id; From dc03085834a4530b2514708a643cd3fe38f35b21 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 23 Jan 2012 17:26:31 +0000 Subject: [PATCH 190/316] NFS: make nfs_client_lock per net ns This patch makes nfs_clients_lock allocated per network namespace. All items it protects are already network namespace aware. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 51 +++++++++++++++++++++++++++-------------------- fs/nfs/idmap.c | 4 ++-- fs/nfs/internal.h | 3 --- fs/nfs/netns.h | 1 + 4 files changed, 32 insertions(+), 27 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f51b2795ce07..9e11d2988830 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -55,7 +55,6 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT -DEFINE_SPINLOCK(nfs_client_lock); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 @@ -73,9 +72,9 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) retry: if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) return -ENOMEM; - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); if (ret == -EAGAIN) goto retry; return ret; @@ -313,15 +312,18 @@ static void nfs_free_client(struct nfs_client *clp) */ void nfs_put_client(struct nfs_client *clp) { + struct nfs_net *nn; + if (!clp) return; dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); + nn = net_generic(clp->net, nfs_net_id); - if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { + if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { list_del(&clp->cl_share_link); nfs_cb_idr_remove_locked(clp); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); BUG_ON(!list_empty(&clp->cl_superblocks)); @@ -516,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, /* see if the client already exists */ do { - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); clp = nfs_match_client(cl_init); if (clp) @@ -524,7 +526,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, if (new) goto install_client; - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); new = nfs_alloc_client(cl_init); } while (!IS_ERR(new)); @@ -536,7 +538,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, install_client: clp = new; list_add(&clp->cl_share_link, &nn->nfs_client_list); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, authflavour, noresvport); @@ -551,7 +553,7 @@ install_client: * - make sure it's ready before returning */ found_client: - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); if (new) nfs_free_client(new); @@ -1041,24 +1043,25 @@ static void nfs_server_insert_lists(struct nfs_server *server) struct nfs_client *clp = server->nfs_client; struct nfs_net *nn = net_generic(clp->net, nfs_net_id); - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); list_add_tail(&server->master_link, &nn->nfs_volume_list); clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); } static void nfs_server_remove_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); list_del_rcu(&server->client_link); if (clp && list_empty(&clp->cl_superblocks)) set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); list_del(&server->master_link); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); synchronize_rcu(); } @@ -1212,11 +1215,11 @@ nfs4_find_client_ident(struct net *net, int cb_ident) struct nfs_client *clp; struct nfs_net *nn = net_generic(net, nfs_net_id); - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); clp = idr_find(&nn->cb_ident_idr, cb_ident); if (clp) atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return clp; } @@ -1235,7 +1238,7 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, struct nfs_client *clp; struct nfs_net *nn = net_generic(&init_net, nfs_net_id); - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (nfs4_cb_match_client(addr, clp, 1) == false) continue; @@ -1249,10 +1252,10 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, continue; atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return clp; } - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return NULL; } @@ -1849,7 +1852,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* lock the list against modification */ - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); return seq_list_start_head(&nn->nfs_client_list, *_pos); } @@ -1868,7 +1871,9 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_server_list_stop(struct seq_file *p, void *v) { - spin_unlock(&nfs_client_lock); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + spin_unlock(&nn->nfs_client_lock); } /* @@ -1930,7 +1935,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* lock the list against modification */ - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); return seq_list_start_head(&nn->nfs_volume_list, *_pos); } @@ -1949,7 +1954,9 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_volume_list_stop(struct seq_file *p, void *v) { - spin_unlock(&nfs_client_lock); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + spin_unlock(&nn->nfs_client_lock); } /* diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 2f78f0ce2664..d2afcd8354ef 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -536,7 +536,7 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, struct nfs_client *clp; int error = 0; - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (clp->rpc_ops != &nfs_v4_clientops) continue; @@ -544,7 +544,7 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, if (error) break; } - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return error; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 958fff2927c0..b38b73347af5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -182,9 +182,6 @@ static inline void nfs_fs_proc_exit(void) { } #endif -#ifdef CONFIG_NFS_V4 -extern spinlock_t nfs_client_lock; -#endif /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 547cc9525ba2..7baad89ae60e 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -12,6 +12,7 @@ struct nfs_net { #ifdef CONFIG_NFS_V4 struct idr cb_ident_idr; /* Protected by nfs_client_lock */ #endif + spinlock_t nfs_client_lock; }; extern int nfs_net_id; From bc224f539dcce7805d4bfb68a92f0fe8bb102c22 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 26 Jan 2012 15:11:33 +0400 Subject: [PATCH 191/316] NFS: pass proper net rpc_pton() in nfs_dns_resolve_name() Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/dns_resolve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index be9a530987b3..fcd8f1d7430f 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -20,7 +20,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); if (ip_len > 0) - ret = rpc_pton(&init_net, ip_addr, ip_len, sa, salen); + ret = rpc_pton(net, ip_addr, ip_len, sa, salen); else ret = -ESRCH; kfree(ip_addr); From c7add9a9720ff5be4715f7a0bb0d9578b2e8534e Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 26 Jan 2012 15:11:49 +0400 Subject: [PATCH 192/316] NFS: search for client session id in proper network namespace Network namespace is taken from request transport and passed as a part of cb_process_state structure. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 1 + fs/nfs/callback_proc.c | 2 +- fs/nfs/callback_xdr.c | 1 + fs/nfs/client.c | 4 ++-- fs/nfs/internal.h | 3 ++- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index c89d3b9e483c..197e0d3754c2 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -39,6 +39,7 @@ struct cb_process_state { __be32 drc_status; struct nfs_client *clp; int slotid; + struct net *net; }; struct cb_compound_hdr_arg { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0e6e63f55db4..f71978d107d0 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -461,7 +461,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, int i; __be32 status = htonl(NFS4ERR_BADSESSION); - clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); + clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e14af46bd2c6..2e372240d028 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -861,6 +861,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r .drc_status = 0, .clp = NULL, .slotid = -1, + .net = rqstp->rq_xprt->xpt_net, }; unsigned int nops = 0; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9e11d2988830..2328dcbf6c0b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1232,11 +1232,11 @@ nfs4_find_client_ident(struct net *net, int cb_ident) * Returns NULL if no such client */ struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *addr, +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, struct nfs4_sessionid *sid) { struct nfs_client *clp; - struct nfs_net *nn = net_generic(&init_net, nfs_net_id); + struct nfs_net *nn = net_generic(net, nfs_net_id); spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b38b73347af5..0c3648a947d1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -152,7 +152,8 @@ extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); +nfs4_find_client_sessionid(struct net *, const struct sockaddr *, + struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); From b48e127884b117b429d3473577b9dc3f2b42b8eb Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 26 Jan 2012 15:11:57 +0400 Subject: [PATCH 193/316] NFS: pass current net to rpc_pton() while parsing mount options Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8e210b2c16d7..94667848af9a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1408,7 +1408,7 @@ static int nfs_parse_mount_options(char *raw, if (string == NULL) goto out_nomem; mnt->nfs_server.addrlen = - rpc_pton(&init_net, string, strlen(string), + rpc_pton(mnt->net, string, strlen(string), (struct sockaddr *) &mnt->nfs_server.address, sizeof(mnt->nfs_server.address)); @@ -1430,7 +1430,7 @@ static int nfs_parse_mount_options(char *raw, if (string == NULL) goto out_nomem; mnt->mount_server.addrlen = - rpc_pton(&init_net, string, strlen(string), + rpc_pton(mnt->net, string, strlen(string), (struct sockaddr *) &mnt->mount_server.address, sizeof(mnt->mount_server.address)); From 33faaa380e9ec4b93503cae8c9969fb599f0f283 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 26 Jan 2012 15:12:05 +0400 Subject: [PATCH 194/316] NFS: pass transport net to rpc_pton() while parse server name Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/nfs4namespace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 48a9acdbaeb6..667ea7406fd3 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -97,11 +97,11 @@ static size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, size_t salen, struct nfs_server *server) { ssize_t ret; + struct net *net = server->client->cl_xprt->xprt_net; - ret = rpc_pton(&init_net, string, len, sa, salen); + ret = rpc_pton(net, string, len, sa, salen); if (ret == 0) { - ret = nfs_dns_resolve_name(server->client->cl_xprt->xprt_net, - string, len, sa, salen); + ret = nfs_dns_resolve_name(net, string, len, sa, salen); if (ret < 0) ret = 0; } From 17347d03c008e2f504c33bb4905cdad0abc01319 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 26 Jan 2012 15:11:41 +0400 Subject: [PATCH 195/316] NFS: build fixed in case of NFS_USE_NEW_IDMAPPER is undefined Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index d2afcd8354ef..5a5566fa1619 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -62,6 +62,7 @@ #include #include "nfs4_fs.h" #include "internal.h" +#include "netns.h" #define NFS_UINT_MAXLEN 11 #define IDMAP_HASH_SZ 128 From b9f9a03150969e4bd9967c20bce67c4de769058f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 9 Feb 2012 15:31:36 -0500 Subject: [PATCH 196/316] NFSv4: Ensure we throw out bad delegation stateids on NFS4ERR_BAD_STATEID To ensure that we don't just reuse the bad delegation when we attempt to recover the nfs4_state that received the bad stateid error. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a53f33b4ac3a..45392032e7bd 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1132,6 +1132,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4 { struct nfs_client *clp = server->nfs_client; + if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags)) + nfs_async_inode_return_delegation(state->inode, &state->stateid); nfs4_state_mark_reclaim_nograce(clp, state); nfs4_schedule_state_manager(clp); } From 45d43c291e9a922d7b432b0dbcb1d8fb70d8410f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Feb 2012 19:38:51 -0500 Subject: [PATCH 197/316] NFSv4.1: Convert slotid from u8 to u32 It is perfectly legal to negotiate up to 2^32-1 slots in the protocol, and with 10GigE, we are already seeing that 255 slots is far too limiting. Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 2 +- fs/nfs/callback_xdr.c | 6 +++--- fs/nfs/nfs4proc.c | 42 +++++++++++++++++++-------------------- include/linux/nfs_fs_sb.h | 7 ++++--- include/linux/nfs_xdr.h | 2 +- 5 files changed, 29 insertions(+), 30 deletions(-) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 197e0d3754c2..a5527c90a5aa 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -38,7 +38,7 @@ enum nfs4_callback_opnum { struct cb_process_state { __be32 drc_status; struct nfs_client *clp; - int slotid; + u32 slotid; struct net *net; }; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 2e372240d028..5466829c7e77 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -759,14 +759,14 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * Let the state manager know callback processing done. * A single slot, so highest used slotid is either 0 or -1 */ - tbl->highest_used_slotid = -1; + tbl->highest_used_slotid = NFS4_NO_SLOT; nfs4_check_drain_bc_complete(session); spin_unlock(&tbl->slot_tbl_lock); } static void nfs4_cb_free_slot(struct cb_process_state *cps) { - if (cps->slotid != -1) + if (cps->slotid != NFS4_NO_SLOT) nfs4_callback_free_slot(cps->clp->cl_session); } @@ -860,7 +860,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_process_state cps = { .drc_status = 0, .clp = NULL, - .slotid = -1, + .slotid = NFS4_NO_SLOT, .net = rqstp->rq_xprt->xpt_net, }; unsigned int nops = 0; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 482ed97189c9..f3f56f4a3b72 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -360,16 +360,14 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp * When updating highest_used_slotid there may be "holes" in the bitmap * so we need to scan down from highest_used_slotid to 0 looking for the now * highest slotid in use. - * If none found, highest_used_slotid is set to -1. + * If none found, highest_used_slotid is set to NFS4_NO_SLOT. * * Must be called while holding tbl->slot_tbl_lock */ static void -nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) +nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid) { - int slotid = free_slotid; - - BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); + BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE); /* clear used bit in bitmap */ __clear_bit(slotid, tbl->used_slots); @@ -379,10 +377,10 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) if (slotid < tbl->max_slots) tbl->highest_used_slotid = slotid; else - tbl->highest_used_slotid = -1; + tbl->highest_used_slotid = NFS4_NO_SLOT; } - dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, - free_slotid, tbl->highest_used_slotid); + dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, + slotid, tbl->highest_used_slotid); } bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) @@ -402,7 +400,7 @@ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) return; } - if (ses->fc_slot_table.highest_used_slotid != -1) + if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT) return; dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__); @@ -415,7 +413,7 @@ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) void nfs4_check_drain_bc_complete(struct nfs4_session *ses) { if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || - ses->bc_slot_table.highest_used_slotid != -1) + ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT) return; dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); complete(&ses->bc_slot_table.complete); @@ -510,25 +508,25 @@ static int nfs4_sequence_done(struct rpc_task *task, * nfs4_find_slot looks for an unset bit in the used_slots bitmap. * If found, we mark the slot as used, update the highest_used_slotid, * and respectively set up the sequence operation args. - * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise. + * The slot number is returned if found, or NFS4_NO_SLOT otherwise. * * Note: must be called with under the slot_tbl_lock. */ -static u8 +static u32 nfs4_find_slot(struct nfs4_slot_table *tbl) { - int slotid; - u8 ret_id = NFS4_MAX_SLOT_TABLE; - BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE); + u32 slotid; + u32 ret_id = NFS4_NO_SLOT; - dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n", + dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, tbl->max_slots); slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); if (slotid >= tbl->max_slots) goto out; __set_bit(slotid, tbl->used_slots); - if (slotid > tbl->highest_used_slotid) + if (slotid > tbl->highest_used_slotid || + tbl->highest_used_slotid == NFS4_NO_SLOT) tbl->highest_used_slotid = slotid; ret_id = slotid; out: @@ -555,7 +553,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, { struct nfs4_slot *slot; struct nfs4_slot_table *tbl; - u8 slotid; + u32 slotid; dprintk("--> %s\n", __func__); /* slot already allocated? */ @@ -583,7 +581,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, } slotid = nfs4_find_slot(tbl); - if (slotid == NFS4_MAX_SLOT_TABLE) { + if (slotid == NFS4_NO_SLOT) { rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); dprintk("<-- %s: no free slots\n", __func__); @@ -5144,7 +5142,7 @@ static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, spin_lock(&tbl->slot_tbl_lock); tbl->max_slots = max_slots; tbl->slots = slot; - tbl->highest_used_slotid = -1; /* no slot is currently used */ + tbl->highest_used_slotid = NFS4_NO_SLOT; /* no slot is currently used */ spin_unlock(&tbl->slot_tbl_lock); dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, tbl, tbl->slots, tbl->max_slots); @@ -5196,13 +5194,13 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) return NULL; tbl = &session->fc_slot_table; - tbl->highest_used_slotid = -1; + tbl->highest_used_slotid = NFS4_NO_SLOT; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); init_completion(&tbl->complete); tbl = &session->bc_slot_table; - tbl->highest_used_slotid = -1; + tbl->highest_used_slotid = NFS4_NO_SLOT; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); init_completion(&tbl->complete); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 9e101c1b81cf..2ae57f2f645b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -191,6 +191,7 @@ struct nfs_server { /* maximum number of slots to use */ #define NFS4_MAX_SLOT_TABLE (128U) +#define NFS4_NO_SLOT ((u32)-1) #if defined(CONFIG_NFS_V4) @@ -201,10 +202,10 @@ struct nfs4_slot_table { unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ spinlock_t slot_tbl_lock; struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ - int max_slots; /* # slots in table */ - int highest_used_slotid; /* sent to server on each SEQ. + u32 max_slots; /* # slots in table */ + u32 highest_used_slotid; /* sent to server on each SEQ. * op for dynamic resizing */ - int target_max_slots; /* Set by CB_RECALL_SLOT as + u32 target_max_slots; /* Set by CB_RECALL_SLOT as * the new max_slots */ struct completion complete; }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 144419a9cbd3..adbc84ac345f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -181,7 +181,7 @@ struct nfs4_slot { struct nfs4_sequence_args { struct nfs4_session *sa_session; - u8 sa_slotid; + u32 sa_slotid; u8 sa_cache_this; }; From ef159e9177cc5a09e6174796dde0b2d243ddf28b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Feb 2012 19:50:40 -0500 Subject: [PATCH 198/316] NFSv4.1: Add a module parameter to set the number of session slots Add the module parameter 'max_session_slots' to set the initial number of slots that the NFSv4.1 client will attempt to negotiate with the server. Signed-off-by: Trond Myklebust --- Documentation/kernel-parameters.txt | 8 ++++++++ fs/nfs/nfs4proc.c | 8 +++++++- include/linux/nfs_fs_sb.h | 5 +++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 033d4e69b43b..1d369c6286e1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1657,6 +1657,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. of returning the full 64-bit number. The default is to return 64-bit inode numbers. + nfs.max_session_slots= + [NFSv4.1] Sets the maximum number of session slots + the client will attempt to negotiate with the server. + This limits the number of simultaneous RPC requests + that the client can send to the NFSv4.1 server. + Note that there is little point in setting this + value higher than the max_tcp_slot_table_limit. + nfs.nfs4_disable_idmapping= [NFSv4] When set to the default of '1', this option ensures that both the RPC level authentication diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f3f56f4a3b72..0b3316541734 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -72,6 +72,8 @@ #define NFS4_MAX_LOOP_ON_RECOVER (10) +static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; + struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); @@ -5245,7 +5247,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->fc_attrs.max_rqst_sz = mxrqst_sz; args->fc_attrs.max_resp_sz = mxresp_sz; args->fc_attrs.max_ops = NFS4_MAX_OPS; - args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; + args->fc_attrs.max_reqs = max_session_slots; dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " "max_ops=%u max_reqs=%u\n", @@ -6390,6 +6392,10 @@ const struct xattr_handler *nfs4_xattr_handlers[] = { NULL }; +module_param(max_session_slots, ushort, 0644); +MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " + "requests the client will negotiate"); + /* * Local variables: * c-basic-offset: 8 diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2ae57f2f645b..3bf47666646e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -190,13 +190,14 @@ struct nfs_server { /* maximum number of slots to use */ -#define NFS4_MAX_SLOT_TABLE (128U) +#define NFS4_DEF_SLOT_TABLE_SIZE (16U) +#define NFS4_MAX_SLOT_TABLE (256U) #define NFS4_NO_SLOT ((u32)-1) #if defined(CONFIG_NFS_V4) /* Sessions */ -#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long))) +#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) struct nfs4_slot_table { struct nfs4_slot *slots; /* seqid per slot */ unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ From 6f5133652eaab6fbd88bdb1b1fd2236fd82583cb Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 14:09:00 +0400 Subject: [PATCH 199/316] SUNRPC: clear svc pools lists helper introduced This patch moves removing of service transport from it's pools ready lists to separated function. Also this clear is now done with list_for_each_entry_safe() helper. This is a precursor patch, which would be usefull with service shutdown in network namespace context, introduced later in the series. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/svc_xprt.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index de1e1a8b526b..50bf7c1c731c 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -932,26 +932,33 @@ static void svc_close_list(struct list_head *xprt_list) } } -void svc_close_all(struct svc_serv *serv) +static void svc_clear_pools(struct svc_serv *serv) { struct svc_pool *pool; struct svc_xprt *xprt; struct svc_xprt *tmp; int i; - svc_close_list(&serv->sv_tempsocks); - svc_close_list(&serv->sv_permsocks); - for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[i]; spin_lock_bh(&pool->sp_lock); - while (!list_empty(&pool->sp_sockets)) { - xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); + list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) { list_del_init(&xprt->xpt_ready); } spin_unlock_bh(&pool->sp_lock); } +} + +void svc_close_all(struct svc_serv *serv) +{ + struct svc_xprt *xprt; + struct svc_xprt *tmp; + + svc_close_list(&serv->sv_tempsocks); + svc_close_list(&serv->sv_permsocks); + + svc_clear_pools(serv); /* * At this point the sp_sockets lists will stay empty, since * svc_enqueue will not add new entries without taking the From 3a22bf506c9df47e93e8dc8a68d86cd8ae384d98 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 14:09:08 +0400 Subject: [PATCH 200/316] SUNRPC: clear svc transports lists helper introduced This patch moves service transports deletion from service sockets lists to separated function. This is a precursor patch, which would be usefull with service shutdown in network namespace context, introduced later in the series. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/svc_xprt.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 50bf7c1c731c..493e70b72b71 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -950,11 +950,19 @@ static void svc_clear_pools(struct svc_serv *serv) } } -void svc_close_all(struct svc_serv *serv) +static void svc_clear_list(struct list_head *xprt_list) { struct svc_xprt *xprt; struct svc_xprt *tmp; + list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { + svc_delete_xprt(xprt); + } + BUG_ON(!list_empty(xprt_list)); +} + +void svc_close_all(struct svc_serv *serv) +{ svc_close_list(&serv->sv_tempsocks); svc_close_list(&serv->sv_permsocks); @@ -964,13 +972,8 @@ void svc_close_all(struct svc_serv *serv) * svc_enqueue will not add new entries without taking the * sp_lock and checking XPT_BUSY. */ - list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list) - svc_delete_xprt(xprt); - list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) - svc_delete_xprt(xprt); - - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); + svc_clear_list(&serv->sv_tempsocks); + svc_clear_list(&serv->sv_permsocks); } /* From 7b147f1ff267d12e0d189ca3d4156ed5a76b8d99 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 14:09:17 +0400 Subject: [PATCH 201/316] SUNRPC: service destruction in network namespace context v2: Added comment to BUG_ON's in svc_destroy() to make code looks clearer. This patch introduces network namespace filter for service destruction function. Nothing special here - just do exactly the same operations, but only for tranports in passed networks namespace context. BTW, BUG_ON() checks for empty service transports lists were returned into svc_destroy() function. This is because of swithing generic svc_close_all() to networks namespace dependable svc_close_net(). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svcsock.h | 2 +- net/sunrpc/svc.c | 13 +++++++++++-- net/sunrpc/svc_xprt.c | 27 +++++++++++++++++---------- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index c84e9741cb2a..cb4ac69e1f33 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -34,7 +34,7 @@ struct svc_sock { /* * Function prototypes. */ -void svc_close_all(struct svc_serv *); +void svc_close_net(struct svc_serv *, struct net *); int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a8b49a044619..6cc0ea3d26f1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -517,6 +517,8 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); void svc_destroy(struct svc_serv *serv) { + struct net *net = current->nsproxy->net_ns; + dprintk("svc: svc_destroy(%s, %d)\n", serv->sv_program->pg_name, serv->sv_nrthreads); @@ -539,10 +541,17 @@ svc_destroy(struct svc_serv *serv) * caller is using--nfsd_mutex in the case of nfsd). So it's * safe to traverse those lists and shut everything down: */ - svc_close_all(serv); + svc_close_net(serv, net); + + /* + * The last user is gone and thus all sockets have to be destroyed to + * the point. Check this. + */ + BUG_ON(!list_empty(&serv->sv_permsocks)); + BUG_ON(!list_empty(&serv->sv_tempsocks)); if (serv->sv_shutdown) - serv->sv_shutdown(serv, current->nsproxy->net_ns); + serv->sv_shutdown(serv, net); cache_clean_deferred(serv); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 493e70b72b71..4bda09d7e1a4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -922,17 +922,19 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct list_head *xprt_list) +static void svc_close_list(struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; list_for_each_entry(xprt, xprt_list, xpt_list) { + if (xprt->xpt_net != net) + continue; set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_BUSY, &xprt->xpt_flags); } } -static void svc_clear_pools(struct svc_serv *serv) +static void svc_clear_pools(struct svc_serv *serv, struct net *net) { struct svc_pool *pool; struct svc_xprt *xprt; @@ -944,36 +946,41 @@ static void svc_clear_pools(struct svc_serv *serv) spin_lock_bh(&pool->sp_lock); list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) { + if (xprt->xpt_net != net) + continue; list_del_init(&xprt->xpt_ready); } spin_unlock_bh(&pool->sp_lock); } } -static void svc_clear_list(struct list_head *xprt_list) +static void svc_clear_list(struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; struct svc_xprt *tmp; list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { + if (xprt->xpt_net != net) + continue; svc_delete_xprt(xprt); } - BUG_ON(!list_empty(xprt_list)); + list_for_each_entry(xprt, xprt_list, xpt_list) + BUG_ON(xprt->xpt_net == net); } -void svc_close_all(struct svc_serv *serv) +void svc_close_net(struct svc_serv *serv, struct net *net) { - svc_close_list(&serv->sv_tempsocks); - svc_close_list(&serv->sv_permsocks); + svc_close_list(&serv->sv_tempsocks, net); + svc_close_list(&serv->sv_permsocks, net); - svc_clear_pools(serv); + svc_clear_pools(serv, net); /* * At this point the sp_sockets lists will stay empty, since * svc_enqueue will not add new entries without taking the * sp_lock and checking XPT_BUSY. */ - svc_clear_list(&serv->sv_tempsocks); - svc_clear_list(&serv->sv_permsocks); + svc_clear_list(&serv->sv_tempsocks, net); + svc_clear_list(&serv->sv_permsocks, net); } /* From 074d0f67cfe0af4927ce49560f403096b490c47f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 14:09:25 +0400 Subject: [PATCH 202/316] SUNRPC: service shutdown function in network namespace context introduced This function is enough for releasing resources, allocated for network namespace context, in case of sharing service between them. IOW, each service "user" (LockD, NFSd, etc), which wants to share service between network namespaces, have to release related resources by the function, introduced in this patch, instead of performing service shutdown (of course in case the service is shared already to the moment of release). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6cc0ea3d26f1..78abac48985b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -510,6 +510,24 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, } EXPORT_SYMBOL_GPL(svc_create_pooled); +void svc_shutdown_net(struct svc_serv *serv, struct net *net) +{ + /* + * The set of xprts (contained in the sv_tempsocks and + * sv_permsocks lists) is now constant, since it is modified + * only by accepting new sockets (done by service threads in + * svc_recv) or aging old ones (done by sv_temptimer), or + * configuration changes (excluded by whatever locking the + * caller is using--nfsd_mutex in the case of nfsd). So it's + * safe to traverse those lists and shut everything down: + */ + svc_close_net(serv, net); + + if (serv->sv_shutdown) + serv->sv_shutdown(serv, net); +} +EXPORT_SYMBOL_GPL(svc_shutdown_net); + /* * Destroy an RPC service. Should be called with appropriate locking to * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. @@ -532,16 +550,8 @@ svc_destroy(struct svc_serv *serv) printk("svc_destroy: no threads for serv=%p!\n", serv); del_timer_sync(&serv->sv_temptimer); - /* - * The set of xprts (contained in the sv_tempsocks and - * sv_permsocks lists) is now constant, since it is modified - * only by accepting new sockets (done by service threads in - * svc_recv) or aging old ones (done by sv_temptimer), or - * configuration changes (excluded by whatever locking the - * caller is using--nfsd_mutex in the case of nfsd). So it's - * safe to traverse those lists and shut everything down: - */ - svc_close_net(serv, net); + + svc_shutdown_net(serv, net); /* * The last user is gone and thus all sockets have to be destroyed to @@ -550,9 +560,6 @@ svc_destroy(struct svc_serv *serv) BUG_ON(!list_empty(&serv->sv_permsocks)); BUG_ON(!list_empty(&serv->sv_tempsocks)); - if (serv->sv_shutdown) - serv->sv_shutdown(serv, net); - cache_clean_deferred(serv); if (svc_serv_is_pooled(serv)) From c228fa2038a33bb3b87f567482124f452e162a71 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 15:07:48 +0400 Subject: [PATCH 203/316] Lockd: create permanent lockd sockets in current network namespace This patch parametrizes Lockd permanent sockets creation routine by network namespace context. It also replaces hard-coded init_net with current network namespace context in Lockd sockets creation routines. This approach looks safe, because Lockd is created during NFS mount (or NFS server start) and thus socket is required exactly in current network namespace context. But in the same time it means, that Lockd sockets inherits first Lockd requester network namespace. This issue will be fixed in further patches of the series. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/lockd/svc.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index ff379ff7761f..cba35984dde7 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -189,27 +189,29 @@ lockd(void *vrqstp) } static int create_lockd_listener(struct svc_serv *serv, const char *name, - const int family, const unsigned short port) + struct net *net, const int family, + const unsigned short port) { struct svc_xprt *xprt; - xprt = svc_find_xprt(serv, name, &init_net, family, 0); + xprt = svc_find_xprt(serv, name, net, family, 0); if (xprt == NULL) - return svc_create_xprt(serv, name, &init_net, family, port, + return svc_create_xprt(serv, name, net, family, port, SVC_SOCK_DEFAULTS); svc_xprt_put(xprt); return 0; } -static int create_lockd_family(struct svc_serv *serv, const int family) +static int create_lockd_family(struct svc_serv *serv, struct net *net, + const int family) { int err; - err = create_lockd_listener(serv, "udp", family, nlm_udpport); + err = create_lockd_listener(serv, "udp", net, family, nlm_udpport); if (err < 0) return err; - return create_lockd_listener(serv, "tcp", family, nlm_tcpport); + return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport); } /* @@ -222,16 +224,16 @@ static int create_lockd_family(struct svc_serv *serv, const int family) * Returns zero if all listeners are available; otherwise a * negative errno value is returned. */ -static int make_socks(struct svc_serv *serv) +static int make_socks(struct svc_serv *serv, struct net *net) { static int warned; int err; - err = create_lockd_family(serv, PF_INET); + err = create_lockd_family(serv, net, PF_INET); if (err < 0) goto out_err; - err = create_lockd_family(serv, PF_INET6); + err = create_lockd_family(serv, net, PF_INET6); if (err < 0 && err != -EAFNOSUPPORT) goto out_err; @@ -252,6 +254,7 @@ int lockd_up(void) { struct svc_serv *serv; int error = 0; + struct net *net = current->nsproxy->net_ns; mutex_lock(&nlmsvc_mutex); /* @@ -275,7 +278,7 @@ int lockd_up(void) goto out; } - error = make_socks(serv); + error = make_socks(serv, net); if (error < 0) goto destroy_and_out; From a9c5d73a8d8cb37601f8c39b35b9b4128e1a5254 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 15:07:57 +0400 Subject: [PATCH 204/316] Lockd: pernet usage counter introduced Lockd is going to be shared between network namespaces - i.e. going to be able to handle lock requests from different network namespaces. This means, that network namespace related resources have to be allocated not once (like now), but for every network namespace context, from which service is requested to operate. This patch implements Lockd per-net users accounting. New per-net counter is used to determine, when per-net resources have to be freed. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/lockd/netns.h | 12 ++++++++++++ fs/lockd/svc.c | 47 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 fs/lockd/netns.h diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h new file mode 100644 index 000000000000..ce227e0fbc5c --- /dev/null +++ b/fs/lockd/netns.h @@ -0,0 +1,12 @@ +#ifndef __LOCKD_NETNS_H__ +#define __LOCKD_NETNS_H__ + +#include + +struct lockd_net { + unsigned int nlmsvc_users; +}; + +extern int lockd_net_id; + +#endif diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index cba35984dde7..73c9ebf09301 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -35,6 +35,8 @@ #include #include +#include "netns.h" + #define NLMDBG_FACILITY NLMDBG_SVC #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) #define ALLOWED_SIGS (sigmask(SIGKILL)) @@ -50,6 +52,8 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; +int lockd_net_id; + /* * These can be set at insmod time (useful for NFS as root filesystem), * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 @@ -316,8 +320,12 @@ int lockd_up(void) destroy_and_out: svc_destroy(serv); out: - if (!error) + if (!error) { + struct lockd_net *ln = net_generic(net, lockd_net_id); + + ln->nlmsvc_users++; nlmsvc_users++; + } mutex_unlock(&nlmsvc_mutex); return error; } @@ -500,24 +508,55 @@ module_param_call(nlm_tcpport, param_set_port, param_get_int, module_param(nsm_use_hostnames, bool, 0644); module_param(nlm_max_connections, uint, 0644); +static int lockd_init_net(struct net *net) +{ + return 0; +} + +static void lockd_exit_net(struct net *net) +{ +} + +static struct pernet_operations lockd_net_ops = { + .init = lockd_init_net, + .exit = lockd_exit_net, + .id = &lockd_net_id, + .size = sizeof(struct lockd_net), +}; + + /* * Initialising and terminating the module. */ static int __init init_nlm(void) { + int err; + #ifdef CONFIG_SYSCTL + err = -ENOMEM; nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); - return nlm_sysctl_table ? 0 : -ENOMEM; -#else - return 0; + if (nlm_sysctl_table == NULL) + goto err_sysctl; #endif + err = register_pernet_subsys(&lockd_net_ops); + if (err) + goto err_pernet; + return 0; + +err_pernet: +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(nlm_sysctl_table); +#endif +err_sysctl: + return err; } static void __exit exit_nlm(void) { /* FIXME: delete all NLM clients */ nlm_shutdown_hosts(); + unregister_pernet_subsys(&lockd_net_ops); #ifdef CONFIG_SYSCTL unregister_sysctl_table(nlm_sysctl_table); #endif From bb2224df5ffe4f864f5b696199b17db1ce77bc0a Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 15:08:05 +0400 Subject: [PATCH 205/316] Lockd: per-net up and down routines introduced This patch introduces per-net Lockd initialization and destruction routines. The logic is the same as in global Lockd up and down routines. Probably the solution is not the best one. But at least it looks clear. So per-net "up" routine are called only in case of lockd is running already. If per-net resources are not allocated yet, then service is being registered with local portmapper and lockd sockets created. Per-net "down" routine is called on every lockd_down() call in case of global users counter is not zero. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/lockd/svc.c | 47 ++++++++++++++++++++++++++++++++++++-- include/linux/sunrpc/svc.h | 2 ++ net/sunrpc/svc.c | 3 ++- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 73c9ebf09301..90dec426bfd8 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -251,6 +251,45 @@ out_err: return err; } +static int lockd_up_net(struct net *net) +{ + struct lockd_net *ln = net_generic(net, lockd_net_id); + struct svc_serv *serv = nlmsvc_rqst->rq_server; + int error; + + if (ln->nlmsvc_users) + return 0; + + error = svc_rpcb_setup(serv, net); + if (error) + goto err_rpcb; + + error = make_socks(serv, net); + if (error < 0) + goto err_socks; + return 0; + +err_socks: + svc_rpcb_cleanup(serv, net); +err_rpcb: + return error; +} + +static void lockd_down_net(struct net *net) +{ + struct lockd_net *ln = net_generic(net, lockd_net_id); + struct svc_serv *serv = nlmsvc_rqst->rq_server; + + if (ln->nlmsvc_users) { + if (--ln->nlmsvc_users == 0) + svc_shutdown_net(serv, net); + } else { + printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", + nlmsvc_task, net); + BUG(); + } +} + /* * Bring up the lockd process if it's not already up. */ @@ -264,8 +303,10 @@ int lockd_up(void) /* * Check whether we're already up and running. */ - if (nlmsvc_rqst) + if (nlmsvc_rqst) { + error = lockd_up_net(net); goto out; + } /* * Sanity check: if there's no pid, @@ -339,8 +380,10 @@ lockd_down(void) { mutex_lock(&nlmsvc_mutex); if (nlmsvc_users) { - if (--nlmsvc_users) + if (--nlmsvc_users) { + lockd_down_net(current->nsproxy->net_ns); goto out; + } } else { printk(KERN_ERR "lockd_down: no users! task=%p\n", nlmsvc_task); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7b65495aa4ef..51b29ac45a8e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -414,6 +414,7 @@ struct svc_procedure { /* * Function prototypes. */ +int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv *, struct net *net)); @@ -426,6 +427,7 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); +void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, struct svc_rqst *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 78abac48985b..4153846984ac 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -369,7 +369,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) return &serv->sv_pools[pidx % serv->sv_nrpools]; } -static int svc_rpcb_setup(struct svc_serv *serv, struct net *net) +int svc_rpcb_setup(struct svc_serv *serv, struct net *net) { int err; @@ -381,6 +381,7 @@ static int svc_rpcb_setup(struct svc_serv *serv, struct net *net) svc_unregister(serv, net); return 0; } +EXPORT_SYMBOL_GPL(svc_rpcb_setup); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net) { From 66697bfd6aec0a9ca9331c1aa544ac20324a7561 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 15:08:13 +0400 Subject: [PATCH 206/316] LockD: make nlm hosts network namespace aware This object depends on RPC client, and thus on network namespace. So let's make it's allocation and lookup in network namespace context. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 3 ++- fs/lockd/host.c | 16 ++++++++++++++-- fs/nfs/client.c | 1 + include/linux/lockd/bind.h | 1 + include/linux/lockd/lockd.h | 4 +++- 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 8d4ea8351e3d..ba1dc2eebd1e 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -62,7 +62,8 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, nlm_init->protocol, nlm_version, - nlm_init->hostname, nlm_init->noresvport); + nlm_init->hostname, nlm_init->noresvport, + nlm_init->net); if (host == NULL) { lockd_down(); return ERR_PTR(-ENOLCK); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 6f29836ec0cb..9ebd91dc42c3 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -17,6 +17,8 @@ #include #include +#include + #include #define NLMDBG_FACILITY NLMDBG_HOSTCACHE @@ -54,6 +56,7 @@ struct nlm_lookup_host_info { const char *hostname; /* remote's hostname */ const size_t hostname_len; /* it's length */ const int noresvport; /* use non-priv port */ + struct net *net; /* network namespace to bind */ }; /* @@ -155,6 +158,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, INIT_LIST_HEAD(&host->h_reclaim); host->h_nsmhandle = nsm; host->h_addrbuf = nsm->sm_addrbuf; + host->net = ni->net; out: return host; @@ -206,7 +210,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const unsigned short protocol, const u32 version, const char *hostname, - int noresvport) + int noresvport, + struct net *net) { struct nlm_lookup_host_info ni = { .server = 0, @@ -217,6 +222,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, .hostname = hostname, .hostname_len = strlen(hostname), .noresvport = noresvport, + .net = net, }; struct hlist_head *chain; struct hlist_node *pos; @@ -231,6 +237,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, chain = &nlm_client_hosts[nlm_hash_address(sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { + if (host->net != net) + continue; if (!rpc_cmp_addr(nlm_addr(host), sap)) continue; @@ -318,6 +326,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, struct nsm_handle *nsm = NULL; struct sockaddr *src_sap = svc_daddr(rqstp); size_t src_len = rqstp->rq_daddrlen; + struct net *net = rqstp->rq_xprt->xpt_net; struct nlm_lookup_host_info ni = { .server = 1, .sap = svc_addr(rqstp), @@ -326,6 +335,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, .version = rqstp->rq_vers, .hostname = hostname, .hostname_len = hostname_len, + .net = net, }; dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, @@ -339,6 +349,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { + if (host->net != net) + continue; if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) continue; @@ -431,7 +443,7 @@ nlm_bind_host(struct nlm_host *host) .to_retries = 5U, }; struct rpc_create_args args = { - .net = &init_net, + .net = host->net, .protocol = host->h_proto, .address = nlm_addr(host), .addrsize = host->h_addrlen, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2328dcbf6c0b..1a5cd49dff80 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -707,6 +707,7 @@ static int nfs_start_lockd(struct nfs_server *server) .nfs_version = clp->rpc_ops->version, .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 1 : 0, + .net = clp->net, }; if (nlm_init.nfs_version > 3) diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index fbc48f898521..11a966e5f829 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -42,6 +42,7 @@ struct nlmclnt_initdata { unsigned short protocol; u32 nfs_version; int noresvport; + struct net *net; }; /* diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 8949167a148d..94b3d13be426 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -67,6 +67,7 @@ struct nlm_host { struct list_head h_reclaim; /* Locks in RECLAIM state */ struct nsm_handle *h_nsmhandle; /* NSM status handle */ char *h_addrbuf; /* address eyecatcher */ + struct net *net; /* host net */ }; /* @@ -222,7 +223,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const unsigned short protocol, const u32 version, const char *hostname, - int noresvport); + int noresvport, + struct net *net); void nlmclnt_release_host(struct nlm_host *); struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, const char *hostname, From 0e1cb5c0aad1c37a4eee6db45f52c0b3869db2cc Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 15:08:21 +0400 Subject: [PATCH 207/316] LockD: make NSM network namespace aware NLM host is network namespace aware now. So NSM have to take it into account. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/lockd/mon.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index c196030e530a..7ef14b3c5bee 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -62,14 +62,14 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) return (struct sockaddr *)&nsm->sm_addr; } -static struct rpc_clnt *nsm_create(void) +static struct rpc_clnt *nsm_create(struct net *net) { struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_LOOPBACK), }; struct rpc_create_args args = { - .net = &init_net, + .net = net, .protocol = XPRT_TRANSPORT_UDP, .address = (struct sockaddr *)&sin, .addrsize = sizeof(sin), @@ -83,7 +83,8 @@ static struct rpc_clnt *nsm_create(void) return rpc_create(&args); } -static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) +static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, + struct net *net) { struct rpc_clnt *clnt; int status; @@ -99,7 +100,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) .rpc_resp = res, }; - clnt = nsm_create(); + clnt = nsm_create(net); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); dprintk("lockd: failed to create NSM upcall transport, " @@ -149,7 +150,7 @@ int nsm_monitor(const struct nlm_host *host) */ nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; - status = nsm_mon_unmon(nsm, NSMPROC_MON, &res); + status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net); if (unlikely(res.status != 0)) status = -EIO; if (unlikely(status < 0)) { @@ -183,7 +184,7 @@ void nsm_unmonitor(const struct nlm_host *host) && nsm->sm_monitored && !nsm->sm_sticky) { dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); - status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res); + status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net); if (res.status != 0) status = -EIO; if (status < 0) From 3b64739fb928c34b13db6b5adcb0d3efb19e78be Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 31 Jan 2012 15:08:29 +0400 Subject: [PATCH 208/316] Lockd: shutdown NLM hosts in network namespace context Lockd now managed in network namespace context. And this patch introduces network namespace related NLM hosts shutdown in case of releasing per-net Lockd resources. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 26 +++++++++++++++++++------- fs/lockd/svc.c | 4 +++- include/linux/lockd/lockd.h | 1 + 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 9ebd91dc42c3..eb75ca7c2d6e 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -565,12 +565,8 @@ void nlm_host_rebooted(const struct nlm_reboot *info) nsm_release(nsm); } -/* - * Shut down the hosts module. - * Note that this routine is called only at server shutdown time. - */ void -nlm_shutdown_hosts(void) +nlm_shutdown_hosts_net(struct net *net) { struct hlist_head *chain; struct hlist_node *pos; @@ -582,6 +578,8 @@ nlm_shutdown_hosts(void) /* First, make all hosts eligible for gc */ dprintk("lockd: nuking all hosts...\n"); for_each_host(host, pos, chain, nlm_server_hosts) { + if (net && host->net != net) + continue; host->h_expires = jiffies - 1; if (host->h_rpcclnt) { rpc_shutdown_client(host->h_rpcclnt); @@ -592,15 +590,29 @@ nlm_shutdown_hosts(void) /* Then, perform a garbage collection pass */ nlm_gc_hosts(); mutex_unlock(&nlm_host_mutex); +} + +/* + * Shut down the hosts module. + * Note that this routine is called only at server shutdown time. + */ +void +nlm_shutdown_hosts(void) +{ + struct hlist_head *chain; + struct hlist_node *pos; + struct nlm_host *host; + + nlm_shutdown_hosts_net(NULL); /* complain if any hosts are left */ if (nrhosts != 0) { printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); dprintk("lockd: %lu hosts left:\n", nrhosts); for_each_host(host, pos, chain, nlm_server_hosts) { - dprintk(" %s (cnt %d use %d exp %ld)\n", + dprintk(" %s (cnt %d use %d exp %ld net %p)\n", host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires); + host->h_inuse, host->h_expires, host->net); } } } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 90dec426bfd8..2774e1013b34 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -281,8 +281,10 @@ static void lockd_down_net(struct net *net) struct svc_serv *serv = nlmsvc_rqst->rq_server; if (ln->nlmsvc_users) { - if (--ln->nlmsvc_users == 0) + if (--ln->nlmsvc_users == 0) { + nlm_shutdown_hosts_net(net); svc_shutdown_net(serv, net); + } } else { printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", nlmsvc_task, net); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 94b3d13be426..f04ce6ac6d04 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -234,6 +234,7 @@ struct rpc_clnt * nlm_bind_host(struct nlm_host *); void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_shutdown_hosts(void); +void nlm_shutdown_hosts_net(struct net *net); void nlm_host_rebooted(const struct nlm_reboot *); /* From 4c03ae4a897b52e0e8fc38749606549eaa20d5b7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 7 Feb 2012 00:05:11 -0500 Subject: [PATCH 209/316] NFS: Initialise the nfs_net->nfs_client_lock Ensure that we initialise the nfs_net->nfs_client_lock spinlock. Also ensure that nfs_server_remove_lists() doesn't try to dereference server->nfs_client before that is initialised. Signed-off-by: Trond Myklebust Cc: Stanislav Kinsbursky --- fs/nfs/client.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1a5cd49dff80..6f6267cb6bad 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1055,11 +1055,14 @@ static void nfs_server_insert_lists(struct nfs_server *server) static void nfs_server_remove_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; - struct nfs_net *nn = net_generic(clp->net, nfs_net_id); + struct nfs_net *nn; + if (clp == NULL) + return; + nn = net_generic(clp->net, nfs_net_id); spin_lock(&nn->nfs_client_lock); list_del_rcu(&server->client_link); - if (clp && list_empty(&clp->cl_superblocks)) + if (list_empty(&clp->cl_superblocks)) set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); list_del(&server->master_link); spin_unlock(&nn->nfs_client_lock); @@ -1777,6 +1780,7 @@ void nfs_clients_init(struct net *net) #ifdef CONFIG_NFS_V4 idr_init(&nn->cb_ident_idr); #endif + spin_lock_init(&nn->nfs_client_lock); } #ifdef CONFIG_PROC_FS From b6d1e83b4ea6cb369bdd490871f00651decdb509 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 7 Feb 2012 19:53:19 +0400 Subject: [PATCH 210/316] NFS: fix nfs4_find_client_sessionid() arguments list It's not compilable in case of CONFIG_NFS_V4_1 is not set. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 6f6267cb6bad..d0f850ffeb19 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1266,7 +1266,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, #else /* CONFIG_NFS_V4_1 */ struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *addr, +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, struct nfs4_sessionid *sid) { return NULL; From 7ced286e0ade171af89d32c22b1590e1ca480542 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 7 Feb 2012 11:49:11 -0500 Subject: [PATCH 211/316] NFS: add mount options 'v4.0' and 'v4.1' Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 94667848af9a..d18a90ba165f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -80,7 +80,7 @@ enum { Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, - Opt_v2, Opt_v3, Opt_v4, + Opt_v2, Opt_v3, Opt_v4, Opt_v4_0, Opt_v4_1, Opt_udp, Opt_tcp, Opt_rdma, Opt_acl, Opt_noacl, Opt_rdirplus, Opt_nordirplus, @@ -136,6 +136,8 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_v2, "v2" }, { Opt_v3, "v3" }, { Opt_v4, "v4" }, + { Opt_v4_0, "v4.0" }, + { Opt_v4_1, "v4.1" }, { Opt_udp, "udp" }, { Opt_tcp, "tcp" }, { Opt_rdma, "rdma" }, @@ -1172,6 +1174,16 @@ static int nfs_parse_mount_options(char *raw, mnt->flags &= ~NFS_MOUNT_VER3; mnt->version = 4; break; + case Opt_v4_0: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 4; + mnt->minorversion = 0; + break; + case Opt_v4_1: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 4; + mnt->minorversion = 1; + break; case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; From d073e9b541e1ac3f52d72c3a153855d9a9ee3278 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 7 Feb 2012 14:59:05 -0500 Subject: [PATCH 212/316] NFSv4: Reduce the footprint of the idmapper Instead of pre-allocating the storage for all the strings, we can significantly reduce the size of that table by doing the allocation when we do the downcall. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton --- fs/nfs/idmap.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 5a5566fa1619..fff79481218c 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -362,7 +362,7 @@ struct idmap_hashent { unsigned long ih_expires; __u32 ih_id; size_t ih_namelen; - char ih_name[IDMAP_NAMESZ]; + const char *ih_name; }; struct idmap_hashtable { @@ -482,12 +482,17 @@ void nfs_idmap_delete(struct nfs_client *clp) { struct idmap *idmap = clp->cl_idmap; + int i; if (!idmap) return; nfs_idmap_unregister(clp, idmap->idmap_pipe); rpc_destroy_pipe_data(idmap->idmap_pipe); clp->cl_idmap = NULL; + for (i = 0; i < ARRAY_SIZE(idmap->idmap_user_hash.h_entries); i++) + kfree(idmap->idmap_user_hash.h_entries[i].ih_name); + for (i = 0; i < ARRAY_SIZE(idmap->idmap_group_hash.h_entries); i++) + kfree(idmap->idmap_group_hash.h_entries[i].ih_name); kfree(idmap); } @@ -634,9 +639,14 @@ static void idmap_update_entry(struct idmap_hashent *he, const char *name, size_t namelen, __u32 id) { + char *str = kmalloc(namelen + 1, GFP_KERNEL); + if (str == NULL) + return; + kfree(he->ih_name); he->ih_id = id; - memcpy(he->ih_name, name, namelen); - he->ih_name[namelen] = '\0'; + memcpy(str, name, namelen); + str[namelen] = '\0'; + he->ih_name = str; he->ih_namelen = namelen; he->ih_expires = jiffies + nfs_idmap_cache_timeout; } From e3da87066f950076fe274b58f0d0adc7d9f9d412 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2012 13:21:38 -0500 Subject: [PATCH 213/316] NFSv4: The idmapper now depends on keyring functionality Add the appropriate 'select KEYS' to the NFSv4 Kconfig entry. Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 021d2cf6938a..ee86cfcd6c33 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -64,6 +64,7 @@ config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS select SUNRPC_GSS + select KEYS help This option enables support for version 4 of the NFS protocol (RFC 3530) in the kernel's NFS client. @@ -130,5 +131,4 @@ config NFS_USE_KERNEL_DNS bool depends on NFS_V4 && !NFS_USE_LEGACY_DNS select DNS_RESOLVER - select KEYS default y From 685f50f9188ac1e8244d0340a9d6ea36b6136cec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2012 13:39:15 -0500 Subject: [PATCH 214/316] NFSv4: Further reduce the footprint of the idmapper Don't allocate the legacy idmapper tables until we actually need them. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton --- fs/nfs/idmap.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index fff79481218c..b5c6d8eb7e03 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -367,7 +367,7 @@ struct idmap_hashent { struct idmap_hashtable { __u8 h_type; - struct idmap_hashent h_entries[IDMAP_HASH_SZ]; + struct idmap_hashent *h_entries; }; struct idmap { @@ -478,21 +478,40 @@ nfs_idmap_new(struct nfs_client *clp) return 0; } +static void +idmap_alloc_hashtable(struct idmap_hashtable *h) +{ + if (h->h_entries != NULL) + return; + h->h_entries = kcalloc(IDMAP_HASH_SZ, + sizeof(*h->h_entries), + GFP_KERNEL); +} + +static void +idmap_free_hashtable(struct idmap_hashtable *h) +{ + int i; + + if (h->h_entries == NULL) + return; + for (i = 0; i < IDMAP_HASH_SZ; i++) + kfree(h->h_entries[i].ih_name); + kfree(h->h_entries); +} + void nfs_idmap_delete(struct nfs_client *clp) { struct idmap *idmap = clp->cl_idmap; - int i; if (!idmap) return; nfs_idmap_unregister(clp, idmap->idmap_pipe); rpc_destroy_pipe_data(idmap->idmap_pipe); clp->cl_idmap = NULL; - for (i = 0; i < ARRAY_SIZE(idmap->idmap_user_hash.h_entries); i++) - kfree(idmap->idmap_user_hash.h_entries[i].ih_name); - for (i = 0; i < ARRAY_SIZE(idmap->idmap_group_hash.h_entries); i++) - kfree(idmap->idmap_group_hash.h_entries[i].ih_name); + idmap_free_hashtable(&idmap->idmap_user_hash); + idmap_free_hashtable(&idmap->idmap_group_hash); kfree(idmap); } @@ -586,6 +605,8 @@ void nfs_idmap_quit(void) static inline struct idmap_hashent * idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) { + if (h->h_entries == NULL) + return NULL; return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; } @@ -594,6 +615,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) { struct idmap_hashent *he = idmap_name_hash(h, name, len); + if (he == NULL) + return NULL; if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) return NULL; if (time_after(jiffies, he->ih_expires)) @@ -604,6 +627,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) static inline struct idmap_hashent * idmap_id_hash(struct idmap_hashtable* h, __u32 id) { + if (h->h_entries == NULL) + return NULL; return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; } @@ -611,6 +636,9 @@ static struct idmap_hashent * idmap_lookup_id(struct idmap_hashtable *h, __u32 id) { struct idmap_hashent *he = idmap_id_hash(h, id); + + if (he == NULL) + return NULL; if (he->ih_id != id || he->ih_namelen == 0) return NULL; if (time_after(jiffies, he->ih_expires)) @@ -626,12 +654,14 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id) static inline struct idmap_hashent * idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len) { + idmap_alloc_hashtable(h); return idmap_name_hash(h, name, len); } static inline struct idmap_hashent * idmap_alloc_id(struct idmap_hashtable *h, __u32 id) { + idmap_alloc_hashtable(h); return idmap_id_hash(h, id); } From 2f09c24216cd789653eb8efbf8be88409eb8d581 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2012 22:01:15 -0500 Subject: [PATCH 215/316] SUNRPC: Ensure that we can trace waitqueues when !defined(CONFIG_SYSCTL) The tracepoint code relies on the queue->name being defined in order to be able to display the name of the waitqueue on which an RPC task is sleeping. Reported-by: Randy Dunlap Reported-by: Steven Rostedt Signed-off-by: Trond Myklebust Acked-by: Steven Rostedt Acked-by: Randy Dunlap --- include/linux/sunrpc/debug.h | 3 +++ include/linux/sunrpc/sched.h | 15 +++++++++++++-- net/sunrpc/sched.c | 4 +--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index c2786f20016f..2a11eb278f64 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -34,6 +34,9 @@ #ifdef CONFIG_SYSCTL #define RPC_DEBUG #endif +#ifdef CONFIG_TRACEPOINTS +#define RPC_TRACEPOINTS +#endif /* #define RPC_PROFILE */ /* diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index f7b2df5252b0..22dfc24013b6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -195,7 +195,7 @@ struct rpc_wait_queue { unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; -#ifdef RPC_DEBUG +#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) const char * name; #endif }; @@ -270,11 +270,22 @@ static inline int rpc_task_has_priority(struct rpc_task *task, unsigned char pri return (task->tk_priority + RPC_PRIORITY_LOW == prio); } -#ifdef RPC_DEBUG +#if defined(RPC_DEBUG) || defined (RPC_TRACEPOINTS) static inline const char * rpc_qname(const struct rpc_wait_queue *q) { return ((q && q->name) ? q->name : "unknown"); } + +static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, + const char *name) +{ + q->name = name; +} +#else +static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, + const char *name) +{ +} #endif #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d79c63df49b8..1c570a81096a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -208,9 +208,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c queue->qlen = 0; setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); INIT_LIST_HEAD(&queue->timer_list.list); -#ifdef RPC_DEBUG - queue->name = qname; -#endif + rpc_assign_waitqueue_name(queue, qname); } void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname) From 571b7554016941ef0f0c3c61be72561e2bc55f5e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 1 Feb 2012 14:06:41 -0500 Subject: [PATCH 216/316] NFS: dont allow minorversion= opt when vers != 4 Don't allow invalid 'vers' and 'minorversion' combinations in mount options, such as "vers=3,minorversion=1". Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d18a90ba165f..d05024a18984 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1531,6 +1531,9 @@ static int nfs_parse_mount_options(char *raw, if (!sloppy && invalid_option) return 0; + if (mnt->minorversion && mnt->version != 4) + goto out_minorversion_mismatch; + /* * verify that any proto=/mountproto= options match the address * familiies in the addr=/mountaddr= options. @@ -1564,6 +1567,10 @@ out_invalid_address: out_invalid_value: printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); return 0; +out_minorversion_mismatch: + printk(KERN_INFO "NFS: mount option vers=%u does not support " + "minorversion=%u\n", mnt->version, mnt->minorversion); + return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); return 0; From b4b9a0c1c89464dabafef974960f509ce33ae1c0 Mon Sep 17 00:00:00 2001 From: Vitaliy Gusev Date: Wed, 15 Feb 2012 19:38:25 +0400 Subject: [PATCH 217/316] nfs41: Verify channel's attributes accordingly to RFC v2 ca_maxoperations: For the backchannel, the server MUST NOT change the value the client offers. For the fore channel, the server MAY change the requested value. ca_maxrequests: For the backchannel, the server MUST NOT change the value the client offers. For the fore channel, the server MAY change the requested value. Signed-off-by: Vitaliy Gusev Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0b3316541734..87c584dd88b1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5287,6 +5287,8 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args return -EINVAL; if (rcvd->max_reqs == 0) return -EINVAL; + if (rcvd->max_reqs > NFS4_MAX_SLOT_TABLE) + rcvd->max_reqs = NFS4_MAX_SLOT_TABLE; return 0; } @@ -5302,9 +5304,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) return -EINVAL; /* These would render the backchannel useless: */ - if (rcvd->max_ops == 0) + if (rcvd->max_ops != sent->max_ops) return -EINVAL; - if (rcvd->max_reqs == 0) + if (rcvd->max_reqs != sent->max_reqs) return -EINVAL; return 0; } From 1d96e80faff57b9eefa283b35716a384bbb3d5b3 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 16 Feb 2012 17:42:12 +0400 Subject: [PATCH 218/316] SUNRPC: init per-net rpcbind spinlock Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/sunrpc_syms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index d16ac088f6d8..21d106e2ca06 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -50,6 +50,7 @@ static __net_init int sunrpc_init_net(struct net *net) rpc_pipefs_init_net(net); INIT_LIST_HEAD(&sn->all_clients); spin_lock_init(&sn->rpc_client_lock); + spin_lock_init(&sn->rpcb_clnt_lock); return 0; err_unixgid: From 15a4520621824a3c2eb2de2d1f3984bc1663d3c8 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 14 Feb 2012 16:19:18 -0500 Subject: [PATCH 219/316] SUNRPC: add sending,pending queue and max slot to xprt stats With static RPC slots, the xprt backlog queue stats were useful in showing when the transport (TCP) was starved by lack of RPC slots. The new dynamic RPC slot code, commit d9ba131d8f58c0d2ff5029e7002ab43f913b36f9, always provides an RPC slot and so only uses the xprt backlog queue when the tcp_max_slot_table_entries value has been hit or when an allocation error occurs. All requests are now placed on the xprt sending or pending queue which need to be monitored for debugging. The max_slot stat shows the maximum number of dynamic RPC slots reached which is useful when debugging performance issues. Add the new fields at the end of the mountstats xprt stanza so that mountstats outputs the previous correct values and ignores the new fields. Bump NFS_IOSTATS_VERS. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- include/linux/nfs_iostat.h | 2 +- include/linux/sunrpc/xprt.h | 7 +++++-- net/sunrpc/xprt.c | 7 ++++++- net/sunrpc/xprtsock.c | 23 +++++++++++++++++------ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h index 8866bb3502ee..9dcbbe9a51fb 100644 --- a/include/linux/nfs_iostat.h +++ b/include/linux/nfs_iostat.h @@ -21,7 +21,7 @@ #ifndef _LINUX_NFS_IOSTAT #define _LINUX_NFS_IOSTAT -#define NFS_IOSTAT_VERS "1.0" +#define NFS_IOSTAT_VERS "1.1" /* * NFS byte counters diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b033f366d5f6..ea712f97f4a1 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -219,10 +219,13 @@ struct rpc_xprt { connect_time, /* jiffies waiting for connect */ sends, /* how many complete requests */ recvs, /* how many complete requests */ - bad_xids; /* lookup_rqst didn't find XID */ + bad_xids, /* lookup_rqst didn't find XID */ + max_slots; /* max rpc_slots used */ unsigned long long req_u, /* average requests on the wire */ - bklog_u; /* backlog queue utilization */ + bklog_u, /* backlog queue utilization */ + sending_u, /* send q utilization */ + pending_u; /* pend q utilization */ } stat; struct net *xprt_net; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index efe5495ecf65..739df8a11382 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -885,7 +885,7 @@ void xprt_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status; + int status, numreqs; dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); @@ -922,9 +922,14 @@ void xprt_transmit(struct rpc_task *task) xprt->ops->set_retrans_timeout(task); + numreqs = atomic_read(&xprt->num_reqs); + if (numreqs > xprt->stat.max_slots) + xprt->stat.max_slots = numreqs; xprt->stat.sends++; xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; xprt->stat.bklog_u += xprt->backlog.qlen; + xprt->stat.sending_u += xprt->sending.qlen; + xprt->stat.pending_u += xprt->pending.qlen; /* Don't race with disconnect */ if (!xprt_connected(xprt)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 55472c48825e..4c8281d29e2b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2227,7 +2227,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " - "%llu %llu\n", + "%llu %llu %lu %llu %llu\n", xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, @@ -2236,7 +2236,10 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, - xprt->stat.bklog_u); + xprt->stat.bklog_u, + xprt->stat.max_slots, + xprt->stat.sending_u, + xprt->stat.pending_u); } /** @@ -2249,14 +2252,18 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", + seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %llu %llu " + "%lu %llu %llu\n", transport->srcport, xprt->stat.bind_count, xprt->stat.sends, xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, - xprt->stat.bklog_u); + xprt->stat.bklog_u, + xprt->stat.max_slots, + xprt->stat.sending_u, + xprt->stat.pending_u); } /** @@ -2273,7 +2280,8 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) if (xprt_connected(xprt)) idle_time = (long)(jiffies - xprt->last_used) / HZ; - seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", + seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu " + "%llu %llu %lu %llu %llu\n", transport->srcport, xprt->stat.bind_count, xprt->stat.connect_count, @@ -2283,7 +2291,10 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, - xprt->stat.bklog_u); + xprt->stat.bklog_u, + xprt->stat.max_slots, + xprt->stat.sending_u, + xprt->stat.pending_u); } /* From dbb9c2a22d32492544765e798386daa2d9da27d2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 15 Feb 2012 16:35:08 -0500 Subject: [PATCH 220/316] SUNRPC: Use KERN_DEFAULT for debugging printk's Our dprintk() debugging facility doesn't specify any verbosity level for it's printk() calls, but it should. The default verbosity for printk's is KERN_DEFAULT. You might argue that these are debugging printk's and thus the verbosity should be KERN_DEBUG. That would mean that to see NFS and SUNRPC debugging output an admin would also have to boost the syslog verbosity, which would be insufferably noisy. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 2a11eb278f64..b506936f4ce6 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -54,7 +54,11 @@ extern unsigned int nlm_debug; #undef ifdebug #ifdef RPC_DEBUG # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) -# define dfprintk(fac, args...) do { ifdebug(fac) printk(args); } while(0) +# define dfprintk(fac, args...) \ + do { \ + ifdebug(fac) \ + printk(KERN_DEFAULT args); \ + } while (0) # define RPC_IFDEBUG(x) x #else # define ifdebug(fac) if (0) From d7c32675021bd750d8e0e726f2f81f746e8cab01 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 15 Feb 2012 16:35:17 -0500 Subject: [PATCH 221/316] nfs: Clean up debugging in nfs_follow_mountpoint() Clean up: Fix a debugging message which had an obsolete function name in it (nfs_follow_mountpoint). Introduced by commit 36d43a43 "NFS: Use d_automount() rather than abusing follow_link()" (January 14, 2011) Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 8102391bb374..1807866bb3ab 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -276,7 +276,10 @@ out: nfs_free_fattr(fattr); nfs_free_fhandle(fh); out_nofree: - dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt); + if (IS_ERR(mnt)) + dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt)); + else + dprintk("<-- %s() = %p\n", __func__, mnt); return mnt; } From b6bf6e7d6f6fae1ddcae9e02dfe676bdc8fe892c Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 17 Feb 2012 13:05:23 -0500 Subject: [PATCH 222/316] NFSv4.1 set highest_used_slotid to NFS4_NO_SLOT Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4e37818a34ef..c1111a37dc14 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -212,7 +212,7 @@ static void nfs4_end_drain_session(struct nfs_client *clp) static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) { spin_lock(&tbl->slot_tbl_lock); - if (tbl->highest_used_slotid != -1) { + if (tbl->highest_used_slotid != NFS4_NO_SLOT) { INIT_COMPLETION(tbl->complete); spin_unlock(&tbl->slot_tbl_lock); return wait_for_completion_interruptible(&tbl->complete); From 0a702195234eb77c4097148285cccf7f095de9cf Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 17 Feb 2012 13:15:24 -0500 Subject: [PATCH 223/316] NFS: include filelayout DS rpc stats in mountstats Include RPC statistics from all data servers in /proc/self/mountstats for pNFS filelayout mounts. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 19 +++++++++++++++++++ include/linux/sunrpc/metrics.h | 6 ++++-- include/linux/sunrpc/sched.h | 1 + net/sunrpc/stats.c | 8 ++++---- net/sunrpc/xprt.c | 5 ++++- 5 files changed, 32 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 79be7acc9bae..47e8f3435d38 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -33,6 +33,8 @@ #include #include +#include + #include "internal.h" #include "nfs4filelayout.h" @@ -189,6 +191,13 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) rdata->mds_ops->rpc_call_done(task, data); } +static void filelayout_read_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_read_data *rdata = (struct nfs_read_data *)data; + + rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); +} + static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = (struct nfs_read_data *)data; @@ -268,6 +277,13 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) wdata->mds_ops->rpc_call_done(task, data); } +static void filelayout_write_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); +} + static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = (struct nfs_write_data *)data; @@ -288,18 +304,21 @@ static void filelayout_commit_release(void *data) struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, + .rpc_count_stats = filelayout_read_count_stats, .rpc_release = filelayout_read_release, }; struct rpc_call_ops filelayout_write_call_ops = { .rpc_call_prepare = filelayout_write_prepare, .rpc_call_done = filelayout_write_call_done, + .rpc_count_stats = filelayout_write_count_stats, .rpc_release = filelayout_write_release, }; struct rpc_call_ops filelayout_commit_call_ops = { .rpc_call_prepare = filelayout_write_prepare, .rpc_call_done = filelayout_write_call_done, + .rpc_count_stats = filelayout_write_count_stats, .rpc_release = filelayout_commit_release, }; diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index b6edbc0ea83d..1565bbe86d51 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -74,14 +74,16 @@ struct rpc_clnt; #ifdef CONFIG_PROC_FS struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *); -void rpc_count_iostats(struct rpc_task *); +void rpc_count_iostats(const struct rpc_task *, + struct rpc_iostats *); void rpc_print_iostats(struct seq_file *, struct rpc_clnt *); void rpc_free_iostats(struct rpc_iostats *); #else /* CONFIG_PROC_FS */ static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } -static inline void rpc_count_iostats(struct rpc_task *task) {} +static inline void rpc_count_iostats(const struct rpc_task *task, + struct rpc_iostats *stats) {} static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} static inline void rpc_free_iostats(struct rpc_iostats *stats) {} diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 22dfc24013b6..dc0c3cc3ada3 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -103,6 +103,7 @@ typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { void (*rpc_call_prepare)(struct rpc_task *, void *); void (*rpc_call_done)(struct rpc_task *, void *); + void (*rpc_count_stats)(struct rpc_task *, void *); void (*rpc_release)(void *); }; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 3c4f6888c891..1eb3304bc105 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -133,20 +133,19 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); /** * rpc_count_iostats - tally up per-task stats * @task: completed rpc_task + * @stats: array of stat structures * * Relies on the caller for serialization. */ -void rpc_count_iostats(struct rpc_task *task) +void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_iostats *stats; struct rpc_iostats *op_metrics; ktime_t delta; - if (!task->tk_client || !task->tk_client->cl_metrics || !req) + if (!stats || !req) return; - stats = task->tk_client->cl_metrics; op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; op_metrics->om_ops++; @@ -164,6 +163,7 @@ void rpc_count_iostats(struct rpc_task *task) delta = ktime_sub(ktime_get(), task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); } +EXPORT_SYMBOL_GPL(rpc_count_iostats); static void _print_name(struct seq_file *seq, unsigned int op, struct rpc_procinfo *procs) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 739df8a11382..32e37945a840 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1137,7 +1137,10 @@ void xprt_release(struct rpc_task *task) return; xprt = req->rq_xprt; - rpc_count_iostats(task); + if (task->tk_ops->rpc_count_stats != NULL) + task->tk_ops->rpc_count_stats(task, task->tk_calldata); + else if (task->tk_client) + rpc_count_iostats(task, task->tk_client->cl_metrics); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); if (xprt->ops->release_request) From f86f36a6ae625eda87a13e1ea102a908e08f491b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Feb 2012 20:33:19 -0500 Subject: [PATCH 224/316] NFSv4.1: Fix a NFSv4.1 session initialisation regression Commit aacd553 (NFSv4.1: cleanup init and reset of session slot tables) introduces a regression in the session initialisation code. New tables now find their sequence ids initialised to 0, rather than the mandated value of 1 (see RFC5661). Fix the problem by merging nfs4_reset_slot_table() and nfs4_init_slot_table(). Since the tbl->max_slots is initialised to 0, the test in nfs4_reset_slot_table for max_reqs != tbl->max_slots will automatically pass for an empty table. Reported-by: Vitaliy Gusev Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 107 ++++++++++++++++++---------------------------- 1 file changed, 42 insertions(+), 65 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d202e04aca94..b4d67feab90b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5008,37 +5008,53 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } +static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags) +{ + return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags); +} + +static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, + struct nfs4_slot *new, + u32 max_slots, + u32 ivalue) +{ + struct nfs4_slot *old = NULL; + u32 i; + + spin_lock(&tbl->slot_tbl_lock); + if (new) { + old = tbl->slots; + tbl->slots = new; + tbl->max_slots = max_slots; + } + tbl->highest_used_slotid = -1; /* no slot is currently used */ + for (i = 0; i < tbl->max_slots; i++) + tbl->slots[i].seq_nr = ivalue; + spin_unlock(&tbl->slot_tbl_lock); + kfree(old); +} + /* - * Reset a slot table + * (re)Initialise a slot table */ -static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, - int ivalue) +static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, + u32 ivalue) { struct nfs4_slot *new = NULL; - int i; - int ret = 0; + int ret = -ENOMEM; dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, max_reqs, tbl->max_slots); /* Does the newly negotiated max_reqs match the existing slot table? */ if (max_reqs != tbl->max_slots) { - ret = -ENOMEM; - new = kmalloc(max_reqs * sizeof(struct nfs4_slot), - GFP_NOFS); + new = nfs4_alloc_slots(max_reqs, GFP_NOFS); if (!new) goto out; - ret = 0; - kfree(tbl->slots); } - spin_lock(&tbl->slot_tbl_lock); - if (new) { - tbl->slots = new; - tbl->max_slots = max_reqs; - } - for (i = 0; i < tbl->max_slots; ++i) - tbl->slots[i].seq_nr = ivalue; - spin_unlock(&tbl->slot_tbl_lock); + ret = 0; + + nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue); dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, tbl, tbl->slots, tbl->max_slots); out: @@ -5060,36 +5076,6 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session) return; } -/* - * Initialize slot table - */ -static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, - int max_slots, int ivalue) -{ - struct nfs4_slot *slot; - int ret = -ENOMEM; - - BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE); - - dprintk("--> %s: max_reqs=%u\n", __func__, max_slots); - - slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS); - if (!slot) - goto out; - ret = 0; - - spin_lock(&tbl->slot_tbl_lock); - tbl->max_slots = max_slots; - tbl->slots = slot; - tbl->highest_used_slotid = -1; /* no slot is currently used */ - spin_unlock(&tbl->slot_tbl_lock); - dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, - tbl, tbl->slots, tbl->max_slots); -out: - dprintk("<-- %s: return %d\n", __func__, ret); - return ret; -} - /* * Initialize or reset the forechannel and backchannel tables */ @@ -5101,25 +5087,16 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) dprintk("--> %s\n", __func__); /* Fore channel */ tbl = &ses->fc_slot_table; - if (tbl->slots == NULL) { - status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1); - if (status) /* -ENOMEM */ - return status; - } else { - status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1); - if (status) - return status; - } + status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); + if (status) /* -ENOMEM */ + return status; /* Back channel */ tbl = &ses->bc_slot_table; - if (tbl->slots == NULL) { - status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0); - if (status) - /* Fore and back channel share a connection so get - * both slot tables or neither */ - nfs4_destroy_slot_tables(ses); - } else - status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0); + status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); + if (status && tbl->slots == NULL) + /* Fore and back channel share a connection so get + * both slot tables or neither */ + nfs4_destroy_slot_tables(ses); return status; } From abe9a6d57b4544ac208401f9c0a4262814db2be4 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 16 Feb 2012 11:17:05 -0500 Subject: [PATCH 225/316] NFSv4: fix server_scope memory leak server_scope would never be freed if nfs4_check_cl_exchange_flags() returned non-zero Signed-off-by: Weston Andros Adamson Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b4d67feab90b..ec9f6ef6c5dd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4883,8 +4883,10 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->cl_rpcclient->cl_auth->au_flavor); res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); - if (unlikely(!res.server_scope)) - return -ENOMEM; + if (unlikely(!res.server_scope)) { + status = -ENOMEM; + goto out; + } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) @@ -4901,12 +4903,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->server_scope = NULL; } - if (!clp->server_scope) + if (!clp->server_scope) { clp->server_scope = res.server_scope; - else - kfree(res.server_scope); + goto out; + } } - + kfree(res.server_scope); +out: dprintk("<-- %s status= %d\n", __func__, status); return status; } From 9937347a1ee6a67e450cc9e90750ce0b10abfe75 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2012 08:44:07 +0100 Subject: [PATCH 226/316] NFS: Ensure that the nfs_client 'net' field is always set Currently, the nfs_parsed_mount_data->net field is initialised in the nfs_parse_mount_options() function, which means that it only gets set if we're using text based mounts. The legacy binary mount interface is therefore broken. Fix is to initialise the ->net field in nfs_alloc_parsed_mount_data. Signed-off-by: Trond Myklebust Cc: Stanislav Kinsbursky --- fs/nfs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d05024a18984..6708f3044eb0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -911,6 +911,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve data->auth_flavor_len = 1; data->version = version; data->minorversion = 0; + data->net = current->nsproxy->net_ns; security_init_mnt_opts(&data->lsm_opts); } return data; @@ -1110,8 +1111,6 @@ static int nfs_parse_mount_options(char *raw, free_secdata(secdata); - mnt->net = current->nsproxy->net_ns; - while ((p = strsep(&raw, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; unsigned long option; From abd96698613eb27415e7028b6100be930920adc6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2012 08:46:49 +0100 Subject: [PATCH 227/316] NFS: Ensure struct nfs_client holds a reference to the net namespace Otherwise we have no guarantee that the net namespace won't just disappear from underneath us once the task that created it is destroyed. Signed-off-by: Trond Myklebust Cc: Stanislav Kinsbursky --- fs/nfs/client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d0f850ffeb19..8563585cccec 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -172,7 +172,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_proto = cl_init->proto; - clp->net = cl_init->net; + clp->net = get_net(cl_init->net); #ifdef CONFIG_NFS_V4 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); @@ -300,6 +300,7 @@ static void nfs_free_client(struct nfs_client *clp) nfs4_deviceid_purge_client(clp); + put_net(clp->net); kfree(clp->cl_hostname); kfree(clp->server_scope); kfree(clp); From 7df529af5fb4b4064f8cd62629e259ac79c0b4ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 26 Feb 2012 17:34:22 -0500 Subject: [PATCH 228/316] NFSv4.1: Don't call nfs4_deviceid_purge_client() unless we're NFSv4.1 Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 7 ++++--- fs/nfs/pnfs.h | 3 --- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8563585cccec..592b5583aa3a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -204,8 +204,11 @@ error_0: #ifdef CONFIG_NFS_V4_1 static void nfs4_shutdown_session(struct nfs_client *clp) { - if (nfs4_has_session(clp)) + if (nfs4_has_session(clp)) { + nfs4_deviceid_purge_client(clp); nfs4_destroy_session(clp->cl_session); + } + } #else /* CONFIG_NFS_V4_1 */ static void nfs4_shutdown_session(struct nfs_client *clp) @@ -298,8 +301,6 @@ static void nfs_free_client(struct nfs_client *clp) if (clp->cl_machine_cred != NULL) put_rpccred(clp->cl_machine_cred); - nfs4_deviceid_purge_client(clp); - put_net(clp->net); kfree(clp->cl_hostname); kfree(clp->server_scope); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 53d593a0a4f2..8088d51f495e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -426,9 +426,6 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) return 0; } -static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) -{ -} #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ From da3b462296e421e8f54b54b7d2706488661c36e2 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 27 Feb 2012 22:05:29 +0400 Subject: [PATCH 229/316] SUNRPC: release per-net clients lock before calling PipeFS dentries creation v3: 1) Lookup for client is performed from the beginning of the list on each PipeFS event handling operation. Lockdep is sad otherwise, because inode mutex is taken on PipeFS dentry creation, which can be called on mount notification, where this per-net client lock is taken on clients list walk. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index bb7ed2f3aee6..25c3da53fb69 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -204,21 +204,37 @@ static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event, return err; } +static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct rpc_clnt *clnt; + + spin_lock(&sn->rpc_client_lock); + list_for_each_entry(clnt, &sn->all_clients, cl_clients) { + if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || + ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry)) + continue; + atomic_inc(&clnt->cl_count); + spin_unlock(&sn->rpc_client_lock); + return clnt; + } + spin_unlock(&sn->rpc_client_lock); + return NULL; +} + static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct super_block *sb = ptr; struct rpc_clnt *clnt; int error = 0; - struct sunrpc_net *sn = net_generic(sb->s_fs_info, sunrpc_net_id); - spin_lock(&sn->rpc_client_lock); - list_for_each_entry(clnt, &sn->all_clients, cl_clients) { + while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) { error = __rpc_pipefs_event(clnt, event, sb); + rpc_release_client(clnt); if (error) break; } - spin_unlock(&sn->rpc_client_lock); return error; } From e9dbca8d732e20b8d31a3094a8669c014e7ee262 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 27 Feb 2012 22:05:37 +0400 Subject: [PATCH 230/316] NFS: release per-net clients lock before calling PipeFS dentries creation v3: 1) Lookup for client is performed from the beginning of the list on each PipeFS event handling operation. Lockdep is sad otherwise, because inode mutex is taken on PipeFS dentry creation, which can be called on mount notification, where this per-net client lock is taken on clients list walk. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b5c6d8eb7e03..d4db3b6f4b8e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -553,23 +553,41 @@ static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, return err; } -static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, - void *ptr) +static struct nfs_client *nfs_get_client_for_event(struct net *net, int event) { - struct super_block *sb = ptr; - struct nfs_net *nn = net_generic(sb->s_fs_info, nfs_net_id); + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct dentry *cl_dentry; struct nfs_client *clp; - int error = 0; spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (clp->rpc_ops != &nfs_v4_clientops) continue; + cl_dentry = clp->cl_idmap->idmap_pipe->dentry; + if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) || + ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry)) + continue; + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + return clp; + } + spin_unlock(&nn->nfs_client_lock); + return NULL; +} + +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct super_block *sb = ptr; + struct nfs_client *clp; + int error = 0; + + while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) { error = __rpc_pipefs_event(clp, event, sb); + nfs_put_client(clp); if (error) break; } - spin_unlock(&nn->nfs_client_lock); return error; } From 2c9030eef9dbd0d737a7f55646da70d217fd6255 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 27 Feb 2012 22:05:45 +0400 Subject: [PATCH 231/316] SUNRPC: check RPC inode's pipe reference before dereferencing There are 2 tightly bound objects: pipe data (created for kernel needs, has reference to dentry, which depends on PipeFS mount/umount) and PipeFS dentry/inode pair (created on mount for user-space needs). They both independently may have or have not a valid reference to each other. This means, that we have to make sure, that pipe->dentry reference is valid on upcalls, and dentry->pipe reference is valid on downcalls. The latter check is absent - my fault. IOW, PipeFS dentry can be opened by some process (rpc.idmapd for example), but it's pipe data can belong to NFS mount, which was unmounted already and thus pipe data was destroyed. To fix this, pipe reference have to be set to NULL on rpc_unlink() and checked on PipeFS file operations instead of pipe->dentry check. Note: PipeFS "poll" file operation will be updated in next patch, because it's logic is more complicated. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 6873c9b51cc9..b67b2aecc4ff 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -174,6 +174,7 @@ rpc_close_pipes(struct inode *inode) pipe->ops->release_pipe(inode); cancel_delayed_work_sync(&pipe->queue_timeout); rpc_inode_setowner(inode, NULL); + RPC_I(inode)->pipe = NULL; mutex_unlock(&inode->i_mutex); } @@ -203,12 +204,13 @@ rpc_destroy_inode(struct inode *inode) static int rpc_pipe_open(struct inode *inode, struct file *filp) { - struct rpc_pipe *pipe = RPC_I(inode)->pipe; + struct rpc_pipe *pipe; int first_open; int res = -ENXIO; mutex_lock(&inode->i_mutex); - if (pipe->dentry == NULL) + pipe = RPC_I(inode)->pipe; + if (pipe == NULL) goto out; first_open = pipe->nreaders == 0 && pipe->nwriters == 0; if (first_open && pipe->ops->open_pipe) { @@ -229,12 +231,13 @@ out: static int rpc_pipe_release(struct inode *inode, struct file *filp) { - struct rpc_pipe *pipe = RPC_I(inode)->pipe; + struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int last_close; mutex_lock(&inode->i_mutex); - if (pipe->dentry == NULL) + pipe = RPC_I(inode)->pipe; + if (pipe == NULL) goto out; msg = filp->private_data; if (msg != NULL) { @@ -270,12 +273,13 @@ static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_pipe *pipe = RPC_I(inode)->pipe; + struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int res = 0; mutex_lock(&inode->i_mutex); - if (pipe->dentry == NULL) { + pipe = RPC_I(inode)->pipe; + if (pipe == NULL) { res = -EPIPE; goto out_unlock; } @@ -313,13 +317,12 @@ static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_pipe *pipe = RPC_I(inode)->pipe; int res; mutex_lock(&inode->i_mutex); res = -EPIPE; - if (pipe->dentry != NULL) - res = pipe->ops->downcall(filp, buf, len); + if (RPC_I(inode)->pipe != NULL) + res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len); mutex_unlock(&inode->i_mutex); return res; } @@ -344,16 +347,18 @@ static long rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_pipe *pipe = RPC_I(inode)->pipe; + struct rpc_pipe *pipe; int len; switch (cmd) { case FIONREAD: - spin_lock(&pipe->lock); - if (pipe->dentry == NULL) { - spin_unlock(&pipe->lock); + mutex_lock(&inode->i_mutex); + pipe = RPC_I(inode)->pipe; + if (pipe == NULL) { + mutex_unlock(&inode->i_mutex); return -EPIPE; } + spin_lock(&pipe->lock); len = pipe->pipelen; if (filp->private_data) { struct rpc_pipe_msg *msg; @@ -361,6 +366,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) len += msg->len - msg->copied; } spin_unlock(&pipe->lock); + mutex_unlock(&inode->i_mutex); return put_user(len, (int __user *)arg); default: return -EINVAL; From 591ad7feaec5417681b4112f8df52fc43bb7c92e Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 27 Feb 2012 22:05:54 +0400 Subject: [PATCH 232/316] SUNRPC: move waitq from RPC pipe to RPC inode Currently, wait queue, used for polling of RPC pipe changes from user-space, is a part of RPC pipe. But the pipe data itself can be released on NFS umount prior to dentry-inode pair, connected to it (is case of this pair is open by some process). This is not a problem for almost all pipe users, because all PipeFS file operations checks pipe reference prior to using it. Except evenfd. This thing registers itself with "poll" file operation and thus has a reference to pipe wait queue. This leads to oopses on destroying eventfd after NFS umount (like rpc_idmapd do) since not pipe data left to the point already. The solution is to wait queue from pipe data to internal RPC inode data. This looks more logical, because this wiat queue used only for user-space processes, which already holds inode reference. Note: upcalls have to get pipe->dentry prior to dereferecing wait queue to make sure, that mount point won't disappear from underneath us. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- net/sunrpc/rpc_pipe.c | 39 ++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 426ce6eeee66..a7b422b33eda 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -28,7 +28,6 @@ struct rpc_pipe { int pipelen; int nreaders; int nwriters; - wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; struct delayed_work queue_timeout; @@ -41,6 +40,7 @@ struct rpc_inode { struct inode vfs_inode; void *private; struct rpc_pipe *pipe; + wait_queue_head_t waitq; }; static inline struct rpc_inode * diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b67b2aecc4ff..ac9ee1590739 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -57,7 +57,7 @@ void rpc_pipefs_notifier_unregister(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_unregister); -static void rpc_purge_list(struct rpc_pipe *pipe, struct list_head *head, +static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head, void (*destroy_msg)(struct rpc_pipe_msg *), int err) { struct rpc_pipe_msg *msg; @@ -70,7 +70,7 @@ static void rpc_purge_list(struct rpc_pipe *pipe, struct list_head *head, msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); - wake_up(&pipe->waitq); + wake_up(waitq); } static void @@ -80,6 +80,7 @@ rpc_timeout_upcall_queue(struct work_struct *work) struct rpc_pipe *pipe = container_of(work, struct rpc_pipe, queue_timeout.work); void (*destroy_msg)(struct rpc_pipe_msg *); + struct dentry *dentry; spin_lock(&pipe->lock); destroy_msg = pipe->ops->destroy_msg; @@ -87,8 +88,13 @@ rpc_timeout_upcall_queue(struct work_struct *work) list_splice_init(&pipe->pipe, &free_list); pipe->pipelen = 0; } + dentry = dget(pipe->dentry); spin_unlock(&pipe->lock); - rpc_purge_list(pipe, &free_list, destroy_msg, -ETIMEDOUT); + if (dentry) { + rpc_purge_list(&RPC_I(dentry->d_inode)->waitq, + &free_list, destroy_msg, -ETIMEDOUT); + dput(dentry); + } } ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, @@ -125,6 +131,7 @@ int rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) { int res = -EPIPE; + struct dentry *dentry; spin_lock(&pipe->lock); if (pipe->nreaders) { @@ -140,8 +147,12 @@ rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) pipe->pipelen += msg->len; res = 0; } + dentry = dget(pipe->dentry); spin_unlock(&pipe->lock); - wake_up(&pipe->waitq); + if (dentry) { + wake_up(&RPC_I(dentry->d_inode)->waitq); + dput(dentry); + } return res; } EXPORT_SYMBOL_GPL(rpc_queue_upcall); @@ -168,7 +179,7 @@ rpc_close_pipes(struct inode *inode) pipe->pipelen = 0; pipe->dentry = NULL; spin_unlock(&pipe->lock); - rpc_purge_list(pipe, &free_list, pipe->ops->destroy_msg, -EPIPE); + rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EPIPE); pipe->nwriters = 0; if (need_release && pipe->ops->release_pipe) pipe->ops->release_pipe(inode); @@ -257,7 +268,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) list_splice_init(&pipe->pipe, &free_list); pipe->pipelen = 0; spin_unlock(&pipe->lock); - rpc_purge_list(pipe, &free_list, + rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EAGAIN); } } @@ -330,16 +341,18 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of static unsigned int rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { - struct rpc_pipe *pipe = RPC_I(filp->f_path.dentry->d_inode)->pipe; - unsigned int mask = 0; + struct inode *inode = filp->f_path.dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); + unsigned int mask = POLLOUT | POLLWRNORM; - poll_wait(filp, &pipe->waitq, wait); + poll_wait(filp, &rpci->waitq, wait); - mask = POLLOUT | POLLWRNORM; - if (pipe->dentry == NULL) + mutex_lock(&inode->i_mutex); + if (rpci->pipe == NULL) mask |= POLLERR | POLLHUP; - if (filp->private_data || !list_empty(&pipe->pipe)) + else if (filp->private_data || !list_empty(&rpci->pipe->pipe)) mask |= POLLIN | POLLRDNORM; + mutex_unlock(&inode->i_mutex); return mask; } @@ -543,7 +556,6 @@ init_pipe(struct rpc_pipe *pipe) INIT_LIST_HEAD(&pipe->in_downcall); INIT_LIST_HEAD(&pipe->pipe); pipe->pipelen = 0; - init_waitqueue_head(&pipe->waitq); INIT_DELAYED_WORK(&pipe->queue_timeout, rpc_timeout_upcall_queue); pipe->ops = NULL; @@ -1165,6 +1177,7 @@ init_once(void *foo) inode_init_once(&rpci->vfs_inode); rpci->private = NULL; rpci->pipe = NULL; + init_waitqueue_head(&rpci->waitq); } int register_rpc_pipefs(void) From a59c30acfbe701dc991f4f84abce27818120a2ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Mar 2012 11:17:47 -0500 Subject: [PATCH 233/316] NFSv4.1: Get rid of redundant NFS4CLNT_LAYOUTRECALL tests The NFS4CLNT_LAYOUTRECALL tests in pnfs_layout_process and pnfs_update_layout are redundant. In the case of a bulk layout recall, we're always testing for the NFS_LAYOUT_BULK_RECALL flay anyway. In the case of a file or segment recall, the call to pnfs_set_layout_stateid() updates the layout_header 'barrier' sequence id, which triggers the test in pnfs_layoutgets_blocked() and is less race-prone than NFS4CLNT_LAYOUTRECALL anyway. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a53421604bc4..402efc2f5b70 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -966,8 +966,7 @@ pnfs_update_layout(struct inode *ino, } /* Do we even need to bother with this? */ - if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || - test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { dprintk("%s matches recall, use MDS\n", __func__); goto out_unlock; } @@ -1032,7 +1031,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct nfs4_layoutget_res *res = &lgp->res; struct pnfs_layout_segment *lseg; struct inode *ino = lo->plh_inode; - struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; int status = 0; /* Inject layout blob into I/O device driver */ @@ -1048,8 +1046,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } spin_lock(&ino->i_lock); - if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || - test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { dprintk("%s forget reply due to recall\n", __func__); goto out_forget_reply; } From 0cb3284b535bd5eacc287632b55150c8e5d9edc7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Mar 2012 11:17:50 -0500 Subject: [PATCH 234/316] NFSv4.1: Get rid of NFS4CLNT_LAYOUTRECALL The NFS4CLNT_LAYOUTRECALL bit is a long-term impediment to scalability. It basically stops all other recalls by a given server once any layout recall is requested. If the recall is for a different file, then we don't care. If the recall applies to the same file, then we're in one of two situations: Either we are in the case of a replay of an existing request, in which case the session is supposed to deal with matters, or we are dealing with a completely different request, in which case we should just try to process it. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 6 +----- fs/nfs/nfs4_fs.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index f71978d107d0..0e0865e38065 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -233,17 +233,13 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, static u32 do_callback_layoutrecall(struct nfs_client *clp, struct cb_layoutrecallargs *args) { - u32 res = NFS4ERR_DELAY; + u32 res; dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type); - if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state)) - goto out; if (args->cbl_recall_type == RETURN_FILE) res = initiate_file_draining(clp, args); else res = initiate_bulk_draining(clp, args); - clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state); -out: dprintk("%s returning %i\n", __func__, res); return res; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b133b50dec9a..19079ec8252c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -20,7 +20,6 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, - NFS4CLNT_LAYOUTRECALL, NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, From 59e6b9c11341e3b8ac5925427c903d4eae435bd8 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 24 Feb 2012 14:14:50 -0500 Subject: [PATCH 235/316] Created a function for setting timeouts on keys The keyctl_set_timeout function isn't exported to other parts of the kernel, but I want to use it for the NFS idmapper. I already have the key, but I wanted a generic way to set the timeout. Signed-off-by: Bryan Schumaker Acked-by: David Howells Signed-off-by: Trond Myklebust --- include/linux/key.h | 2 ++ security/keys/key.c | 20 ++++++++++++++++++++ security/keys/keyctl.c | 18 ++---------------- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index 5253471cd2ea..be3995d1024a 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -276,6 +276,8 @@ static inline key_serial_t key_serial(const struct key *key) return key ? key->serial : 0; } +extern void key_set_timeout(struct key *, unsigned); + /** * key_is_instantiated - Determine if a key has been positively instantiated * @key: The key to check. diff --git a/security/keys/key.c b/security/keys/key.c index 7ada8019be1f..06783cffb3af 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -671,6 +671,26 @@ found_kernel_type: return ktype; } +void key_set_timeout(struct key *key, unsigned timeout) +{ + struct timespec now; + time_t expiry = 0; + + /* make the changes with the locks held to prevent races */ + down_write(&key->sem); + + if (timeout > 0) { + now = current_kernel_time(); + expiry = now.tv_sec + timeout; + } + + key->expiry = expiry; + key_schedule_gc(key->expiry + key_gc_delay); + + up_write(&key->sem); +} +EXPORT_SYMBOL_GPL(key_set_timeout); + /* * Unlock a key type locked by key_type_lookup(). */ diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 0b3f5d72af1c..0a4a21d73f6a 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1244,10 +1245,8 @@ error: */ long keyctl_set_timeout(key_serial_t id, unsigned timeout) { - struct timespec now; struct key *key, *instkey; key_ref_t key_ref; - time_t expiry; long ret; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL, @@ -1273,20 +1272,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout) okay: key = key_ref_to_ptr(key_ref); - - /* make the changes with the locks held to prevent races */ - down_write(&key->sem); - - expiry = 0; - if (timeout > 0) { - now = current_kernel_time(); - expiry = now.tv_sec + timeout; - } - - key->expiry = expiry; - key_schedule_gc(key->expiry + key_gc_delay); - - up_write(&key->sem); + key_set_timeout(key, timeout); key_put(key); ret = 0; From 57e62324e469e092ecc6c94a7a86fe4bd6ac5172 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 24 Feb 2012 14:14:51 -0500 Subject: [PATCH 236/316] NFS: Store the legacy idmapper result in the keyring This patch removes the old hashmap-based caching and instead uses a "request key actor" to place an upcall to the legacy idmapper rather than going through /sbin/request-key. This will only be used as a fallback if /etc/request-key.conf isn't configured to use nfsidmap. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 582 ++++++++++++-------------------------- include/linux/nfs_idmap.h | 11 - 2 files changed, 180 insertions(+), 413 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index d4db3b6f4b8e..f72c1fc074e1 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -34,42 +34,28 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include -#include -#include -#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* include files needed by legacy idmapper */ -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include "nfs4_fs.h" +#include +#include +#include +#include +#include +#include + #include "internal.h" #include "netns.h" #define NFS_UINT_MAXLEN 11 -#define IDMAP_HASH_SZ 128 /* Default cache timeout is 10 minutes */ -unsigned int nfs_idmap_cache_timeout = 600 * HZ; +unsigned int nfs_idmap_cache_timeout = 600; const struct cred *id_resolver_cache; +struct key_type key_type_id_resolver_legacy; /** @@ -261,8 +247,10 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, return desclen; } -static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, - const char *type, void *data, size_t data_size) +static ssize_t nfs_idmap_request_key(struct key_type *key_type, + const char *name, size_t namelen, + const char *type, void *data, + size_t data_size, struct idmap *idmap) { const struct cred *saved_cred; struct key *rkey; @@ -275,8 +263,12 @@ static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, goto out; saved_cred = override_creds(id_resolver_cache); - rkey = request_key(&key_type_id_resolver, desc, ""); + if (idmap) + rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap); + else + rkey = request_key(&key_type_id_resolver, desc, ""); revert_creds(saved_cred); + kfree(desc); if (IS_ERR(rkey)) { ret = PTR_ERR(rkey); @@ -309,31 +301,46 @@ out: return ret; } +static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, + const char *type, void *data, + size_t data_size, struct idmap *idmap) +{ + ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver, + name, namelen, type, data, + data_size, NULL); + if (ret < 0) { + ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, + name, namelen, type, data, + data_size, idmap); + } + return ret; +} /* ID -> Name */ -static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen) +static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, + size_t buflen, struct idmap *idmap) { char id_str[NFS_UINT_MAXLEN]; int id_len; ssize_t ret; id_len = snprintf(id_str, sizeof(id_str), "%u", id); - ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen); + ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap); if (ret < 0) return -EINVAL; return ret; } /* Name -> ID */ -static int nfs_idmap_lookup_id(const char *name, size_t namelen, - const char *type, __u32 *id) +static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type, + __u32 *id, struct idmap *idmap) { char id_str[NFS_UINT_MAXLEN]; long id_long; ssize_t data_size; int ret = 0; - data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN); + data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap); if (data_size <= 0) { ret = -EINVAL; } else { @@ -344,54 +351,47 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, } /* idmap classic begins here */ -static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - int jif = num * HZ; - if (endp == val || *endp || num < 0 || jif < num) - return -EINVAL; - *((int *)kp->arg) = jif; - return 0; -} - -module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, - &nfs_idmap_cache_timeout, 0644); - -struct idmap_hashent { - unsigned long ih_expires; - __u32 ih_id; - size_t ih_namelen; - const char *ih_name; -}; - -struct idmap_hashtable { - __u8 h_type; - struct idmap_hashent *h_entries; -}; +module_param(nfs_idmap_cache_timeout, int, 0644); struct idmap { struct rpc_pipe *idmap_pipe; - wait_queue_head_t idmap_wq; - struct idmap_msg idmap_im; - struct mutex idmap_lock; /* Serializes upcalls */ - struct mutex idmap_im_lock; /* Protects the hashtable */ - struct idmap_hashtable idmap_user_hash; - struct idmap_hashtable idmap_group_hash; + struct key_construction *idmap_key_cons; }; +enum { + Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err +}; + +static const match_table_t nfs_idmap_tokens = { + { Opt_find_uid, "uid:%s" }, + { Opt_find_gid, "gid:%s" }, + { Opt_find_user, "user:%s" }, + { Opt_find_group, "group:%s" }, + { Opt_find_err, NULL } +}; + +static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); -static unsigned int fnvhash32(const void *, size_t); - static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = idmap_pipe_downcall, .destroy_msg = idmap_pipe_destroy_msg, }; +struct key_type key_type_id_resolver_legacy = { + .name = "id_resolver", + .instantiate = user_instantiate, + .match = user_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = user_describe, + .read = user_read, + .request_key = nfs_idmap_legacy_upcall, +}; + static void __nfs_idmap_unregister(struct rpc_pipe *pipe) { if (pipe->dentry) @@ -468,38 +468,11 @@ nfs_idmap_new(struct nfs_client *clp) return error; } idmap->idmap_pipe = pipe; - mutex_init(&idmap->idmap_lock); - mutex_init(&idmap->idmap_im_lock); - init_waitqueue_head(&idmap->idmap_wq); - idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; - idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; clp->cl_idmap = idmap; return 0; } -static void -idmap_alloc_hashtable(struct idmap_hashtable *h) -{ - if (h->h_entries != NULL) - return; - h->h_entries = kcalloc(IDMAP_HASH_SZ, - sizeof(*h->h_entries), - GFP_KERNEL); -} - -static void -idmap_free_hashtable(struct idmap_hashtable *h) -{ - int i; - - if (h->h_entries == NULL) - return; - for (i = 0; i < IDMAP_HASH_SZ; i++) - kfree(h->h_entries[i].ih_name); - kfree(h->h_entries); -} - void nfs_idmap_delete(struct nfs_client *clp) { @@ -510,8 +483,6 @@ nfs_idmap_delete(struct nfs_client *clp) nfs_idmap_unregister(clp, idmap->idmap_pipe); rpc_destroy_pipe_data(idmap->idmap_pipe); clp->cl_idmap = NULL; - idmap_free_hashtable(&idmap->idmap_user_hash); - idmap_free_hashtable(&idmap->idmap_group_hash); kfree(idmap); } @@ -617,222 +588,107 @@ void nfs_idmap_quit(void) nfs_idmap_quit_keyring(); } -/* - * Helper routines for manipulating the hashtable - */ -static inline struct idmap_hashent * -idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) +static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, + struct rpc_pipe_msg *msg) { - if (h->h_entries == NULL) - return NULL; - return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; -} + substring_t substr; + int token, ret; -static struct idmap_hashent * -idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) -{ - struct idmap_hashent *he = idmap_name_hash(h, name, len); + memset(im, 0, sizeof(*im)); + memset(msg, 0, sizeof(*msg)); - if (he == NULL) - return NULL; - if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) - return NULL; - if (time_after(jiffies, he->ih_expires)) - return NULL; - return he; -} + im->im_type = IDMAP_TYPE_GROUP; + token = match_token(desc, nfs_idmap_tokens, &substr); -static inline struct idmap_hashent * -idmap_id_hash(struct idmap_hashtable* h, __u32 id) -{ - if (h->h_entries == NULL) - return NULL; - return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; -} + switch (token) { + case Opt_find_uid: + im->im_type = IDMAP_TYPE_USER; + case Opt_find_gid: + im->im_conv = IDMAP_CONV_NAMETOID; + ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ); + break; -static struct idmap_hashent * -idmap_lookup_id(struct idmap_hashtable *h, __u32 id) -{ - struct idmap_hashent *he = idmap_id_hash(h, id); + case Opt_find_user: + im->im_type = IDMAP_TYPE_USER; + case Opt_find_group: + im->im_conv = IDMAP_CONV_IDTONAME; + ret = match_int(&substr, &im->im_id); + break; - if (he == NULL) - return NULL; - if (he->ih_id != id || he->ih_namelen == 0) - return NULL; - if (time_after(jiffies, he->ih_expires)) - return NULL; - return he; -} - -/* - * Routines for allocating new entries in the hashtable. - * For now, we just have 1 entry per bucket, so it's all - * pretty trivial. - */ -static inline struct idmap_hashent * -idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len) -{ - idmap_alloc_hashtable(h); - return idmap_name_hash(h, name, len); -} - -static inline struct idmap_hashent * -idmap_alloc_id(struct idmap_hashtable *h, __u32 id) -{ - idmap_alloc_hashtable(h); - return idmap_id_hash(h, id); -} - -static void -idmap_update_entry(struct idmap_hashent *he, const char *name, - size_t namelen, __u32 id) -{ - char *str = kmalloc(namelen + 1, GFP_KERNEL); - if (str == NULL) - return; - kfree(he->ih_name); - he->ih_id = id; - memcpy(str, name, namelen); - str[namelen] = '\0'; - he->ih_name = str; - he->ih_namelen = namelen; - he->ih_expires = jiffies + nfs_idmap_cache_timeout; -} - -/* - * Name -> ID - */ -static int -nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, - const char *name, size_t namelen, __u32 *id) -{ - struct rpc_pipe_msg msg; - struct idmap_msg *im; - struct idmap_hashent *he; - DECLARE_WAITQUEUE(wq, current); - int ret = -EIO; - - im = &idmap->idmap_im; - - /* - * String sanity checks - * Note that the userland daemon expects NUL terminated strings - */ - for (;;) { - if (namelen == 0) - return -EINVAL; - if (name[namelen-1] != '\0') - break; - namelen--; - } - if (namelen >= IDMAP_NAMESZ) - return -EINVAL; - - mutex_lock(&idmap->idmap_lock); - mutex_lock(&idmap->idmap_im_lock); - - he = idmap_lookup_name(h, name, namelen); - if (he != NULL) { - *id = he->ih_id; - ret = 0; + default: + ret = -EINVAL; goto out; } - memset(im, 0, sizeof(*im)); - memcpy(im->im_name, name, namelen); + msg->data = im; + msg->len = sizeof(struct idmap_msg); - im->im_type = h->h_type; - im->im_conv = IDMAP_CONV_NAMETOID; - - memset(&msg, 0, sizeof(msg)); - msg.data = im; - msg.len = sizeof(*im); - - add_wait_queue(&idmap->idmap_wq, &wq); - if (rpc_queue_upcall(idmap->idmap_pipe, &msg) < 0) { - remove_wait_queue(&idmap->idmap_wq, &wq); - goto out; - } - - set_current_state(TASK_UNINTERRUPTIBLE); - mutex_unlock(&idmap->idmap_im_lock); - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&idmap->idmap_wq, &wq); - mutex_lock(&idmap->idmap_im_lock); - - if (im->im_status & IDMAP_STATUS_SUCCESS) { - *id = im->im_id; - ret = 0; - } - - out: - memset(im, 0, sizeof(*im)); - mutex_unlock(&idmap->idmap_im_lock); - mutex_unlock(&idmap->idmap_lock); +out: return ret; } -/* - * ID -> Name - */ -static int -nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, - __u32 id, char *name) +static int nfs_idmap_legacy_upcall(struct key_construction *cons, + const char *op, + void *aux) { - struct rpc_pipe_msg msg; + struct rpc_pipe_msg *msg; struct idmap_msg *im; - struct idmap_hashent *he; - DECLARE_WAITQUEUE(wq, current); - int ret = -EIO; - unsigned int len; + struct idmap *idmap = (struct idmap *)aux; + struct key *key = cons->key; + int ret; - im = &idmap->idmap_im; - - mutex_lock(&idmap->idmap_lock); - mutex_lock(&idmap->idmap_im_lock); - - he = idmap_lookup_id(h, id); - if (he) { - memcpy(name, he->ih_name, he->ih_namelen); - ret = he->ih_namelen; - goto out; + /* msg and im are freed in idmap_pipe_destroy_msg */ + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto out0; } - memset(im, 0, sizeof(*im)); - im->im_type = h->h_type; - im->im_conv = IDMAP_CONV_IDTONAME; - im->im_id = id; - - memset(&msg, 0, sizeof(msg)); - msg.data = im; - msg.len = sizeof(*im); - - add_wait_queue(&idmap->idmap_wq, &wq); - - if (rpc_queue_upcall(idmap->idmap_pipe, &msg) < 0) { - remove_wait_queue(&idmap->idmap_wq, &wq); - goto out; + im = kmalloc(sizeof(*im), GFP_KERNEL); + if (IS_ERR(im)) { + ret = PTR_ERR(im); + goto out1; } - set_current_state(TASK_UNINTERRUPTIBLE); - mutex_unlock(&idmap->idmap_im_lock); - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&idmap->idmap_wq, &wq); - mutex_lock(&idmap->idmap_im_lock); + ret = nfs_idmap_prepare_message(key->description, im, msg); + if (ret < 0) + goto out2; - if (im->im_status & IDMAP_STATUS_SUCCESS) { - if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) - goto out; - memcpy(name, im->im_name, len); - ret = len; + idmap->idmap_key_cons = cons; + + return rpc_queue_upcall(idmap->idmap_pipe, msg); + +out2: + kfree(im); +out1: + kfree(msg); +out0: + complete_request_key(cons, ret); + return ret; +} + +static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) +{ + return key_instantiate_and_link(key, data, strlen(data) + 1, + id_resolver_cache->thread_keyring, + authkey); +} + +static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey) +{ + char id_str[NFS_UINT_MAXLEN]; + int ret = -EINVAL; + + switch (im->im_conv) { + case IDMAP_CONV_NAMETOID: + sprintf(id_str, "%d", im->im_id); + ret = nfs_idmap_instantiate(key, authkey, id_str); + break; + case IDMAP_CONV_IDTONAME: + ret = nfs_idmap_instantiate(key, authkey, im->im_name); + break; } - out: - memset(im, 0, sizeof(*im)); - mutex_unlock(&idmap->idmap_im_lock); - mutex_unlock(&idmap->idmap_lock); return ret; } @@ -841,141 +697,69 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); struct idmap *idmap = (struct idmap *)rpci->private; - struct idmap_msg im_in, *im = &idmap->idmap_im; - struct idmap_hashtable *h; - struct idmap_hashent *he = NULL; + struct key_construction *cons = idmap->idmap_key_cons; + struct idmap_msg im; size_t namelen_in; int ret; - if (mlen != sizeof(im_in)) - return -ENOSPC; - - if (copy_from_user(&im_in, src, mlen) != 0) - return -EFAULT; - - mutex_lock(&idmap->idmap_im_lock); - - ret = mlen; - im->im_status = im_in.im_status; - /* If we got an error, terminate now, and wake up pending upcalls */ - if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) { - wake_up(&idmap->idmap_wq); + if (mlen != sizeof(im)) { + ret = -ENOSPC; goto out; } - /* Sanity checking of strings */ - ret = -EINVAL; - namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ); - if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) - goto out; - - switch (im_in.im_type) { - case IDMAP_TYPE_USER: - h = &idmap->idmap_user_hash; - break; - case IDMAP_TYPE_GROUP: - h = &idmap->idmap_group_hash; - break; - default: - goto out; - } - - switch (im_in.im_conv) { - case IDMAP_CONV_IDTONAME: - /* Did we match the current upcall? */ - if (im->im_conv == IDMAP_CONV_IDTONAME - && im->im_type == im_in.im_type - && im->im_id == im_in.im_id) { - /* Yes: copy string, including the terminating '\0' */ - memcpy(im->im_name, im_in.im_name, namelen_in); - im->im_name[namelen_in] = '\0'; - wake_up(&idmap->idmap_wq); - } - he = idmap_alloc_id(h, im_in.im_id); - break; - case IDMAP_CONV_NAMETOID: - /* Did we match the current upcall? */ - if (im->im_conv == IDMAP_CONV_NAMETOID - && im->im_type == im_in.im_type - && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in - && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) { - im->im_id = im_in.im_id; - wake_up(&idmap->idmap_wq); - } - he = idmap_alloc_name(h, im_in.im_name, namelen_in); - break; - default: + if (copy_from_user(&im, src, mlen) != 0) { + ret = -EFAULT; goto out; } - /* If the entry is valid, also copy it to the cache */ - if (he != NULL) - idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); - ret = mlen; + if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { + ret = mlen; + complete_request_key(idmap->idmap_key_cons, -ENOKEY); + goto out_incomplete; + } + + namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); + if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { + ret = -EINVAL; + goto out; + } + + ret = nfs_idmap_read_message(&im, cons->key, cons->authkey); + if (ret >= 0) { + key_set_timeout(cons->key, nfs_idmap_cache_timeout); + ret = mlen; + } + out: - mutex_unlock(&idmap->idmap_im_lock); + complete_request_key(idmap->idmap_key_cons, ret); +out_incomplete: return ret; } static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) { - struct idmap_msg *im = msg->data; - struct idmap *idmap = container_of(im, struct idmap, idmap_im); - - if (msg->errno >= 0) - return; - mutex_lock(&idmap->idmap_im_lock); - im->im_status = IDMAP_STATUS_LOOKUPFAIL; - wake_up(&idmap->idmap_wq); - mutex_unlock(&idmap->idmap_im_lock); -} - -/* - * Fowler/Noll/Vo hash - * http://www.isthe.com/chongo/tech/comp/fnv/ - */ - -#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */ -#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */ - -static unsigned int fnvhash32(const void *buf, size_t buflen) -{ - const unsigned char *p, *end = (const unsigned char *)buf + buflen; - unsigned int hash = FNV_1_32; - - for (p = buf; p < end; p++) { - hash *= FNV_P_32; - hash ^= (unsigned int)*p; - } - - return hash; + /* Free memory allocated in nfs_idmap_legacy_upcall() */ + kfree(msg->data); + kfree(msg); } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = server->nfs_client->cl_idmap; - int ret = -EINVAL; if (nfs_map_string_to_numeric(name, namelen, uid)) return 0; - ret = nfs_idmap_lookup_id(name, namelen, "uid", uid); - if (ret < 0) - ret = nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); - return ret; + return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap); } int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) { struct idmap *idmap = server->nfs_client->cl_idmap; - int ret = -EINVAL; if (nfs_map_string_to_numeric(name, namelen, gid)) return 0; - ret = nfs_idmap_lookup_id(name, namelen, "gid", gid); - if (ret < 0) - ret = nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, gid); - return ret; + return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap); } int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) @@ -983,11 +767,8 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s struct idmap *idmap = server->nfs_client->cl_idmap; int ret = -EINVAL; - if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) { - ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); - if (ret < 0) - ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); - } + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) + ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap); if (ret < 0) ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; @@ -997,11 +778,8 @@ int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, struct idmap *idmap = server->nfs_client->cl_idmap; int ret = -EINVAL; - if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) { - ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); - if (ret < 0) - ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, gid, buf); - } + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) + ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap); if (ret < 0) ret = nfs_map_numeric_to_string(gid, buf, buflen); return ret; diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 7eed2012d288..717fa5019e75 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -69,19 +69,8 @@ struct nfs_server; struct nfs_fattr; struct nfs4_string; -#ifdef CONFIG_NFS_V4 int nfs_idmap_init(void); void nfs_idmap_quit(void); -#else -static inline int nfs_idmap_init(void) -{ - return 0; -} - -static inline void nfs_idmap_quit(void) -{} -#endif - int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); From db8ac8ba871ae7b97118cfb2913b4986867f09a7 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 17 Feb 2012 15:20:24 -0500 Subject: [PATCH 237/316] NFSv4: Send implementation id with exchange_id Send the nfs implementation id in EXCHANGE_ID requests unless the module parameter nfs.send_implementation_id is 0. This adds a CONFIG variable for the nii_domain that defaults to "kernel.org". Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- Documentation/kernel-parameters.txt | 9 +++++++ fs/nfs/Kconfig | 12 +++++++++ fs/nfs/nfs4xdr.c | 41 +++++++++++++++++++++++++++-- 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1d369c6286e1..7bae0fd3b63c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1678,6 +1678,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. back to using the idmapper. To turn off this behaviour, set the value to '0'. + nfs.send_implementation_id = + [NFSv4.1] Send client implementation identification + information in exchange_id requests. + If zero, no implementation identification information + will be sent. + The default is to send the implementation identification + information. + + nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take when a NMI is triggered. Format: [state][,regs][,debounce][,die] diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index ee86cfcd6c33..7bce64c7060e 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -99,6 +99,18 @@ config PNFS_OBJLAYOUT depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD default m +config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN + string "NFSv4.1 Implementation ID Domain" + depends on NFS_V4_1 + default "kernel.org" + help + This option defines the domain portion of the implementation ID that + may be sent in the NFS exchange_id operation. The value must be in + the format of a DNS domain name and should be set to the DNS domain + name of the distribution. + If the NFS client is unchanged from the upstream kernel, this + option should be set to the default "kernel.org". + config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ae7834366712..d824aedb1237 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -44,6 +44,8 @@ #include #include #include +#include +#include #include #include #include @@ -271,7 +273,12 @@ static int nfs4_stat_to_errno(int); 1 /* flags */ + \ 1 /* spa_how */ + \ 0 /* SP4_NONE (for now) */ + \ - 1 /* zero implemetation id array */) + 1 /* implementation id array of size 1 */ + \ + 1 /* nii_domain */ + \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + 1 /* nii_name */ + \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + 3 /* nii_date */) #define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ 2 /* eir_clientid */ + \ 1 /* eir_sequenceid */ + \ @@ -838,6 +845,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + XDR_UNIT); #endif /* CONFIG_NFS_V4_1 */ +static unsigned short send_implementation_id = 1; + +module_param(send_implementation_id, ushort, 0644); +MODULE_PARM_DESC(send_implementation_id, + "Send implementation ID with NFSv4.1 exchange_id"); + static const umode_t nfs_type2fmt[] = { [NF4BAD] = 0, [NF4REG] = S_IFREG, @@ -1766,6 +1779,8 @@ static void encode_exchange_id(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; + char impl_name[NFS4_OPAQUE_LIMIT]; + int len = 0; p = reserve_space(xdr, 4 + sizeof(args->verifier->data)); *p++ = cpu_to_be32(OP_EXCHANGE_ID); @@ -1776,7 +1791,29 @@ static void encode_exchange_id(struct xdr_stream *xdr, p = reserve_space(xdr, 12); *p++ = cpu_to_be32(args->flags); *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ - *p = cpu_to_be32(0); /* zero length implementation id array */ + + if (send_implementation_id && + sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 && + sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) + <= NFS4_OPAQUE_LIMIT + 1) + len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s", + utsname()->sysname, utsname()->release, + utsname()->version, utsname()->machine); + + if (len > 0) { + *p = cpu_to_be32(1); /* implementation id array length=1 */ + + encode_string(xdr, + sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1, + CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN); + encode_string(xdr, len, impl_name); + /* just send zeros for nii_date - the date is in nii_name */ + p = reserve_space(xdr, 12); + p = xdr_encode_hyper(p, 0); + *p = cpu_to_be32(0); + } else + *p = cpu_to_be32(0); /* implementation id array length=0 */ + hdr->nops++; hdr->replen += decode_exchange_id_maxsz; } From 9edbd953f8aeabf49b89c7c29ff9e31560775b27 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 17 Feb 2012 15:20:25 -0500 Subject: [PATCH 238/316] NFSv4: fix server_scope memory leak server_scope would never be freed if nfs4_check_cl_exchange_flags() returned non-zero Signed-off-by: Weston Andros Adamson Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 87c584dd88b1..20c3bb06763a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4945,8 +4945,10 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->cl_rpcclient->cl_auth->au_flavor); res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); - if (unlikely(!res.server_scope)) - return -ENOMEM; + if (unlikely(!res.server_scope)) { + status = -ENOMEM; + goto out; + } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) @@ -4963,12 +4965,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->server_scope = NULL; } - if (!clp->server_scope) + if (!clp->server_scope) { clp->server_scope = res.server_scope; - else - kfree(res.server_scope); + goto out; + } } - + kfree(res.server_scope); +out: dprintk("<-- %s status= %d\n", __func__, status); return status; } From 7d2ed9ac22bc6bf0d34e8fd291a5295f373b384e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 17 Feb 2012 15:20:26 -0500 Subject: [PATCH 239/316] NFSv4: parse and display server implementation ids Shows the implementation ids in /proc/self/mountstats. This doesn't break the nfs-utils mountstats tool. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + fs/nfs/nfs4proc.c | 21 ++++++++++++++++++++ fs/nfs/nfs4xdr.c | 42 ++++++++++++++++++++++++++++++++++----- fs/nfs/super.c | 8 ++++++++ include/linux/nfs_fs_sb.h | 2 ++ include/linux/nfs_xdr.h | 15 +++++++------- 6 files changed, 76 insertions(+), 13 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 592b5583aa3a..1506adf4d4ed 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -304,6 +304,7 @@ static void nfs_free_client(struct nfs_client *clp) put_net(clp->net); kfree(clp->cl_hostname); kfree(clp->server_scope); + kfree(clp->impl_id); kfree(clp); dprintk("<-- nfs_free_client()\n"); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 20c3bb06763a..90a17cc3ebc9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4950,10 +4950,23 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) goto out; } + res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); + if (unlikely(!res.impl_id)) { + status = -ENOMEM; + goto out_server_scope; + } + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); + if (!status) { + /* use the most recent implementation id */ + kfree(clp->impl_id); + clp->impl_id = res.impl_id; + } else + kfree(res.impl_id); + if (!status) { if (clp->server_scope && !nfs41_same_server_scope(clp->server_scope, @@ -4970,8 +4983,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) goto out; } } + +out_server_scope: kfree(res.server_scope); out: + if (clp->impl_id) + dprintk("%s: Server Implementation ID: " + "domain: %s, name: %s, date: %llu,%u\n", + __func__, clp->impl_id->domain, clp->impl_id->name, + clp->impl_id->date.seconds, + clp->impl_id->date.nseconds); dprintk("<-- %s status= %d\n", __func__, status); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d824aedb1237..b7c04339fdc1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -291,7 +291,11 @@ static int nfs4_stat_to_errno(int); /* eir_server_scope<> */ \ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ 1 /* eir_server_impl_id array length */ + \ - 0 /* ignored eir_server_impl_id contents */) + 1 /* nii_domain */ + \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + 1 /* nii_name */ + \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + 3 /* nii_date */) #define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) #define decode_channel_attrs_maxsz (6 + \ 1 /* ca_rdma_ird.len */ + \ @@ -5256,6 +5260,7 @@ static int decode_exchange_id(struct xdr_stream *xdr, char *dummy_str; int status; struct nfs_client *clp = res->client; + uint32_t impl_id_count; status = decode_op_hdr(xdr, OP_EXCHANGE_ID); if (status) @@ -5297,11 +5302,38 @@ static int decode_exchange_id(struct xdr_stream *xdr, memcpy(res->server_scope->server_scope, dummy_str, dummy); res->server_scope->server_scope_sz = dummy; - /* Throw away Implementation id array */ - status = decode_opaque_inline(xdr, &dummy, &dummy_str); - if (unlikely(status)) - return status; + /* Implementation Id */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + impl_id_count = be32_to_cpup(p++); + if (impl_id_count) { + /* nii_domain */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + memcpy(res->impl_id->domain, dummy_str, dummy); + + /* nii_name */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + memcpy(res->impl_id->name, dummy_str, dummy); + + /* nii_date */ + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + goto out_overflow; + p = xdr_decode_hyper(p, &res->impl_id->date.seconds); + res->impl_id->date.nseconds = be32_to_cpup(p); + + /* if there's more than one entry, ignore the rest */ + } return 0; out_overflow: print_overflow_msg(__func__, xdr); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6708f3044eb0..8154accd1168 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -809,6 +809,14 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + if (nfss->nfs_client && nfss->nfs_client->impl_id) { + struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; + seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," + "date='%llu,%u'", + impl_id->name, impl_id->domain, + impl_id->date.seconds, impl_id->date.nseconds); + } + seq_printf(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); seq_printf(m, ",wtmult=%u", nfss->wtmult); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3bf47666646e..03d0b91c2d5b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -18,6 +18,7 @@ struct nfs4_sequence_res; struct nfs_server; struct nfs4_minor_version_ops; struct server_scope; +struct nfs41_impl_id; /* * The nfs_client identifies our client state to the server. @@ -86,6 +87,7 @@ struct nfs_client { #endif struct server_scope *server_scope; /* from exchange_id */ + struct nfs41_impl_id *impl_id; /* from exchange_id */ struct net *net; }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index adbc84ac345f..046c1bfddc33 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1054,14 +1054,6 @@ struct nfstime4 { }; #ifdef CONFIG_NFS_V4_1 -struct nfs_impl_id4 { - u32 domain_len; - char *domain; - u32 name_len; - char *name; - struct nfstime4 date; -}; - #define NFS4_EXCHANGE_ID_LEN (48) struct nfs41_exchange_id_args { struct nfs_client *client; @@ -1082,10 +1074,17 @@ struct server_scope { char server_scope[NFS4_OPAQUE_LIMIT]; }; +struct nfs41_impl_id { + char domain[NFS4_OPAQUE_LIMIT + 1]; + char name[NFS4_OPAQUE_LIMIT + 1]; + struct nfstime4 date; +}; + struct nfs41_exchange_id_res { struct nfs_client *client; u32 flags; struct server_scope *server_scope; + struct nfs41_impl_id *impl_id; }; struct nfs41_create_session_args { From 0d71b058092fc98cfef8e8f6d913180a10a55397 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 2 Mar 2012 13:59:49 -0500 Subject: [PATCH 240/316] NFS: Extend the -overs= mount option to allow 4.x minorversions Allow the user to mount an NFSv4.0 or NFSv4.1 partition using a standard syntax of '-overs=4.0', or '-overs=4.1' rather than the more cumbersome '-overs=4,minorversion=1'. See also the earlier patch by Dros Adamson, which added the Linux-specific syntax '-ov4.0', '-ov4.1'. Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 84 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8154accd1168..ab58bb9b6115 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -98,10 +98,10 @@ enum { Opt_namelen, Opt_mountport, Opt_mountvers, - Opt_nfsvers, Opt_minorversion, /* Mount options that take string arguments */ + Opt_nfsvers, Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, Opt_lookupcache, @@ -166,9 +166,10 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_namelen, "namlen=%s" }, { Opt_mountport, "mountport=%s" }, { Opt_mountvers, "mountvers=%s" }, + { Opt_minorversion, "minorversion=%s" }, + { Opt_nfsvers, "nfsvers=%s" }, { Opt_nfsvers, "vers=%s" }, - { Opt_minorversion, "minorversion=%s" }, { Opt_sec, "sec=%s" }, { Opt_proto, "proto=%s" }, @@ -262,6 +263,22 @@ static match_table_t nfs_local_lock_tokens = { { Opt_local_lock_err, NULL } }; +enum { + Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, + Opt_vers_4_1, + + Opt_vers_err +}; + +static match_table_t nfs_vers_tokens = { + { Opt_vers_2, "2" }, + { Opt_vers_3, "3" }, + { Opt_vers_4, "4" }, + { Opt_vers_4_0, "4.0" }, + { Opt_vers_4_1, "4.1" }, + + { Opt_vers_err, NULL } +}; static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct dentry *, struct kstatfs *); @@ -1064,6 +1081,40 @@ static int nfs_parse_security_flavors(char *value, return 1; } +static int nfs_parse_version_string(char *string, + struct nfs_parsed_mount_data *mnt, + substring_t *args) +{ + mnt->flags &= ~NFS_MOUNT_VER3; + switch (match_token(string, nfs_vers_tokens, args)) { + case Opt_vers_2: + mnt->version = 2; + break; + case Opt_vers_3: + mnt->flags |= NFS_MOUNT_VER3; + mnt->version = 3; + break; + case Opt_vers_4: + /* Backward compatibility option. In future, + * the mount program should always supply + * a NFSv4 minor version number. + */ + mnt->version = 4; + break; + case Opt_vers_4_0: + mnt->version = 4; + mnt->minorversion = 0; + break; + case Opt_vers_4_1: + mnt->version = 4; + mnt->minorversion = 1; + break; + default: + return 0; + } + return 1; +} + static int nfs_get_option_str(substring_t args[], char **option) { kfree(*option); @@ -1317,26 +1368,6 @@ static int nfs_parse_mount_options(char *raw, goto out_invalid_value; mnt->mount_server.version = option; break; - case Opt_nfsvers: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - switch (option) { - case NFS2_VERSION: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 2; - break; - case NFS3_VERSION: - mnt->flags |= NFS_MOUNT_VER3; - mnt->version = 3; - break; - case NFS4_VERSION: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 4; - break; - default: - goto out_invalid_value; - } - break; case Opt_minorversion: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; @@ -1348,6 +1379,15 @@ static int nfs_parse_mount_options(char *raw, /* * options that take text values */ + case Opt_nfsvers: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = nfs_parse_version_string(string, mnt, args); + kfree(string); + if (!rc) + goto out_invalid_value; + break; case Opt_sec: string = match_strdup(args); if (string == NULL) From 7bbceb6f2bdda67054bc66035a9543623e539126 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 2 Mar 2012 14:00:20 -0500 Subject: [PATCH 241/316] NFS: Ensure we display the minor version correctly in /proc/mounts etc. The 'minorversion' mount option is now deprecated, so we need to display the minor version number in the 'vers=' format. Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ab58bb9b6115..7f0c93f8afe3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -640,7 +640,6 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, struct nfs_client *clp = nfss->nfs_client; seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); - seq_printf(m, ",minorversion=%u", clp->cl_minorversion); } #else static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, @@ -649,6 +648,15 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, } #endif +static void nfs_show_nfs_version(struct seq_file *m, + unsigned int version, + unsigned int minorversion) +{ + seq_printf(m, ",vers=%u", version); + if (version == 4) + seq_printf(m, ".%u", minorversion); +} + /* * Describe the mount options in force on this server representation */ @@ -676,7 +684,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, u32 version = clp->rpc_ops->version; int local_flock, local_fcntl; - seq_printf(m, ",vers=%u", version); + nfs_show_nfs_version(m, version, clp->cl_minorversion); seq_printf(m, ",rsize=%u", nfss->rsize); seq_printf(m, ",wsize=%u", nfss->wsize); if (nfss->bsize != 0) From 3862279a5fcf44d0c68fa54a507a5bcd2ab4f0b7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 2 Mar 2012 14:06:39 -0500 Subject: [PATCH 242/316] NFS: Consolidate the parsing of the '-ov4.x' and '-overs=4.x' mount options Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7f0c93f8afe3..f4ccdae6a0cf 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -80,7 +80,6 @@ enum { Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, - Opt_v2, Opt_v3, Opt_v4, Opt_v4_0, Opt_v4_1, Opt_udp, Opt_tcp, Opt_rdma, Opt_acl, Opt_noacl, Opt_rdirplus, Opt_nordirplus, @@ -133,11 +132,6 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_noac, "noac" }, { Opt_lock, "lock" }, { Opt_nolock, "nolock" }, - { Opt_v2, "v2" }, - { Opt_v3, "v3" }, - { Opt_v4, "v4" }, - { Opt_v4_0, "v4.0" }, - { Opt_v4_1, "v4.1" }, { Opt_udp, "udp" }, { Opt_tcp, "tcp" }, { Opt_rdma, "rdma" }, @@ -183,6 +177,9 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_fscache_uniq, "fsc=%s" }, { Opt_local_lock, "local_lock=%s" }, + /* The following needs to be listed after all other options */ + { Opt_nfsvers, "v%s" }, + { Opt_err, NULL } }; @@ -1228,28 +1225,6 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; - case Opt_v2: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 2; - break; - case Opt_v3: - mnt->flags |= NFS_MOUNT_VER3; - mnt->version = 3; - break; - case Opt_v4: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 4; - break; - case Opt_v4_0: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 4; - mnt->minorversion = 0; - break; - case Opt_v4_1: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 4; - mnt->minorversion = 1; - break; case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; From 88b8e133c46792d264c991065c2c395d0b3b5482 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Mar 2012 17:00:23 -0500 Subject: [PATCH 243/316] NFS: Make nfs_cache_array.size a signed integer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminate a number of implicit type casts in comparisons, and these compiler warnings: fs/nfs/dir.c: In function ‘nfs_readdir_clear_array’: fs/nfs/dir.c:264:16: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/dir.c: In function ‘nfs_readdir_search_for_cookie’: fs/nfs/dir.c:352:16: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/dir.c: In function ‘nfs_do_filldir’: fs/nfs/dir.c:769:38: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/dir.c:780:9: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bb132a88f4e8..9952170271b2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -207,7 +207,7 @@ struct nfs_cache_array_entry { }; struct nfs_cache_array { - unsigned int size; + int size; int eof_index; u64 last_cookie; struct nfs_cache_array_entry array[0]; From 02a2976c9180a7dcc43bc46cf69bd3687a9d7ea6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Mar 2012 17:00:31 -0500 Subject: [PATCH 244/316] NFS: Clean up debugging in decode_pathname() I noticed recently that decode_attr_fs_locations() is not generating very pretty debugging output. The pathname components each appear on a separate line of output, though that does not appear to be the intended display behavior. The preferred way to generate continued lines of output on the console is to use pr_cont(). Note that incoming pathname4 components contain a string that is not necessarily NUL-terminated. I did actually see some trailing garbage on the console. In addition to correcting the line continuation problem, add a string precision format specifier to ensure that each component string is displayed properly, and that vsnprintf() does not Oops. Someone pointed out that allowing incoming network data to possibly generate a console line of unbounded length may not be such a good idea. Since this output will rarely be enabled, and there is a hard upper bound (NFS4_PATHNAME_MAXCOMPONENTS) in our implementation, this is probably not a major concern. It might be useful to additionally sanity-check the length of each incoming component, however. RFC 3530bis15 does not suggest a maximum number of UTF-8 characters per component for either the pathname4 or component4 types. However, we could invent one that is appropriate for our implementation. Another possibility is to scrap all of this and print these pathnames in upper layers after a reasonable amount of sanity checking in the XDR layer. This would give us an opportunity to allocate a full buffer so that the whole pathname would be output via a single dprintk. Introduced by commit 7aaa0b3b: "NFSv4: convert fs-locations-components to conform to RFC3530," (June 9, 2006). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b7c04339fdc1..b5c5212cd184 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3555,16 +3555,17 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) n = be32_to_cpup(p); if (n == 0) goto root_path; - dprintk("path "); + dprintk("pathname4: "); path->ncomponents = 0; while (path->ncomponents < n) { struct nfs4_string *component = &path->components[path->ncomponents]; status = decode_opaque_inline(xdr, &component->len, &component->data); if (unlikely(status != 0)) goto out_eio; - if (path->ncomponents != n) - dprintk("/"); - dprintk("%s", component->data); + if (unlikely(nfs_debug & NFSDBG_XDR)) + pr_cont("%s%.*s ", + (path->ncomponents != n ? "/ " : ""), + component->len, component->data); if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) path->ncomponents++; else { @@ -3573,14 +3574,13 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) } } out: - dprintk("\n"); return status; root_path: /* a root pathname is sent as a zero component4 */ path->ncomponents = 1; path->components[0].len=0; path->components[0].data=NULL; - dprintk("path /\n"); + dprintk("pathname4: /\n"); goto out; out_eio: dprintk(" status %d", status); @@ -3606,7 +3606,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st /* Ignore borken servers that return unrequested attrs */ if (unlikely(res == NULL)) goto out; - dprintk("%s: fsroot ", __func__); + dprintk("%s: fsroot:\n", __func__); status = decode_pathname(xdr, &res->fs_path); if (unlikely(status != 0)) goto out; @@ -3627,7 +3627,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st m = be32_to_cpup(p); loc->nservers = 0; - dprintk("%s: servers ", __func__); + dprintk("%s: servers:\n", __func__); while (loc->nservers < m) { struct nfs4_string *server = &loc->servers[loc->nservers]; status = decode_opaque_inline(xdr, &server->len, &server->data); From a3ca5651cb5eebe2e56e510bbf5cd60abc301c9f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Mar 2012 17:00:40 -0500 Subject: [PATCH 245/316] NFS: Add debugging messages to NFSv4's CLOSE procedure CLOSE is new with NFSv4. Sometimes it's important to know the timing of this operation compared to things like lease renewal. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 90a17cc3ebc9..6c8e170e2e6b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1983,6 +1983,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); + dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; /* hmm. we are done with the inode, and in the process of freeing @@ -2010,6 +2011,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) } nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); + dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); } static void nfs4_close_prepare(struct rpc_task *task, void *data) @@ -2018,6 +2020,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_state *state = calldata->state; int call_close = 0; + dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; @@ -2042,7 +2045,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (!call_close) { /* Note: exit _without_ calling nfs4_close_done */ task->tk_action = NULL; - return; + goto out; } if (calldata->arg.fmode == 0) { @@ -2051,7 +2054,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, task, NULL); - return; + goto out; } } @@ -2061,8 +2064,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) &calldata->arg.seq_args, &calldata->res.seq_res, task)) - return; + goto out; rpc_call_start(task); +out: + dprintk("%s: done!\n", __func__); } static const struct rpc_call_ops nfs4_close_ops = { From 2446ab6070861aba2dd9229463ffbc40016a9f33 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Mar 2012 17:00:56 -0500 Subject: [PATCH 246/316] SUNRPC: Use RCU to dereference the rpc_clnt.cl_xprt field A migration event will replace the rpc_xprt used by an rpc_clnt. To ensure this can be done safely, all references to cl_xprt must now use a form of rcu_dereference(). Special care is taken with rpc_peeraddr2str(), which returns a pointer to memory whose lifetime is the same as the rpc_xprt. Signed-off-by: Trond Myklebust [ cel: fix lockdep splats and layering violations ] [ cel: forward ported to 3.4 ] [ cel: remove rpc_max_reqs(), add rpc_net_ns() ] Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 9 +-- fs/nfs/client.c | 16 +++-- fs/nfs/nfs4namespace.c | 2 +- fs/nfs/nfs4proc.c | 13 +++- fs/nfs/nfs4state.c | 25 +++++--- fs/nfs/super.c | 5 ++ include/linux/sunrpc/clnt.h | 4 +- include/linux/sunrpc/debug.h | 13 ++++ net/sunrpc/auth_gss/auth_gss.c | 4 +- net/sunrpc/clnt.c | 110 ++++++++++++++++++++++++++------- net/sunrpc/rpc_pipe.c | 3 + net/sunrpc/rpcb_clnt.c | 15 +++-- net/sunrpc/stats.c | 6 +- 13 files changed, 175 insertions(+), 50 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0e0865e38065..1bb297243624 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" @@ -33,7 +34,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, res->bitmap[0] = res->bitmap[1] = 0; res->status = htonl(NFS4ERR_BADHANDLE); - dprintk("NFS: GETATTR callback request from %s\n", + dprintk_rcu("NFS: GETATTR callback request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); inode = nfs_delegation_find_inode(cps->clp, &args->fh); @@ -73,7 +74,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ goto out; - dprintk("NFS: RECALL callback request from %s\n", + dprintk_rcu("NFS: RECALL callback request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); res = htonl(NFS4ERR_BADHANDLE); @@ -533,7 +534,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk("NFS: RECALL_ANY callback request from %s\n", + dprintk_rcu("NFS: RECALL_ANY callback request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); status = cpu_to_be32(NFS4ERR_INVAL); @@ -568,7 +569,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", + dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), args->crsa_target_max_slots); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1506adf4d4ed..d038dc5916e5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1284,16 +1284,18 @@ static int nfs4_init_callback(struct nfs_client *clp) int error; if (clp->rpc_ops->version == 4) { + struct rpc_xprt *xprt; + + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); + if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel( - clp->cl_rpcclient->cl_xprt, + error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); if (error < 0) return error; } - error = nfs_callback_up(clp->cl_mvops->minor_version, - clp->cl_rpcclient->cl_xprt); + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); if (error < 0) { dprintk("%s: failed to start callback. Error = %d\n", __func__, error); @@ -1678,7 +1680,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, data->addrlen, parent_client->cl_ipaddr, data->authflavor, - parent_server->client->cl_xprt->prot, + rpc_protocol(parent_server->client), parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, parent_client->net); @@ -1905,12 +1907,14 @@ static int nfs_server_list_show(struct seq_file *m, void *v) if (clp->cl_cons_state != NFS_CS_READY) return 0; + rcu_read_lock(); seq_printf(m, "v%u %s %s %3d %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), atomic_read(&clp->cl_count), clp->cl_hostname); + rcu_read_unlock(); return 0; } @@ -1993,6 +1997,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); + rcu_read_lock(); seq_printf(m, "v%u %s %s %-7s %-17s %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), @@ -2000,6 +2005,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) dev, fsid, nfs_server_fscache_state(server)); + rcu_read_unlock(); return 0; } diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 667ea7406fd3..9c8eca315f43 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -96,8 +96,8 @@ static int nfs4_validate_fspath(struct dentry *dentry, static size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, size_t salen, struct nfs_server *server) { + struct net *net = rpc_net_ns(server->client); ssize_t ret; - struct net *net = server->client->cl_xprt->xprt_net; ret = rpc_pton(net, string, len, sa, salen); if (ret == 0) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6c8e170e2e6b..671510cc14c0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3833,6 +3833,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, *p = htonl((u32)clp->cl_boot_time.tv_nsec); for(;;) { + rcu_read_lock(); setclientid.sc_name_len = scnprintf(setclientid.sc_name, sizeof(setclientid.sc_name), "%s/%s %s %s %u", clp->cl_ipaddr, @@ -3849,6 +3850,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); + rcu_read_unlock(); status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status != -NFS4ERR_CLID_INUSE) @@ -5244,11 +5246,16 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) void nfs4_destroy_session(struct nfs4_session *session) { + struct rpc_xprt *xprt; + nfs4_proc_destroy_session(session); + + rcu_read_lock(); + xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); + rcu_read_unlock(); dprintk("%s Destroy backchannel for xprt %p\n", - __func__, session->clp->cl_rpcclient->cl_xprt); - xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt, - NFS41_BC_MIN_CALLBACKS); + __func__, xprt); + xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); nfs4_destroy_slot_tables(session); kfree(session); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c1111a37dc14..bae959e294cd 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1037,19 +1037,28 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp) void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; + char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); - task = kthread_run(nfs4_run_state_manager, clp, "%s-manager", - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR)); - if (!IS_ERR(task)) - return; - nfs4_clear_state_manager_bit(clp); - nfs_put_client(clp); - module_put(THIS_MODULE); + + /* The rcu_read_lock() is not strictly necessary, as the state + * manager is the only thread that ever changes the rpc_xprt + * after it's initialized. At this point, we're single threaded. */ + rcu_read_lock(); + snprintf(buf, sizeof(buf), "%s-manager", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); + task = kthread_run(nfs4_run_state_manager, clp, buf); + if (IS_ERR(task)) { + printk(KERN_ERR "%s: kthread_run: %ld\n", + __func__, PTR_ERR(task)); + nfs4_clear_state_manager_bit(clp); + nfs_put_client(clp); + module_put(THIS_MODULE); + } } /* diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f4ccdae6a0cf..7002be11d99f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -701,8 +702,10 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, else seq_puts(m, nfs_infop->nostr); } + rcu_read_lock(); seq_printf(m, ",proto=%s", rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); + rcu_read_unlock(); if (version == 4) { if (nfss->port != NFS_PORT) seq_printf(m, ",port=%u", nfss->port); @@ -751,9 +754,11 @@ static int nfs_show_options(struct seq_file *m, struct dentry *root) nfs_show_mount_options(m, nfss, 0); + rcu_read_lock(); seq_printf(m, ",addr=%s", rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); return 0; } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a4c62e95c720..e3d12b4a0314 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -35,7 +35,7 @@ struct rpc_clnt { struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ - struct rpc_xprt * cl_xprt; /* transport */ + struct rpc_xprt __rcu * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ cl_vers, /* RPC version number */ @@ -156,6 +156,8 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int rpc_restart_call_prepare(struct rpc_task *); int rpc_restart_call(struct rpc_task *); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); +int rpc_protocol(struct rpc_clnt *); +struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index b506936f4ce6..6cb2517bcf75 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -50,19 +50,32 @@ extern unsigned int nlm_debug; #endif #define dprintk(args...) dfprintk(FACILITY, ## args) +#define dprintk_rcu(args...) dfprintk_rcu(FACILITY, ## args) #undef ifdebug #ifdef RPC_DEBUG # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) + # define dfprintk(fac, args...) \ do { \ ifdebug(fac) \ printk(KERN_DEFAULT args); \ } while (0) + +# define dfprintk_rcu(fac, args...) \ + do { \ + ifdebug(fac) { \ + rcu_read_lock(); \ + printk(KERN_DEFAULT args); \ + rcu_read_unlock(); \ + } \ + } while (0) + # define RPC_IFDEBUG(x) x #else # define ifdebug(fac) if (0) # define dfprintk(fac, args...) do ; while (0) +# define dfprintk_rcu(fac, args...) do ; while (0) # define RPC_IFDEBUG(x) #endif diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index cb2e56452748..d3ad81f8da5b 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -799,7 +799,7 @@ err_unlink_pipe_1: static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, struct rpc_auth *auth) { - struct net *net = clnt->cl_xprt->xprt_net; + struct net *net = rpc_net_ns(clnt); struct super_block *sb; sb = rpc_get_sb_net(net); @@ -813,7 +813,7 @@ static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, struct rpc_auth *auth) { - struct net *net = clnt->cl_xprt->xprt_net; + struct net *net = rpc_net_ns(clnt); struct super_block *sb; int err = 0; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 25c3da53fb69..7783fc0e7263 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -81,7 +82,8 @@ static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { - struct sunrpc_net *sn = net_generic(clnt->cl_xprt->xprt_net, sunrpc_net_id); + struct net *net = rpc_net_ns(clnt); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); spin_lock(&sn->rpc_client_lock); list_add(&clnt->cl_clients, &sn->all_clients); @@ -90,7 +92,8 @@ static void rpc_register_client(struct rpc_clnt *clnt) static void rpc_unregister_client(struct rpc_clnt *clnt) { - struct sunrpc_net *sn = net_generic(clnt->cl_xprt->xprt_net, sunrpc_net_id); + struct net *net = rpc_net_ns(clnt); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); spin_lock(&sn->rpc_client_lock); list_del(&clnt->cl_clients); @@ -109,12 +112,13 @@ static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) { + struct net *net = rpc_net_ns(clnt); struct super_block *pipefs_sb; - pipefs_sb = rpc_get_sb_net(clnt->cl_xprt->xprt_net); + pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { __rpc_clnt_remove_pipedir(clnt); - rpc_put_sb_net(clnt->cl_xprt->xprt_net); + rpc_put_sb_net(net); } } @@ -155,17 +159,18 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, static int rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name) { + struct net *net = rpc_net_ns(clnt); struct super_block *pipefs_sb; struct dentry *dentry; clnt->cl_dentry = NULL; if (dir_name == NULL) return 0; - pipefs_sb = rpc_get_sb_net(clnt->cl_xprt->xprt_net); + pipefs_sb = rpc_get_sb_net(net); if (!pipefs_sb) return 0; dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); - rpc_put_sb_net(clnt->cl_xprt->xprt_net); + rpc_put_sb_net(net); if (IS_ERR(dentry)) return PTR_ERR(dentry); clnt->cl_dentry = dentry; @@ -295,7 +300,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru if (clnt->cl_server == NULL) goto out_no_server; - clnt->cl_xprt = xprt; + rcu_assign_pointer(clnt->cl_xprt, xprt); clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; @@ -310,7 +315,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru INIT_LIST_HEAD(&clnt->cl_tasks); spin_lock_init(&clnt->cl_lock); - if (!xprt_bound(clnt->cl_xprt)) + if (!xprt_bound(xprt)) clnt->cl_autobind = 1; clnt->cl_timeout = xprt->timeout; @@ -477,6 +482,7 @@ struct rpc_clnt * rpc_clone_client(struct rpc_clnt *clnt) { struct rpc_clnt *new; + struct rpc_xprt *xprt; int err = -ENOMEM; new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); @@ -499,18 +505,25 @@ rpc_clone_client(struct rpc_clnt *clnt) if (new->cl_principal == NULL) goto out_no_principal; } + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (xprt == NULL) + goto out_no_transport; + rcu_assign_pointer(new->cl_xprt, xprt); atomic_set(&new->cl_count, 1); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); - xprt_get(clnt->cl_xprt); atomic_inc(&clnt->cl_count); rpc_register_client(new); rpciod_up(); return new; out_no_path: + xprt_put(xprt); +out_no_transport: kfree(new->cl_principal); out_no_principal: rpc_free_iostats(new->cl_metrics); @@ -590,7 +603,7 @@ rpc_free_client(struct rpc_clnt *clnt) rpc_free_iostats(clnt->cl_metrics); kfree(clnt->cl_principal); clnt->cl_metrics = NULL; - xprt_put(clnt->cl_xprt); + xprt_put(rcu_dereference_raw(clnt->cl_xprt)); rpciod_down(); kfree(clnt); } @@ -879,13 +892,18 @@ EXPORT_SYMBOL_GPL(rpc_call_start); size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) { size_t bytes; - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt; - bytes = sizeof(xprt->addr); + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + + bytes = xprt->addrlen; if (bytes > bufsize) bytes = bufsize; - memcpy(buf, &clnt->cl_xprt->addr, bytes); - return xprt->addrlen; + memcpy(buf, &xprt->addr, bytes); + rcu_read_unlock(); + + return bytes; } EXPORT_SYMBOL_GPL(rpc_peeraddr); @@ -894,11 +912,16 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr); * @clnt: RPC client structure * @format: address format * + * NB: the lifetime of the memory referenced by the returned pointer is + * the same as the rpc_xprt itself. As long as the caller uses this + * pointer, it must hold the RCU read lock. */ const char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) { - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt; + + xprt = rcu_dereference(clnt->cl_xprt); if (xprt->address_strings[format] != NULL) return xprt->address_strings[format]; @@ -910,14 +933,51 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr2str); void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); if (xprt->ops->set_buffer_size) xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rpc_setbufsize); -/* - * Return size of largest payload RPC client can support, in bytes +/** + * rpc_protocol - Get transport protocol number for an RPC client + * @clnt: RPC client to query + * + */ +int rpc_protocol(struct rpc_clnt *clnt) +{ + int protocol; + + rcu_read_lock(); + protocol = rcu_dereference(clnt->cl_xprt)->prot; + rcu_read_unlock(); + return protocol; +} +EXPORT_SYMBOL_GPL(rpc_protocol); + +/** + * rpc_net_ns - Get the network namespace for this RPC client + * @clnt: RPC client to query + * + */ +struct net *rpc_net_ns(struct rpc_clnt *clnt) +{ + struct net *ret; + + rcu_read_lock(); + ret = rcu_dereference(clnt->cl_xprt)->xprt_net; + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_net_ns); + +/** + * rpc_max_payload - Get maximum payload size for a transport, in bytes + * @clnt: RPC client to query * * For stream transports, this is one RPC record fragment (see RFC * 1831), as we don't support multi-record requests yet. For datagram @@ -926,7 +986,12 @@ EXPORT_SYMBOL_GPL(rpc_setbufsize); */ size_t rpc_max_payload(struct rpc_clnt *clnt) { - return clnt->cl_xprt->max_payload; + size_t ret; + + rcu_read_lock(); + ret = rcu_dereference(clnt->cl_xprt)->max_payload; + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL_GPL(rpc_max_payload); @@ -937,8 +1002,11 @@ EXPORT_SYMBOL_GPL(rpc_max_payload); */ void rpc_force_rebind(struct rpc_clnt *clnt) { - if (clnt->cl_autobind) - xprt_clear_bound(clnt->cl_xprt); + if (clnt->cl_autobind) { + rcu_read_lock(); + xprt_clear_bound(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + } } EXPORT_SYMBOL_GPL(rpc_force_rebind); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ac9ee1590739..3d30943ed6db 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -402,12 +403,14 @@ rpc_show_info(struct seq_file *m, void *v) { struct rpc_clnt *clnt = m->private; + rcu_read_lock(); seq_printf(m, "RPC server: %s\n", clnt->cl_server); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); + rcu_read_unlock(); return 0; } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b1f08bd67883..4f8af63798a2 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -620,9 +620,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) { struct rpc_clnt *parent = clnt->cl_parent; + struct rpc_xprt *xprt = rcu_dereference(clnt->cl_xprt); while (parent != clnt) { - if (parent->cl_xprt != clnt->cl_xprt) + if (rcu_dereference(parent->cl_xprt) != xprt) break; if (clnt->cl_autobind) break; @@ -653,8 +654,12 @@ void rpcb_getport_async(struct rpc_task *task) size_t salen; int status; - clnt = rpcb_find_transport_owner(task->tk_client); - xprt = clnt->cl_xprt; + rcu_read_lock(); + do { + clnt = rpcb_find_transport_owner(task->tk_client); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + } while (xprt == NULL); + rcu_read_unlock(); dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, @@ -667,6 +672,7 @@ void rpcb_getport_async(struct rpc_task *task) if (xprt_test_and_set_binding(xprt)) { dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __func__); + xprt_put(xprt); return; } @@ -734,7 +740,7 @@ void rpcb_getport_async(struct rpc_task *task) switch (bind_version) { case RPCBVERS_4: case RPCBVERS_3: - map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); map->r_owner = ""; break; @@ -763,6 +769,7 @@ bailout_release_client: bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); task->tk_status = status; + xprt_put(xprt); } EXPORT_SYMBOL_GPL(rpcb_getport_async); diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 1eb3304bc105..bc2068ee795b 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "netns.h" @@ -179,7 +180,7 @@ static void _print_name(struct seq_file *seq, unsigned int op, void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) { struct rpc_iostats *stats = clnt->cl_metrics; - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt; unsigned int op, maxproc = clnt->cl_maxproc; if (!stats) @@ -189,8 +190,11 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) seq_printf(seq, "p/v: %u/%u (%s)\n", clnt->cl_prog, clnt->cl_vers, clnt->cl_protname); + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); if (xprt) xprt->ops->print_stats(xprt, seq); + rcu_read_unlock(); seq_printf(seq, "\tper-op statistics\n"); for (op = 0; op < maxproc; op++) { From 4e0038b6b246e4145fc4a53dca61a556d17bc52c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Mar 2012 17:01:05 -0500 Subject: [PATCH 247/316] SUNRPC: Move clnt->cl_server into struct rpc_xprt When the cl_xprt field is updated, the cl_server field will also have to change. Since the contents of cl_server follow the remote endpoint of cl_xprt, just move that field to the rpc_xprt. Signed-off-by: Trond Myklebust [ cel: simplify check_gss_callback_principal(), whitespace changes ] [ cel: forward ported to 3.4 ] Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 3 +- fs/nfs/nfs4proc.c | 3 +- include/linux/sunrpc/clnt.h | 1 - include/linux/sunrpc/xprt.h | 2 + net/sunrpc/clnt.c | 88 ++++++++++++++++++------------------- net/sunrpc/rpc_pipe.c | 3 +- net/sunrpc/rpcb_clnt.c | 4 +- net/sunrpc/xprt.c | 15 ++++++- 8 files changed, 66 insertions(+), 53 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 4a122ae71762..2afe23349c7b 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -332,7 +332,6 @@ void nfs_callback_down(int minorversion) int check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) { - struct rpc_clnt *r = clp->cl_rpcclient; char *p = svc_gss_principal(rqstp); if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) @@ -353,7 +352,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) if (memcmp(p, "nfs@", 4) != 0) return 0; p += 4; - if (strcmp(p, r->cl_server) != 0) + if (strcmp(p, clp->cl_hostname) != 0) return 0; return 1; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 671510cc14c0..54767dd66cf9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1100,6 +1100,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data if (state == NULL) goto err_put_inode; if (data->o_res.delegation_type != 0) { + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; int delegation_flags = 0; rcu_read_lock(); @@ -1111,7 +1112,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data pr_err_ratelimited("NFS: Broken NFSv4 server %s is " "returning a delegation for " "OPEN(CLAIM_DELEGATE_CUR)\n", - NFS_CLIENT(inode)->cl_server); + clp->cl_hostname); } else if ((delegation_flags & 1UL<inode, data->owner->so_cred, diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index e3d12b4a0314..acd5502a8190 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -41,7 +41,6 @@ struct rpc_clnt { cl_vers, /* RPC version number */ cl_maxproc; /* max procedure number */ - const char * cl_server; /* server machine name */ const char * cl_protname; /* protocol name */ struct rpc_auth * cl_auth; /* authenticator */ struct rpc_stat * cl_stats; /* per-program statistics */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ea712f97f4a1..77d278defa70 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -229,6 +229,7 @@ struct rpc_xprt { } stat; struct net *xprt_net; + const char *servername; const char *address_strings[RPC_DISPLAY_MAX]; }; @@ -258,6 +259,7 @@ struct xprt_create { struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; + const char *servername; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ }; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7783fc0e7263..e39ace9a4e1d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -265,15 +265,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; - size_t len; /* sanity check the name before trying to print it */ - err = -EINVAL; - len = strlen(args->servername); - if (len > RPC_MAXNETNAMELEN) - goto out_no_rpciod; - len++; - dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, args->servername, xprt); @@ -296,10 +289,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_err; clnt->cl_parent = clnt; - clnt->cl_server = kstrdup(args->servername, GFP_KERNEL); - if (clnt->cl_server == NULL) - goto out_no_server; - rcu_assign_pointer(clnt->cl_xprt, xprt); clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; @@ -363,8 +352,6 @@ out_no_path: out_no_principal: rpc_free_iostats(clnt->cl_metrics); out_no_stats: - kfree(clnt->cl_server); -out_no_server: kfree(clnt); out_err: xprt_put(xprt); @@ -394,6 +381,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, + .servername = args->servername, .bc_xprt = args->bc_xprt, }; char servername[48]; @@ -402,7 +390,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * If the caller chooses not to specify a hostname, whip * up a string representation of the passed-in address. */ - if (args->servername == NULL) { + if (xprtargs.servername == NULL) { struct sockaddr_un *sun = (struct sockaddr_un *)args->address; struct sockaddr_in *sin = @@ -429,7 +417,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * address family isn't recognized. */ return ERR_PTR(-EINVAL); } - args->servername = servername; + xprtargs.servername = servername; } xprt = xprt_create_transport(&xprtargs); @@ -488,9 +476,6 @@ rpc_clone_client(struct rpc_clnt *clnt) new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); if (!new) goto out_no_clnt; - new->cl_server = kstrdup(clnt->cl_server, GFP_KERNEL); - if (new->cl_server == NULL) - goto out_no_server; new->cl_parent = clnt; /* Turn off autobind on clones */ new->cl_autobind = 0; @@ -528,8 +513,6 @@ out_no_transport: out_no_principal: rpc_free_iostats(new->cl_metrics); out_no_stats: - kfree(new->cl_server); -out_no_server: kfree(new); out_no_clnt: dprintk("RPC: %s: returned error %d\n", __func__, err); @@ -574,8 +557,9 @@ EXPORT_SYMBOL_GPL(rpc_killall_tasks); */ void rpc_shutdown_client(struct rpc_clnt *clnt) { - dprintk("RPC: shutting down %s client for %s\n", - clnt->cl_protname, clnt->cl_server); + dprintk_rcu("RPC: shutting down %s client for %s\n", + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); @@ -593,11 +577,11 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); static void rpc_free_client(struct rpc_clnt *clnt) { - dprintk("RPC: destroying %s client for %s\n", - clnt->cl_protname, clnt->cl_server); + dprintk_rcu("RPC: destroying %s client for %s\n", + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) rpc_release_client(clnt->cl_parent); - kfree(clnt->cl_server); rpc_unregister_client(clnt); rpc_clnt_remove_pipedir(clnt); rpc_free_iostats(clnt->cl_metrics); @@ -1685,8 +1669,11 @@ call_timeout(struct rpc_task *task) } if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) + rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, timed out\n", - clnt->cl_protname, clnt->cl_server); + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); + rcu_read_unlock(); if (task->tk_flags & RPC_TASK_TIMEOUT) rpc_exit(task, -ETIMEDOUT); else @@ -1696,9 +1683,13 @@ call_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; - if (clnt->cl_chatty) + if (clnt->cl_chatty) { + rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, still trying\n", - clnt->cl_protname, clnt->cl_server); + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); + rcu_read_unlock(); + } } rpc_force_rebind(clnt); /* @@ -1727,9 +1718,13 @@ call_decode(struct rpc_task *task) dprint_status(task); if (task->tk_flags & RPC_CALL_MAJORSEEN) { - if (clnt->cl_chatty) + if (clnt->cl_chatty) { + rcu_read_lock(); printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); + rcu_read_unlock(); + } task->tk_flags &= ~RPC_CALL_MAJORSEEN; } @@ -1807,6 +1802,7 @@ rpc_encode_header(struct rpc_task *task) static __be32 * rpc_verify_header(struct rpc_task *task) { + struct rpc_clnt *clnt = task->tk_client; struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; __be32 *p = iov->iov_base; @@ -1879,8 +1875,11 @@ rpc_verify_header(struct rpc_task *task) task->tk_action = call_bind; goto out_retry; case RPC_AUTH_TOOWEAK: + rcu_read_lock(); printk(KERN_NOTICE "RPC: server %s requires stronger " - "authentication.\n", task->tk_client->cl_server); + "authentication.\n", + rcu_dereference(clnt->cl_xprt)->servername); + rcu_read_unlock(); break; default: dprintk("RPC: %5u %s: unknown auth error: %x\n", @@ -1903,28 +1902,27 @@ rpc_verify_header(struct rpc_task *task) case RPC_SUCCESS: return p; case RPC_PROG_UNAVAIL: - dprintk("RPC: %5u %s: program %u is unsupported by server %s\n", - task->tk_pid, __func__, - (unsigned int)task->tk_client->cl_prog, - task->tk_client->cl_server); + dprintk_rcu("RPC: %5u %s: program %u is unsupported " + "by server %s\n", task->tk_pid, __func__, + (unsigned int)clnt->cl_prog, + rcu_dereference(clnt->cl_xprt)->servername); error = -EPFNOSUPPORT; goto out_err; case RPC_PROG_MISMATCH: - dprintk("RPC: %5u %s: program %u, version %u unsupported by " - "server %s\n", task->tk_pid, __func__, - (unsigned int)task->tk_client->cl_prog, - (unsigned int)task->tk_client->cl_vers, - task->tk_client->cl_server); + dprintk_rcu("RPC: %5u %s: program %u, version %u unsupported " + "by server %s\n", task->tk_pid, __func__, + (unsigned int)clnt->cl_prog, + (unsigned int)clnt->cl_vers, + rcu_dereference(clnt->cl_xprt)->servername); error = -EPROTONOSUPPORT; goto out_err; case RPC_PROC_UNAVAIL: - dprintk("RPC: %5u %s: proc %s unsupported by program %u, " + dprintk_rcu("RPC: %5u %s: proc %s unsupported by program %u, " "version %u on server %s\n", task->tk_pid, __func__, rpc_proc_name(task), - task->tk_client->cl_prog, - task->tk_client->cl_vers, - task->tk_client->cl_server); + clnt->cl_prog, clnt->cl_vers, + rcu_dereference(clnt->cl_xprt)->servername); error = -EOPNOTSUPP; goto out_err; case RPC_GARBAGE_ARGS: @@ -1938,7 +1936,7 @@ rpc_verify_header(struct rpc_task *task) } out_garbage: - task->tk_client->cl_stats->rpcgarbage++; + clnt->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; dprintk("RPC: %5u %s: retrying\n", diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 3d30943ed6db..7d96e3cd57cc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -404,7 +404,8 @@ rpc_show_info(struct seq_file *m, void *v) struct rpc_clnt *clnt = m->private; rcu_read_lock(); - seq_printf(m, "RPC server: %s\n", clnt->cl_server); + seq_printf(m, "RPC server: %s\n", + rcu_dereference(clnt->cl_xprt)->servername); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 4f8af63798a2..e699ff0ce909 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -663,7 +663,7 @@ void rpcb_getport_async(struct rpc_task *task) dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, - clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); + xprt->servername, clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ @@ -714,7 +714,7 @@ void rpcb_getport_async(struct rpc_task *task) dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); - rpcb_clnt = rpcb_create(xprt->xprt_net, clnt->cl_server, sap, salen, + rpcb_clnt = rpcb_create(xprt->xprt_net, xprt->servername, sap, salen, xprt->prot, bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 32e37945a840..0cbcd1ab49ab 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -66,6 +66,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static void xprt_destroy(struct rpc_xprt *xprt); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); @@ -751,7 +752,7 @@ static void xprt_connect_status(struct rpc_task *task) default: dprintk("RPC: %5u xprt_connect_status: error %d connecting to " "server %s\n", task->tk_pid, -task->tk_status, - task->tk_client->cl_server); + xprt->servername); xprt_release_write(xprt, task); task->tk_status = -EIO; } @@ -1229,6 +1230,17 @@ found: (unsigned long)xprt); else init_timer(&xprt->timer); + + if (strlen(args->servername) > RPC_MAXNETNAMELEN) { + xprt_destroy(xprt); + return ERR_PTR(-EINVAL); + } + xprt->servername = kstrdup(args->servername, GFP_KERNEL); + if (xprt->servername == NULL) { + xprt_destroy(xprt); + return ERR_PTR(-ENOMEM); + } + dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); out: @@ -1251,6 +1263,7 @@ static void xprt_destroy(struct rpc_xprt *xprt) rpc_destroy_wait_queue(&xprt->sending); rpc_destroy_wait_queue(&xprt->backlog); cancel_work_sync(&xprt->task_cleanup); + kfree(xprt->servername); /* * Tear down transport state and free the rpc_xprt */ From 2e738fdce22f9a7edf20281fd2d768ef9785922e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Mar 2012 17:01:14 -0500 Subject: [PATCH 248/316] SUNRPC: Add API to acquire source address NFSv4.0 clients must send endpoint information for their callback service to NFSv4.0 servers during their first contact with a server. Traditionally on Linux, user space provides the callback endpoint IP address via the "clientaddr=" mount option. During an NFSv4 migration event, it is possible that an FSID may be migrated to a destination server that is accessible via a different source IP address than the source server was. The client must update callback endpoint information on the destination server so that it can maintain leases and allow delegation. Without a new "clientaddr=" option from user space, however, the kernel itself must construct an appropriate IP address for the callback update. Provide an API in the RPC client for upper layer RPC consumers to acquire a source address for a remote. The mechanism used by the mount.nfs command is copied: set up a connected UDP socket to the designated remote, then scrape the source address off the socket. We are careful to select the correct network namespace when setting up the temporary UDP socket. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 149 ++++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index acd5502a8190..523547ecfee2 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -161,6 +161,7 @@ size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); +int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); size_t rpc_ntop(const struct sockaddr *, char *, const size_t); size_t rpc_pton(struct net *, const char *, const size_t, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e39ace9a4e1d..7a4cb5fdc212 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -914,6 +914,155 @@ const char *rpc_peeraddr2str(struct rpc_clnt *clnt, } EXPORT_SYMBOL_GPL(rpc_peeraddr2str); +static const struct sockaddr_in rpc_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), +}; + +static const struct sockaddr_in6 rpc_in6addr_loopback = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, +}; + +/* + * Try a getsockname() on a connected datagram socket. Using a + * connected datagram socket prevents leaving a socket in TIME_WAIT. + * This conserves the ephemeral port number space. + * + * Returns zero and fills in "buf" if successful; otherwise, a + * negative errno is returned. + */ +static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, + struct sockaddr *buf, int buflen) +{ + struct socket *sock; + int err; + + err = __sock_create(net, sap->sa_family, + SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + if (err < 0) { + dprintk("RPC: can't create UDP socket (%d)\n", err); + goto out; + } + + switch (sap->sa_family) { + case AF_INET: + err = kernel_bind(sock, + (struct sockaddr *)&rpc_inaddr_loopback, + sizeof(rpc_inaddr_loopback)); + break; + case AF_INET6: + err = kernel_bind(sock, + (struct sockaddr *)&rpc_in6addr_loopback, + sizeof(rpc_in6addr_loopback)); + break; + default: + err = -EAFNOSUPPORT; + goto out; + } + if (err < 0) { + dprintk("RPC: can't bind UDP socket (%d)\n", err); + goto out_release; + } + + err = kernel_connect(sock, sap, salen, 0); + if (err < 0) { + dprintk("RPC: can't connect UDP socket (%d)\n", err); + goto out_release; + } + + err = kernel_getsockname(sock, buf, &buflen); + if (err < 0) { + dprintk("RPC: getsockname failed (%d)\n", err); + goto out_release; + } + + err = 0; + if (buf->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)buf; + sin6->sin6_scope_id = 0; + } + dprintk("RPC: %s succeeded\n", __func__); + +out_release: + sock_release(sock); +out: + return err; +} + +/* + * Scraping a connected socket failed, so we don't have a useable + * local address. Fallback: generate an address that will prevent + * the server from calling us back. + * + * Returns zero and fills in "buf" if successful; otherwise, a + * negative errno is returned. + */ +static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen) +{ + switch (family) { + case AF_INET: + if (buflen < sizeof(rpc_inaddr_loopback)) + return -EINVAL; + memcpy(buf, &rpc_inaddr_loopback, + sizeof(rpc_inaddr_loopback)); + break; + case AF_INET6: + if (buflen < sizeof(rpc_in6addr_loopback)) + return -EINVAL; + memcpy(buf, &rpc_in6addr_loopback, + sizeof(rpc_in6addr_loopback)); + default: + dprintk("RPC: %s: address family not supported\n", + __func__); + return -EAFNOSUPPORT; + } + dprintk("RPC: %s: succeeded\n", __func__); + return 0; +} + +/** + * rpc_localaddr - discover local endpoint address for an RPC client + * @clnt: RPC client structure + * @buf: target buffer + * @buflen: size of target buffer, in bytes + * + * Returns zero and fills in "buf" and "buflen" if successful; + * otherwise, a negative errno is returned. + * + * This works even if the underlying transport is not currently connected, + * or if the upper layer never previously provided a source address. + * + * The result of this function call is transient: multiple calls in + * succession may give different results, depending on how local + * networking configuration changes over time. + */ +int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen) +{ + struct sockaddr_storage address; + struct sockaddr *sap = (struct sockaddr *)&address; + struct rpc_xprt *xprt; + struct net *net; + size_t salen; + int err; + + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + salen = xprt->addrlen; + memcpy(sap, &xprt->addr, salen); + net = get_net(xprt->xprt_net); + rcu_read_unlock(); + + rpc_set_port(sap, 0); + err = rpc_sockname(net, sap, salen, buf, buflen); + put_net(net); + if (err != 0) + /* Couldn't discover local address, return ANYADDR */ + return rpc_anyaddr(sap->sa_family, buf, buflen); + return 0; +} +EXPORT_SYMBOL_GPL(rpc_localaddr); + void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { From 31b8e2aec099f22d40277c424d8c24b2a4c95fce Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Mar 2012 17:01:23 -0500 Subject: [PATCH 249/316] NFS: Make clientaddr= optional For NFSv4 mounts, the clientaddr= mount option has always been required. Now we have rpc_localaddr() in the kernel, which was modeled after the same logic in the mount.nfs command that constructs the clientaddr= mount option. If user space doesn't provide a clientaddr= mount option, the kernel can now construct its own. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 15 +++++++++++++++ fs/nfs/super.c | 6 ------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d038dc5916e5..d30dcbfb6b20 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1346,6 +1346,7 @@ int nfs4_init_client(struct nfs_client *clp, rpc_authflavor_t authflavour, int noresvport) { + char buf[INET6_ADDRSTRLEN + 1]; int error; if (clp->cl_cons_state == NFS_CS_READY) { @@ -1361,6 +1362,20 @@ int nfs4_init_client(struct nfs_client *clp, 1, noresvport); if (error < 0) goto error; + + /* If no clientaddr= option was specified, find a usable cb address */ + if (ip_addr == NULL) { + struct sockaddr_storage cb_addr; + struct sockaddr *sap = (struct sockaddr *)&cb_addr; + + error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); + if (error < 0) + goto error; + error = rpc_ntop(sap, buf, sizeof(buf)); + if (error < 0) + goto error; + ip_addr = (const char *)buf; + } strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); error = nfs_idmap_new(clp); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7002be11d99f..3935a371f5a0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2557,12 +2557,6 @@ static int nfs4_validate_text_mount_data(void *options, return -EINVAL; } - if (args->client_address == NULL) { - dfprintk(MOUNT, - "NFS4: mount program didn't pass callback address\n"); - return -EINVAL; - } - return nfs_parse_devname(dev_name, &args->nfs_server.hostname, NFS4_MAXNAMLEN, From 20d27e929fb4790a339a4ddcc9a27f14db06055b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Mar 2012 17:01:31 -0500 Subject: [PATCH 250/316] NFS: Add a client-side function to display NFS file handles For debugging, introduce a simplistic function to print NFS file handles on the system console. The main function is hooked into the dprintk debugging facility, but you can directly call the helper, _nfs_display_fhandle(), if you want to print a handle unconditionally. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 45 ++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 14 +++++++++++++ 2 files changed, 59 insertions(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6c662598f885..99a4f52c14b2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1045,6 +1045,51 @@ struct nfs_fh *nfs_alloc_fhandle(void) return fh; } +/** + * _nfs_display_fhandle - display an NFS file handle on the console + * + * @fh: file handle to display + * @caption: display caption + * + * For debugging only. + */ +#ifdef RPC_DEBUG +void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) +{ + unsigned short i; + + if (fh->size == 0 || fh == NULL) { + printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh); + return; + } + + printk(KERN_DEFAULT "%s at %p is %u bytes:\n", caption, fh, fh->size); + for (i = 0; i < fh->size; i += 16) { + __be32 *pos = (__be32 *)&fh->data[i]; + + switch ((fh->size - i - 1) >> 2) { + case 0: + printk(KERN_DEFAULT " %08x\n", + be32_to_cpup(pos)); + break; + case 1: + printk(KERN_DEFAULT " %08x %08x\n", + be32_to_cpup(pos), be32_to_cpup(pos + 1)); + break; + case 2: + printk(KERN_DEFAULT " %08x %08x %08x\n", + be32_to_cpup(pos), be32_to_cpup(pos + 1), + be32_to_cpup(pos + 2)); + break; + default: + printk(KERN_DEFAULT " %08x %08x %08x %08x\n", + be32_to_cpup(pos), be32_to_cpup(pos + 1), + be32_to_cpup(pos + 2), be32_to_cpup(pos + 3)); + } + } +} +#endif + /** * nfs_inode_attrs_need_update - check if the inode attributes need updating * @inode - pointer to inode diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8c29950d2fa5..c07a757649dc 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -395,6 +395,20 @@ static inline void nfs_free_fhandle(const struct nfs_fh *fh) kfree(fh); } +#ifdef RPC_DEBUG +extern void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption); +#define nfs_display_fhandle(fh, caption) \ + do { \ + if (unlikely(nfs_debug & NFSDBG_FACILITY)) \ + _nfs_display_fhandle(fh, caption); \ + } while (0) +#else +static inline void nfs_display_fhandle(const struct nfs_fh *fh, + const char *caption) +{ +} +#endif + /* * linux/fs/nfs/nfsroot.c */ From bb4dae5e5b5a92f0ffbcc6ac10c5e8afcd87934d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Mar 2012 17:01:48 -0500 Subject: [PATCH 251/316] NFS: Simplify arguments of encode_renew() Clean up: pass just the clientid4 to encode_renew(). This enables it to be used by callers who might not have an full nfs_client. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b5c5212cd184..48f539314f25 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1637,13 +1637,14 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co hdr->replen += decode_rename_maxsz; } -static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) +static void encode_renew(struct xdr_stream *xdr, clientid4 clid, + struct compound_hdr *hdr) { __be32 *p; p = reserve_space(xdr, 12); *p++ = cpu_to_be32(OP_RENEW); - xdr_encode_hyper(p, client_stateid->cl_clientid); + xdr_encode_hyper(p, clid); hdr->nops++; hdr->replen += decode_renew_maxsz; } @@ -2692,7 +2693,7 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr, }; encode_compound_hdr(xdr, req, &hdr); - encode_renew(xdr, clp, &hdr); + encode_renew(xdr, clp->cl_clientid, &hdr); encode_nops(&hdr); } From 81934ddb8eb62a85b8015c0f2b824a88510965a2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Mar 2012 17:01:57 -0500 Subject: [PATCH 252/316] NFS: Introduce NFS_ATTR_FATTR_V4_LOCATIONS The Linux NFS client must distinguish between referral events (which it currently supports) and migration events (which it does not yet support). In both types of events, an fs_locations array is returned. But upper layers, not the XDR layer, should make the distinction between a referral and a migration. There really isn't a way for an XDR decoder function to distinguish the two, in general. Slightly adjust the FATTR flags returned by decode_fs_locations() to set NFS_ATTR_FATTR_V4_LOCATIONS only if a non-empty locations array was returned from the server. Then have logic in nfs4proc.c distinguish whether the locations array is for a referral or something else. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 11 ++++++----- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 54767dd66cf9..281c2def2b19 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -79,6 +79,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); +static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, @@ -2340,7 +2341,6 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, return nfs4_map_errors(status); } -static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); /* * Get locations and (maybe) other attributes of a referral. * Note that we'll actually follow the referral later when @@ -4797,11 +4797,11 @@ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || (fattr->valid & NFS_ATTR_FATTR_FILEID)) && (fattr->valid & NFS_ATTR_FATTR_FSID) && - (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) + (fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS))) return; fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | - NFS_ATTR_FATTR_NLINK; + NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_V4_REFERRAL; fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; fattr->nlink = 2; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 48f539314f25..a6fb55da874c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3660,7 +3660,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st res->nlocations++; } if (res->nlocations != 0) - status = NFS_ATTR_FATTR_V4_REFERRAL; + status = NFS_ATTR_FATTR_V4_LOCATIONS; out: dprintk("%s: fs_locations done, error = %d\n", __func__, status); return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 046c1bfddc33..210da5dc4f17 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -88,11 +88,12 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_PRECTIME (1U << 16) #define NFS_ATTR_FATTR_CHANGE (1U << 17) #define NFS_ATTR_FATTR_PRECHANGE (1U << 18) -#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */ -#define NFS_ATTR_FATTR_MOUNTPOINT (1U << 20) /* Treat as mountpoint */ -#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 21) -#define NFS_ATTR_FATTR_OWNER_NAME (1U << 22) -#define NFS_ATTR_FATTR_GROUP_NAME (1U << 23) +#define NFS_ATTR_FATTR_V4_LOCATIONS (1U << 19) +#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 20) +#define NFS_ATTR_FATTR_MOUNTPOINT (1U << 21) +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 22) +#define NFS_ATTR_FATTR_OWNER_NAME (1U << 23) +#define NFS_ATTR_FATTR_GROUP_NAME (1U << 24) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ From 264e6351c59d22303582c45d79f0a5735f51d8d1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Mar 2012 17:02:05 -0500 Subject: [PATCH 253/316] NFS: Request fh_expire_type attribute in "server caps" operation The fh_expire_type file attribute is a filesystem wide attribute that consists of flags that indicate what characteristics file handles on this FSID have. Our client doesn't support volatile file handles. It should find out early (say, at mount time) whether the server is going to play shenanighans with file handles during a migration. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 26 ++++++++++++++++++++++++++ include/linux/nfs_fs_sb.h | 3 +++ include/linux/nfs_xdr.h | 1 + 4 files changed, 31 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 281c2def2b19..87b9b91f76cf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2220,6 +2220,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->acl_bitmask = res.acl_bitmask; + server->fh_expire_type = res.fh_expire_type; } return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a6fb55da874c..3e0fe9f92e7c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2676,6 +2676,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fhandle, &hdr); encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS| + FATTR4_WORD0_FH_EXPIRE_TYPE| FATTR4_WORD0_LINK_SUPPORT| FATTR4_WORD0_SYMLINK_SUPPORT| FATTR4_WORD0_ACLSUPPORT, &hdr); @@ -3223,6 +3224,28 @@ out_overflow: return -EIO; } +static int decode_attr_fh_expire_type(struct xdr_stream *xdr, + uint32_t *bitmap, uint32_t *type) +{ + __be32 *p; + + *type = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FH_EXPIRE_TYPE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *type = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE; + } + dprintk("%s: expire type=0x%x\n", __func__, *type); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) { __be32 *p; @@ -4271,6 +4294,9 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re goto xdr_error; if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0) goto xdr_error; + if ((status = decode_attr_fh_expire_type(xdr, bitmap, + &res->fh_expire_type)) != 0) + goto xdr_error; if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0) goto xdr_error; if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 03d0b91c2d5b..7073fc74481c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -148,6 +148,9 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ + u32 fh_expire_type; /* V4 bitmask representing file + handle volatility type for + this filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; void *pnfs_ld_data; /* per mount point data */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 210da5dc4f17..6f4c35941965 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -977,6 +977,7 @@ struct nfs4_server_caps_res { u32 acl_bitmask; u32 has_links; u32 has_symlinks; + u32 fh_expire_type; struct nfs4_sequence_res seq_res; }; From 54b50af089552bae368502e35dead67e81129b8d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 2 Mar 2012 16:58:56 -0500 Subject: [PATCH 254/316] NFS: Reduce debugging noise from encode_compound_hdr Get rid of encode_compound: tag= when XDR debugging is enabled. The current Linux client never sets compound tags. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3e0fe9f92e7c..7d3ba1ff787c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -906,7 +906,6 @@ static void encode_compound_hdr(struct xdr_stream *xdr, * but this is not required as a MUST for the server to do so. */ hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; - dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); p = reserve_space(xdr, 4 + hdr->taglen + 8); p = xdr_encode_opaque(p, hdr->tag, hdr->taglen); From 91e56aaedd7ebceacde782a3921fadef4b5d0e1c Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 5 Mar 2012 14:58:15 -0500 Subject: [PATCH 255/316] NFS: Undo changes to idmap.h When compiled without NFS v4 configured these function won't be defined and the compiler will yell. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- include/linux/nfs_idmap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 717fa5019e75..7eed2012d288 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -69,8 +69,19 @@ struct nfs_server; struct nfs_fattr; struct nfs4_string; +#ifdef CONFIG_NFS_V4 int nfs_idmap_init(void); void nfs_idmap_quit(void); +#else +static inline int nfs_idmap_init(void) +{ + return 0; +} + +static inline void nfs_idmap_quit(void) +{} +#endif + int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); From 7e03b7cc0736eefe7471782c344112ad6eba951e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:12:57 -0500 Subject: [PATCH 256/316] NFS: Fix a compile issue when !CONFIG_NFS_V4_1 The attempt to display the implementation ID needs to be conditional on whether or not CONFIG_NFS_V4_1 is defined Reported-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 3935a371f5a0..aac403085be5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -775,7 +775,6 @@ static void show_sessions(struct seq_file *m, struct nfs_server *server) {} #endif #endif -#ifdef CONFIG_NFS_V4 #ifdef CONFIG_NFS_V4_1 static void show_pnfs(struct seq_file *m, struct nfs_server *server) { @@ -785,9 +784,26 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server) else seq_printf(m, "not configured"); } + +static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) +{ + if (nfss->nfs_client && nfss->nfs_client->impl_id) { + struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; + seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," + "date='%llu,%u'", + impl_id->name, impl_id->domain, + impl_id->date.seconds, impl_id->date.nseconds); + } +} #else -static void show_pnfs(struct seq_file *m, struct nfs_server *server) {} +#ifdef CONFIG_NFS_V4 +static void show_pnfs(struct seq_file *m, struct nfs_server *server) +{ +} #endif +static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) +{ +} #endif static int nfs_show_devname(struct seq_file *m, struct dentry *root) @@ -836,13 +852,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); - if (nfss->nfs_client && nfss->nfs_client->impl_id) { - struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; - seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," - "date='%llu,%u'", - impl_id->name, impl_id->domain, - impl_id->date.seconds, impl_id->date.nseconds); - } + show_implementation_id(m, nfss); seq_printf(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); From fa68a1ba1de349f0d1fcc54171b95236efe24148 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 6 Mar 2012 10:14:35 -0500 Subject: [PATCH 257/316] NFS: Fix a typo in _nfs_display_fhandle The check for 'fh == NULL' needs to come _before_ we dereference fh. Reported-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 99a4f52c14b2..ba03b7908149 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1058,7 +1058,7 @@ void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) { unsigned short i; - if (fh->size == 0 || fh == NULL) { + if (fh == NULL || fh->size == 0) { printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh); return; } From a1d0b5eebc4fd6e0edb02688b35f17f67f42aea5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Mar 2012 19:56:44 -0500 Subject: [PATCH 258/316] NFS: Properly handle the case where the delegation is revoked If we know that the delegation stateid is bad or revoked, we need to remove that delegation as soon as possible, and then mark all the stateids that relied on that delegation for recovery. We cannot use the delegation as part of the recovery process. Also note that NFSv4.1 uses a different error code (NFS4ERR_DELEG_REVOKED) to indicate that the delegation was revoked. Finally, ensure that setlk() and setattr() can both recover safely from a revoked delegation. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/delegation.c | 11 +++++++++++ fs/nfs/delegation.h | 1 + fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 18 ++++++++++++++++-- fs/nfs/nfs4state.c | 29 +++++++++++++++++++++++++++-- 5 files changed, 57 insertions(+), 4 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f2654069806..ac889af8ccf5 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -466,6 +466,17 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } +void nfs_remove_bad_delegation(struct inode *inode) +{ + struct nfs_delegation *delegation; + + delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); + if (delegation) { + nfs_inode_find_state_and_recover(inode, &delegation->stateid); + nfs_free_delegation(delegation); + } +} + /** * nfs_expire_all_delegation_types * @clp: client to process diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index d9322e490c56..691a79609184 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -45,6 +45,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp); void nfs_handle_cb_pathdown(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); +void nfs_remove_bad_delegation(struct inode *inode); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 19079ec8252c..7ddad3fa4074 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -317,6 +317,8 @@ extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, fmode_t); extern void nfs4_close_sync(struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); +extern void nfs_inode_find_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid); extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ea7adfc868c2..f31fcea1af7e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -268,8 +268,11 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc switch(errorcode) { case 0: return 0; + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + if (state != NULL) + nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; @@ -1331,8 +1334,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state * The show must go on: exit, but mark the * stateid as needing recovery. */ + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + nfs_inode_find_state_and_recover(state->inode, + stateid); nfs4_schedule_stateid_recovery(server, state); case -EKEYEXPIRED: /* @@ -1931,7 +1937,9 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .state = state, + }; int err; do { err = nfs4_handle_exception(server, @@ -3760,8 +3768,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (task->tk_status >= 0) return 0; switch(task->tk_status) { + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + if (state != NULL) + nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; @@ -4604,7 +4615,9 @@ out: static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .state = state, + }; int err; do { @@ -4697,6 +4710,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) * The show must go on: exit, but mark the * stateid as needing recovery. */ + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OPENMODE: diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2f760604246f..d60e7ad2690e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1106,12 +1106,37 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4 { struct nfs_client *clp = server->nfs_client; - if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags)) - nfs_async_inode_return_delegation(state->inode, &state->stateid); nfs4_state_mark_reclaim_nograce(clp, state); nfs4_schedule_state_manager(clp); } +void nfs_inode_find_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + struct nfs4_state *state; + bool found = false; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + state = ctx->state; + if (state == NULL) + continue; + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + continue; + if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) + continue; + nfs4_state_mark_reclaim_nograce(clp, state); + found = true; + } + spin_unlock(&inode->i_lock); + if (found) + nfs4_schedule_state_manager(clp); +} + + static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { struct inode *inode = state->inode; From 8e663f0e5fabf57065aed1cfdaff5b13057dce23 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:56 -0500 Subject: [PATCH 259/316] NFSv4.1: Fix matching of the stateids when returning a delegation nfs41_validate_delegation_stateid is broken if we supply a stateid with a non-zero sequence id. Instead of trying to match the sequence id, the function assumes that we always want to error. While this is true for a delegation callback, it is not true in general. Also fix a typo in nfs4_callback_recall. Reported-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 10 +++++----- fs/nfs/delegation.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 1bb297243624..ea8321923f28 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -87,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, res = 0; break; case -ENOENT: - if (res != 0) - res = htonl(NFS4ERR_BAD_STATEID); + res = htonl(NFS4ERR_BAD_STATEID); break; default: res = htonl(NFS4ERR_RESOURCE); @@ -325,10 +324,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n if (delegation == NULL) return 0; - if (stateid->stateid.seqid != 0) + if (stateid->stateid.seqid != 0 && + stateid->stateid.seqid != delegation->stateid.stateid.seqid) return 0; - if (memcmp(&delegation->stateid.stateid.other, - &stateid->stateid.other, + if (memcmp(delegation->stateid.stateid.other, + stateid->stateid.other, NFS4_STATEID_OTHER_SIZE)) return 0; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ac889af8ccf5..c14512cea798 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -542,7 +542,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) /** * nfs_async_inode_return_delegation - asynchronously return a delegation * @inode: inode to process - * @stateid: state ID information from CB_RECALL arguments + * @stateid: state ID information * * Returns zero on success, or a negative errno value. */ From 36281caa839f4441c793c81d2e3cc5ea44ad5aa2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:56 -0500 Subject: [PATCH 260/316] NFSv4: Further clean-ups of delegation stateid validation Change the name to reflect what we're really doing: testing two stateids for whether or not they match according the the rules in RFC3530 and RFC5661. Move the code from callback_proc.c to nfs4proc.c Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 24 ------------------------ fs/nfs/delegation.c | 2 +- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 27 +++++++++++++++++++++++++-- 4 files changed, 27 insertions(+), 28 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ea8321923f28..1b5d809a105e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -98,14 +98,6 @@ out: return res; } -int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) -{ - if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data, - sizeof(delegation->stateid.data)) != 0) - return 0; - return 1; -} - #if defined(CONFIG_NFS_V4_1) /* @@ -319,22 +311,6 @@ out: return res; } -int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) -{ - if (delegation == NULL) - return 0; - - if (stateid->stateid.seqid != 0 && - stateid->stateid.seqid != delegation->stateid.stateid.seqid) - return 0; - if (memcmp(delegation->stateid.stateid.other, - stateid->stateid.other, - NFS4_STATEID_OTHER_SIZE)) - return 0; - - return 1; -} - /* * Validate the sequenceID sent by the server. * Return success if the sequenceID is one more than what we last saw on diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c14512cea798..c7249e26e2e9 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -556,7 +556,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (!clp->cl_mvops->validate_stateid(delegation, stateid)) { + if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { rcu_read_unlock(); return -ENOENT; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7ddad3fa4074..624d4becf017 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -43,7 +43,7 @@ struct nfs4_minor_version_ops { struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply); - int (*validate_stateid)(struct nfs_delegation *, + bool (*match_stateid)(const nfs4_stateid *, const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f31fcea1af7e..b0647b387403 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6271,8 +6271,31 @@ static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) } while (exception.retry); return err; } + +static bool nfs41_match_stateid(const nfs4_stateid *s1, + const nfs4_stateid *s2) +{ + if (memcmp(s1->stateid.other, s2->stateid.other, + sizeof(s1->stateid.other)) != 0) + return false; + + if (s1->stateid.seqid == s2->stateid.seqid) + return true; + if (s1->stateid.seqid == 0 || s2->stateid.seqid == 0) + return true; + + return false; +} + #endif /* CONFIG_NFS_V4_1 */ +static bool nfs4_match_stateid(const nfs4_stateid *s1, + const nfs4_stateid *s2) +{ + return memcmp(s1->data, s2->data, sizeof(s1->data)) == 0; +} + + struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, @@ -6331,7 +6354,7 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .call_sync = _nfs4_call_sync, - .validate_stateid = nfs4_validate_delegation_stateid, + .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, @@ -6342,7 +6365,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .call_sync = _nfs4_call_sync_session, - .validate_stateid = nfs41_validate_delegation_stateid, + .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, From d0b496d2fc08cc51000fcdd9739235d1cab890cd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:56 -0500 Subject: [PATCH 261/316] NFSv4: Rename encode_stateid() to encode_open_stateid() The current version of encode_stateid really only applies to open stateids. You can't use it for locks, delegations or layouts. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index bca8c77e5fe0..af11e8b5d367 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1528,7 +1528,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) hdr->replen += decode_putrootfh_maxsz; } -static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) +static void encode_open_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) { nfs4_stateid stateid; __be32 *p; @@ -1550,7 +1550,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, p = reserve_space(xdr, 4); *p = cpu_to_be32(OP_READ); - encode_stateid(xdr, args->context, args->lock_context, + encode_open_stateid(xdr, args->context, args->lock_context, hdr->minorversion); p = reserve_space(xdr, 12); @@ -1739,7 +1739,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg p = reserve_space(xdr, 4); *p = cpu_to_be32(OP_WRITE); - encode_stateid(xdr, args->context, args->lock_context, + encode_open_stateid(xdr, args->context, args->lock_context, hdr->minorversion); p = reserve_space(xdr, 16); From cb17e556f6202c200d38a2e0c05a5bd29060389f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:56 -0500 Subject: [PATCH 262/316] NFSv4: Add a helper for encoding opaque data Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index af11e8b5d367..de4cb5cfc318 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -885,6 +885,14 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) return p; } +static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, len); + xdr_encode_opaque_fixed(p, buf, len); +} + static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { __be32 *p; @@ -922,11 +930,7 @@ static void encode_nops(struct compound_hdr *hdr) static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) { - __be32 *p; - - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); - BUG_ON(p == NULL); - xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); + encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); } static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) From ea9d23f51041036b5d5d062dae2fafe0f670449c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:56 -0500 Subject: [PATCH 263/316] NFSv4: Add a helper for encoding stateids Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 113 ++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 51 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index de4cb5cfc318..c03ba77679ad 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -928,6 +928,11 @@ static void encode_nops(struct compound_hdr *hdr) *hdr->nops_p = htonl(hdr->nops); } +static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid) +{ + encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); +} + static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) { encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); @@ -1070,10 +1075,10 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg { __be32 *p; - p = reserve_space(xdr, 8+NFS4_STATEID_SIZE); + p = reserve_space(xdr, 8); *p++ = cpu_to_be32(OP_CLOSE); - *p++ = cpu_to_be32(arg->seqid->sequence->counter); - xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(arg->seqid->sequence->counter); + encode_nfs4_stateid(xdr, arg->stateid); hdr->nops++; hdr->replen += decode_close_maxsz; } @@ -1260,15 +1265,16 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); *p = cpu_to_be32(args->new_lock_owner); if (args->new_lock_owner){ - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); - *p++ = cpu_to_be32(args->open_seqid->sequence->counter); - p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(args->open_seqid->sequence->counter); + encode_nfs4_stateid(xdr, args->open_stateid); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(args->lock_seqid->sequence->counter); encode_lockowner(xdr, &args->lock_owner); } else { - p = reserve_space(xdr, NFS4_STATEID_SIZE+4); - p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE); + encode_nfs4_stateid(xdr, args->lock_stateid); + p = reserve_space(xdr, 4); *p = cpu_to_be32(args->lock_seqid->sequence->counter); } hdr->nops++; @@ -1293,11 +1299,12 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar { __be32 *p; - p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16); + p = reserve_space(xdr, 12); *p++ = cpu_to_be32(OP_LOCKU); *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); - *p++ = cpu_to_be32(args->seqid->sequence->counter); - p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(args->seqid->sequence->counter); + encode_nfs4_stateid(xdr, args->stateid); + p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->fl->fl_start); xdr_encode_hyper(p, nfs4_lock_length(args->fl)); hdr->nops++; @@ -1457,9 +1464,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc { __be32 *p; - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); - xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); + encode_nfs4_stateid(xdr, stateid); encode_string(xdr, name->len, name->name); } @@ -1488,9 +1495,10 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co { __be32 *p; - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); - *p++ = cpu_to_be32(OP_OPEN_CONFIRM); - p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_OPEN_CONFIRM); + encode_nfs4_stateid(xdr, arg->stateid); + p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->seqid->sequence->counter); hdr->nops++; hdr->replen += decode_open_confirm_maxsz; @@ -1500,9 +1508,10 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close { __be32 *p; - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); - *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE); - p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_OPEN_DOWNGRADE); + encode_nfs4_stateid(xdr, arg->stateid); + p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->fmode); hdr->nops++; @@ -1535,16 +1544,14 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) static void encode_open_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) { nfs4_stateid stateid; - __be32 *p; - p = reserve_space(xdr, NFS4_STATEID_SIZE); if (ctx->state != NULL) { nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); if (zero_seqid) stateid.stateid.seqid = 0; - xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); + encode_nfs4_stateid(xdr, &stateid); } else - xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); + encode_nfs4_stateid(xdr, &zero_stateid); } static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) @@ -1668,9 +1675,9 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun { __be32 *p; - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_SETATTR); - xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_SETATTR); + encode_nfs4_stateid(xdr, &zero_stateid); p = reserve_space(xdr, 2*4); *p++ = cpu_to_be32(1); *p = cpu_to_be32(FATTR4_WORD0_ACL); @@ -1697,9 +1704,9 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs { __be32 *p; - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_SETATTR); - xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_SETATTR); + encode_nfs4_stateid(xdr, &arg->stateid); hdr->nops++; hdr->replen += decode_setattr_maxsz; encode_attrs(xdr, arg->iap, server); @@ -1760,10 +1767,9 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state { __be32 *p; - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); - - *p++ = cpu_to_be32(OP_DELEGRETURN); - xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_DELEGRETURN); + encode_nfs4_stateid(xdr, stateid); hdr->nops++; hdr->replen += decode_delegreturn_maxsz; } @@ -1999,7 +2005,7 @@ encode_layoutget(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); + p = reserve_space(xdr, 40); *p++ = cpu_to_be32(OP_LAYOUTGET); *p++ = cpu_to_be32(0); /* Signal layout available */ *p++ = cpu_to_be32(args->type); @@ -2007,7 +2013,8 @@ encode_layoutget(struct xdr_stream *xdr, p = xdr_encode_hyper(p, args->range.offset); p = xdr_encode_hyper(p, args->range.length); p = xdr_encode_hyper(p, args->minlength); - p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 4); *p = cpu_to_be32(args->maxcount); dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", @@ -2032,13 +2039,14 @@ encode_layoutcommit(struct xdr_stream *xdr, dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, NFS_SERVER(args->inode)->pnfs_curr_ld->id); - p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); + p = reserve_space(xdr, 24); *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); /* Only whole file layouts */ p = xdr_encode_hyper(p, 0); /* offset */ p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ - *p++ = cpu_to_be32(0); /* reclaim */ - p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(0); /* reclaim */ + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 20); *p++ = cpu_to_be32(1); /* newoffset = TRUE */ p = xdr_encode_hyper(p, args->lastbytewritten); *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ @@ -2070,11 +2078,11 @@ encode_layoutreturn(struct xdr_stream *xdr, *p++ = cpu_to_be32(args->layout_type); *p++ = cpu_to_be32(IOMODE_ANY); *p = cpu_to_be32(RETURN_FILE); - p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); + p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, 0); p = xdr_encode_hyper(p, NFS4_MAX_UINT64); spin_lock(&args->inode->i_lock); - xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); + encode_nfs4_stateid(xdr, &args->stateid); spin_unlock(&args->inode->i_lock); if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( @@ -2107,10 +2115,10 @@ static void encode_test_stateid(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); + p = reserve_space(xdr, 8); *p++ = cpu_to_be32(OP_TEST_STATEID); - *p++ = cpu_to_be32(1); - xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(1); + encode_nfs4_stateid(xdr, args->stateid); hdr->nops++; hdr->replen += decode_test_stateid_maxsz; } @@ -2120,9 +2128,9 @@ static void encode_free_stateid(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_FREE_STATEID); - xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_FREE_STATEID); + encode_nfs4_stateid(xdr, args->stateid); hdr->nops++; hdr->replen += decode_free_stateid_maxsz; } @@ -5640,11 +5648,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, status = decode_op_hdr(xdr, OP_LAYOUTGET); if (status) return status; - p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->return_on_close = be32_to_cpup(p); + decode_stateid(xdr, &res->stateid); + p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; - res->return_on_close = be32_to_cpup(p++); - p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE); layout_count = be32_to_cpup(p); if (!layout_count) { dprintk("%s: server responded with empty layout array\n", From 1e3987c3052a48fbfc8f5d30214c825eff41192d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:56 -0500 Subject: [PATCH 264/316] NFSv4: Rename nfs4_copy_stateid() It is really a function for selecting the correct stateid to use in a read or write situation. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 624d4becf017..308d2f999c3d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -329,7 +329,7 @@ extern void nfs41_handle_server_scope(struct nfs_client *, struct server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); +extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b0647b387403..f181c70ea933 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1922,7 +1922,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { /* Use that stateid */ } else if (state != NULL) { - nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid); + nfs4_select_rw_stateid(&arg.stateid, state, current->files, current->tgid); } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d60e7ad2690e..6ba82271c868 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -888,7 +888,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) * Byte-range lock aware utility to initialize the stateid of read/write * requests. */ -void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid) +void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid) { struct nfs4_lock_state *lsp; int seq; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c03ba77679ad..38736dca1b18 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1546,7 +1546,7 @@ static void encode_open_stateid(struct xdr_stream *xdr, const struct nfs_open_co nfs4_stateid stateid; if (ctx->state != NULL) { - nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); + nfs4_select_rw_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); if (zero_seqid) stateid.stateid.seqid = 0; encode_nfs4_stateid(xdr, &stateid); From f597c53790f662662281b82b7692a22d2a4d4afa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:56 -0500 Subject: [PATCH 265/316] NFSv4: Add helpers for basic copying of stateids Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 14 ++++++-------- fs/nfs/nfs4_fs.h | 10 ++++++++++ fs/nfs/nfs4proc.c | 34 +++++++++++++++------------------- fs/nfs/nfs4state.c | 6 +++--- fs/nfs/pnfs.c | 10 ++++------ 5 files changed, 38 insertions(+), 36 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c7249e26e2e9..87f7544f3dce 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -105,7 +105,7 @@ again: continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; - if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) + if (!nfs4_stateid_match(&state->stateid, stateid)) continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); @@ -139,8 +139,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, if (delegation != NULL) { spin_lock(&delegation->lock); if (delegation->inode != NULL) { - memcpy(delegation->stateid.data, res->delegation.data, - sizeof(delegation->stateid.data)); + nfs4_stateid_copy(&delegation->stateid, &res->delegation); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; oldcred = delegation->cred; @@ -236,8 +235,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation = kmalloc(sizeof(*delegation), GFP_NOFS); if (delegation == NULL) return -ENOMEM; - memcpy(delegation->stateid.data, res->delegation.data, - sizeof(delegation->stateid.data)); + nfs4_stateid_copy(&delegation->stateid, &res->delegation); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; delegation->change_attr = inode->i_version; @@ -250,8 +248,8 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct old_delegation = rcu_dereference_protected(nfsi->delegation, lockdep_is_held(&clp->cl_lock)); if (old_delegation != NULL) { - if (memcmp(&delegation->stateid, &old_delegation->stateid, - sizeof(old_delegation->stateid)) == 0 && + if (nfs4_stateid_match(&delegation->stateid, + &old_delegation->stateid) && delegation->type == old_delegation->type) { goto out; } @@ -708,7 +706,7 @@ int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); if (delegation != NULL) { - memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); + nfs4_stateid_copy(dst, &delegation->stateid); ret = 1; } rcu_read_unlock(); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 308d2f999c3d..1c54ef3146d4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -349,6 +349,16 @@ struct nfs4_mount_data; extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; +static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src) +{ + memcpy(dst->data, src->data, sizeof(dst->data)); +} + +static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src) +{ + return memcmp(dst->data, src->data, sizeof(dst->data)) == 0; +} + #else #define nfs4_close_state(a, b) do { } while (0) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f181c70ea933..ce0ad81dd466 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -941,8 +941,8 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) { if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); - memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); + nfs4_stateid_copy(&state->stateid, stateid); + nfs4_stateid_copy(&state->open_stateid, stateid); switch (fmode) { case FMODE_READ: set_bit(NFS_O_RDONLY_STATE, &state->flags); @@ -970,7 +970,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s */ write_seqlock(&state->seqlock); if (deleg_stateid != NULL) { - memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); + nfs4_stateid_copy(&state->stateid, deleg_stateid); set_bit(NFS_DELEGATED_STATE, &state->flags); } if (open_stateid != NULL) @@ -1001,7 +1001,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat if (delegation == NULL) delegation = &deleg_cur->stateid; - else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0) + else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation)) goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); @@ -1062,7 +1062,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) break; } /* Save the delegation */ - memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); + nfs4_stateid_copy(&stateid, &delegation->stateid); rcu_read_unlock(); ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); if (ret != 0) @@ -1225,10 +1225,10 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * * Check if we need to update the current stateid. */ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && - memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { + !nfs4_stateid_match(&state->stateid, &state->open_stateid)) { write_seqlock(&state->seqlock); if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); + nfs4_stateid_copy(&state->stateid, &state->open_stateid); write_sequnlock(&state->seqlock); } return 0; @@ -1297,8 +1297,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs if (IS_ERR(opendata)) return PTR_ERR(opendata); opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; - memcpy(opendata->o_arg.u.delegation.data, stateid->data, - sizeof(opendata->o_arg.u.delegation.data)); + nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); ret = nfs4_open_recover(opendata, state); nfs4_opendata_put(opendata); return ret; @@ -1363,8 +1362,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; if (data->rpc_status == 0) { - memcpy(data->o_res.stateid.data, data->c_res.stateid.data, - sizeof(data->o_res.stateid.data)); + nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid); nfs_confirm_seqid(&data->owner->so_seqid, 0); renew_lease(data->o_res.server, data->timestamp); data->rpc_done = 1; @@ -1924,7 +1922,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, } else if (state != NULL) { nfs4_select_rw_stateid(&arg.stateid, state, current->files, current->tgid); } else - memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); + nfs4_stateid_copy(&arg.stateid, &zero_stateid); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (status == 0 && state != NULL) @@ -3989,7 +3987,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data->args.stateid = &data->stateid; data->args.bitmask = server->attr_bitmask; nfs_copy_fh(&data->fh, NFS_FH(inode)); - memcpy(&data->stateid, stateid, sizeof(data->stateid)); + nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; data->res.server = server; nfs_fattr_init(data->res.fattr); @@ -4172,9 +4170,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) return; switch (task->tk_status) { case 0: - memcpy(calldata->lsp->ls_stateid.data, - calldata->res.stateid.data, - sizeof(calldata->lsp->ls_stateid.data)); + nfs4_stateid_copy(&calldata->lsp->ls_stateid, + &calldata->res.stateid); renew_lease(calldata->server, calldata->timestamp); break; case -NFS4ERR_BAD_STATEID: @@ -4387,8 +4384,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) goto out; } if (data->rpc_status == 0) { - memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, - sizeof(data->lsp->ls_stateid.data)); + nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); } @@ -6292,7 +6288,7 @@ static bool nfs41_match_stateid(const nfs4_stateid *s1, static bool nfs4_match_stateid(const nfs4_stateid *s1, const nfs4_stateid *s2) { - return memcmp(s1->data, s2->data, sizeof(s1->data)) == 0; + return nfs4_stateid_match(s1, s2); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6ba82271c868..55c8a81cd6fb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -895,7 +895,7 @@ void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owne do { seq = read_seqbegin(&state->seqlock); - memcpy(dst, &state->stateid, sizeof(*dst)); + nfs4_stateid_copy(dst, &state->stateid); } while (read_seqretry(&state->seqlock, seq)); if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) return; @@ -903,7 +903,7 @@ void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owne spin_lock(&state->state_lock); lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) - memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + nfs4_stateid_copy(dst, &lsp->ls_stateid); spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); } @@ -1126,7 +1126,7 @@ void nfs_inode_find_state_and_recover(struct inode *inode, continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; - if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) + if (!nfs4_stateid_match(&state->stateid, stateid)) continue; nfs4_state_mark_reclaim_nograce(clp, state); found = true; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 402efc2f5b70..c190e9c2e3d2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -499,7 +499,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); newseq = be32_to_cpu(new->stateid.seqid); if ((int)(newseq - oldseq) > 0) { - memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); + nfs4_stateid_copy(&lo->plh_stateid, new); if (update_barrier) { u32 new_barrier = be32_to_cpu(new->stateid.seqid); @@ -549,11 +549,10 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, do { seq = read_seqbegin(&open_state->seqlock); - memcpy(dst->data, open_state->stateid.data, - sizeof(open_state->stateid.data)); + nfs4_stateid_copy(dst, &open_state->stateid); } while (read_seqretry(&open_state->seqlock, seq)); } else - memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data)); + nfs4_stateid_copy(dst, &lo->plh_stateid); spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); return status; @@ -1527,8 +1526,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) end_pos = nfsi->layout->plh_lwb; nfsi->layout->plh_lwb = 0; - memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, - sizeof(nfsi->layout->plh_stateid.data)); + nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); spin_unlock(&inode->i_lock); data->args.inode = inode; From 2d2f24add1ff903ff8e0ce61c5c05635cc636985 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:57 -0500 Subject: [PATCH 266/316] NFSv4: Simplify the struct nfs4_stateid Replace the union with the common struct stateid4 as defined in both RFC3530 and RFC5661. This makes it easier to access the sequence id, which will again make implementing support for parallel OPEN calls easier. Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 4 ++-- fs/nfs/nfs4_fs.h | 4 ++-- fs/nfs/nfs4proc.c | 7 +++---- fs/nfs/nfs4state.c | 4 ++-- fs/nfs/nfs4xdr.c | 6 +++--- fs/nfs/pnfs.c | 10 +++++----- include/linux/nfs4.h | 7 ++----- 7 files changed, 19 insertions(+), 23 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 5466829c7e77..fd6cfdb917da 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -138,10 +138,10 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) { __be32 *p; - p = read_buf(xdr, 16); + p = read_buf(xdr, NFS4_STATEID_SIZE); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(stateid->data, p, 16); + memcpy(stateid, p, NFS4_STATEID_SIZE); return 0; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1c54ef3146d4..16373df96f90 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -351,12 +351,12 @@ extern struct svc_version nfs4_callback_version4; static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src) { - memcpy(dst->data, src->data, sizeof(dst->data)); + memcpy(dst, src, sizeof(*dst)); } static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src) { - return memcmp(dst->data, src->data, sizeof(dst->data)) == 0; + return memcmp(dst, src, sizeof(*dst)) == 0; } #else diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ce0ad81dd466..e0e35288361c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6271,13 +6271,12 @@ static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) static bool nfs41_match_stateid(const nfs4_stateid *s1, const nfs4_stateid *s2) { - if (memcmp(s1->stateid.other, s2->stateid.other, - sizeof(s1->stateid.other)) != 0) + if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0) return false; - if (s1->stateid.seqid == s2->stateid.seqid) + if (s1->seqid == s2->seqid) return true; - if (s1->stateid.seqid == 0 || s2->stateid.seqid == 0) + if (s1->seqid == 0 || s2->seqid == 0) return true; return false; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 55c8a81cd6fb..1dad5c53c7fa 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1240,8 +1240,8 @@ restart: * Open state on this file cannot be recovered * All we can do is revert to using the zero stateid. */ - memset(state->stateid.data, 0, - sizeof(state->stateid.data)); + memset(&state->stateid, 0, + sizeof(state->stateid)); /* Mark the file as being 'closed' */ state->state = 0; break; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 38736dca1b18..76ef98632839 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -930,7 +930,7 @@ static void encode_nops(struct compound_hdr *hdr) static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid) { - encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); + encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); } static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) @@ -1548,7 +1548,7 @@ static void encode_open_stateid(struct xdr_stream *xdr, const struct nfs_open_co if (ctx->state != NULL) { nfs4_select_rw_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); if (zero_seqid) - stateid.stateid.seqid = 0; + stateid.seqid = 0; encode_nfs4_stateid(xdr, &stateid); } else encode_nfs4_stateid(xdr, &zero_stateid); @@ -4237,7 +4237,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) { - return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); + return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); } static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c190e9c2e3d2..6f1c1e3d12bc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -496,12 +496,12 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, { u32 oldseq, newseq; - oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); - newseq = be32_to_cpu(new->stateid.seqid); + oldseq = be32_to_cpu(lo->plh_stateid.seqid); + newseq = be32_to_cpu(new->seqid); if ((int)(newseq - oldseq) > 0) { nfs4_stateid_copy(&lo->plh_stateid, new); if (update_barrier) { - u32 new_barrier = be32_to_cpu(new->stateid.seqid); + u32 new_barrier = be32_to_cpu(new->seqid); if ((int)(new_barrier - lo->plh_barrier)) lo->plh_barrier = new_barrier; @@ -525,7 +525,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, int lget) { if ((stateid) && - (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) + (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) return true; return lo->plh_block_lgets || test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || @@ -759,7 +759,7 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier) } if (!found) { struct pnfs_layout_hdr *lo = nfsi->layout; - u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid); + u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); /* Since close does not return a layout stateid for use as * a barrier, we choose the worst-case barrier. diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 32345c2805c0..834df8bf08b6 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -183,15 +183,12 @@ struct nfs4_acl { typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; -struct nfs41_stateid { +struct nfs_stateid4 { __be32 seqid; char other[NFS4_STATEID_OTHER_SIZE]; } __attribute__ ((packed)); -typedef union { - char data[NFS4_STATEID_SIZE]; - struct nfs41_stateid stateid; -} nfs4_stateid; +typedef struct nfs_stateid4 nfs4_stateid; enum nfs_opnum4 { OP_ACCESS = 3, From 6fdfb0bc2a43f5deb612b7f79d9c7750708e0184 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:57 -0500 Subject: [PATCH 267/316] NFSv4: Minor clean ups for encode_string() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 76ef98632839..d6e8306d02a7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -897,8 +897,7 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char * { __be32 *p; - p = xdr_reserve_space(xdr, 4 + len); - BUG_ON(p == NULL); + p = reserve_space(xdr, 4 + len); xdr_encode_opaque(p, str, len); } @@ -915,8 +914,8 @@ static void encode_compound_hdr(struct xdr_stream *xdr, hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); - p = reserve_space(xdr, 4 + hdr->taglen + 8); - p = xdr_encode_opaque(p, hdr->tag, hdr->taglen); + encode_string(xdr, hdr->taglen, hdr->tag); + p = reserve_space(xdr, 8); *p++ = cpu_to_be32(hdr->minorversion); hdr->nops_p = p; *p = cpu_to_be32(hdr->nops); @@ -1216,9 +1215,9 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct { __be32 *p; - p = reserve_space(xdr, 8 + name->len); - *p++ = cpu_to_be32(OP_LINK); - xdr_encode_opaque(p, name->name, name->len); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_LINK); + encode_string(xdr, name->len, name->name); hdr->nops++; hdr->replen += decode_link_maxsz; } @@ -1324,12 +1323,11 @@ static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lo static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - int len = name->len; __be32 *p; - p = reserve_space(xdr, 8 + len); - *p++ = cpu_to_be32(OP_LOOKUP); - xdr_encode_opaque(p, name->name, len); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_LOOKUP); + encode_string(xdr, name->len, name->name); hdr->nops++; hdr->replen += decode_lookup_maxsz; } @@ -1521,12 +1519,11 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close static void encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) { - int len = fh->size; __be32 *p; - p = reserve_space(xdr, 8 + len); - *p++ = cpu_to_be32(OP_PUTFH); - xdr_encode_opaque(p, fh->data, len); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_PUTFH); + encode_string(xdr, fh->size, fh->data); hdr->nops++; hdr->replen += decode_putfh_maxsz; } @@ -1628,9 +1625,9 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc { __be32 *p; - p = reserve_space(xdr, 8 + name->len); - *p++ = cpu_to_be32(OP_REMOVE); - xdr_encode_opaque(p, name->name, name->len); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_REMOVE); + encode_string(xdr, name->len, name->name); hdr->nops++; hdr->replen += decode_remove_maxsz; } @@ -1776,12 +1773,11 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - int len = name->len; __be32 *p; - p = reserve_space(xdr, 8 + len); - *p++ = cpu_to_be32(OP_SECINFO); - xdr_encode_opaque(p, name->name, len); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_SECINFO); + encode_string(xdr, name->len, name->name); hdr->nops++; hdr->replen += decode_secinfo_maxsz; } From 4ade9821602ada8f56f3a3eb444dedbe42f1730e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:57 -0500 Subject: [PATCH 268/316] NFSv4: Add a helper for encoding NFSv4 sequence ids Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d6e8306d02a7..3b38ca5bafe8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -901,6 +901,20 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char * xdr_encode_opaque(p, str, len); } +static void encode_uint32(struct xdr_stream *xdr, u32 n) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(n); +} + +static void encode_nfs4_seqid(struct xdr_stream *xdr, + const struct nfs_seqid *seqid) +{ + encode_uint32(xdr, seqid->sequence->counter); +} + static void encode_compound_hdr(struct xdr_stream *xdr, struct rpc_rqst *req, struct compound_hdr *hdr) @@ -1074,9 +1088,9 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg { __be32 *p; - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_CLOSE); - *p = cpu_to_be32(arg->seqid->sequence->counter); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_CLOSE); + encode_nfs4_seqid(xdr, arg->seqid); encode_nfs4_stateid(xdr, arg->stateid); hdr->nops++; hdr->replen += decode_close_maxsz; @@ -1264,17 +1278,14 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); *p = cpu_to_be32(args->new_lock_owner); if (args->new_lock_owner){ - p = reserve_space(xdr, 4); - *p = cpu_to_be32(args->open_seqid->sequence->counter); + encode_nfs4_seqid(xdr, args->open_seqid); encode_nfs4_stateid(xdr, args->open_stateid); - p = reserve_space(xdr, 4); - *p = cpu_to_be32(args->lock_seqid->sequence->counter); + encode_nfs4_seqid(xdr, args->lock_seqid); encode_lockowner(xdr, &args->lock_owner); } else { encode_nfs4_stateid(xdr, args->lock_stateid); - p = reserve_space(xdr, 4); - *p = cpu_to_be32(args->lock_seqid->sequence->counter); + encode_nfs4_seqid(xdr, args->lock_seqid); } hdr->nops++; hdr->replen += decode_lock_maxsz; @@ -1298,10 +1309,10 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar { __be32 *p; - p = reserve_space(xdr, 12); + p = reserve_space(xdr, 8); *p++ = cpu_to_be32(OP_LOCKU); - *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); - *p = cpu_to_be32(args->seqid->sequence->counter); + *p = cpu_to_be32(nfs4_lock_type(args->fl, 0)); + encode_nfs4_seqid(xdr, args->seqid); encode_nfs4_stateid(xdr, args->stateid); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->fl->fl_start); @@ -1360,9 +1371,9 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, * owner 4 = 32 */ - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_OPEN); - *p = cpu_to_be32(arg->seqid->sequence->counter); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_OPEN); + encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->fmode); p = reserve_space(xdr, 32); p = xdr_encode_hyper(p, arg->clientid); @@ -1496,8 +1507,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co p = reserve_space(xdr, 4); *p = cpu_to_be32(OP_OPEN_CONFIRM); encode_nfs4_stateid(xdr, arg->stateid); - p = reserve_space(xdr, 4); - *p = cpu_to_be32(arg->seqid->sequence->counter); + encode_nfs4_seqid(xdr, arg->seqid); hdr->nops++; hdr->replen += decode_open_confirm_maxsz; } @@ -1509,8 +1519,7 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close p = reserve_space(xdr, 4); *p = cpu_to_be32(OP_OPEN_DOWNGRADE); encode_nfs4_stateid(xdr, arg->stateid); - p = reserve_space(xdr, 4); - *p = cpu_to_be32(arg->seqid->sequence->counter); + encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->fmode); hdr->nops++; hdr->replen += decode_open_downgrade_maxsz; From ab19b4813fdbdef8f9c8732d1f7a2a69ae78d00b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 18:13:57 -0500 Subject: [PATCH 269/316] NFSv4: Add a encode op helper Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 161 ++++++++++------------------------------------- 1 file changed, 32 insertions(+), 129 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3b38ca5bafe8..e9d4ac06b5d9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -935,6 +935,15 @@ static void encode_compound_hdr(struct xdr_stream *xdr, *p = cpu_to_be32(hdr->nops); } +static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op, + uint32_t replen, + struct compound_hdr *hdr) +{ + encode_uint32(xdr, op); + hdr->nops++; + hdr->replen += replen; +} + static void encode_nops(struct compound_hdr *hdr) { BUG_ON(hdr->nops > NFS4_MAX_OPS); @@ -1086,14 +1095,9 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_CLOSE); + encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr); encode_nfs4_seqid(xdr, arg->seqid); encode_nfs4_stateid(xdr, arg->stateid); - hdr->nops++; - hdr->replen += decode_close_maxsz; } static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) @@ -1172,8 +1176,7 @@ encode_getattr_three(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_GETATTR); + encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); if (bm2) { p = reserve_space(xdr, 16); *p++ = cpu_to_be32(3); @@ -1190,8 +1193,6 @@ encode_getattr_three(struct xdr_stream *xdr, *p++ = cpu_to_be32(1); *p = cpu_to_be32(bm0); } - hdr->nops++; - hdr->replen += decode_getattr_maxsz; } static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) @@ -1217,23 +1218,13 @@ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, stru static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_GETFH); - hdr->nops++; - hdr->replen += decode_getfh_maxsz; + encode_op_hdr(xdr, OP_GETFH, decode_getfh_maxsz, hdr); } static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_LINK); + encode_op_hdr(xdr, OP_LINK, decode_link_maxsz, hdr); encode_string(xdr, name->len, name->name); - hdr->nops++; - hdr->replen += decode_link_maxsz; } static inline int nfs4_lock_type(struct file_lock *fl, int block) @@ -1323,24 +1314,14 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_RELEASE_LOCKOWNER); + encode_op_hdr(xdr, OP_RELEASE_LOCKOWNER, decode_release_lockowner_maxsz, hdr); encode_lockowner(xdr, lowner); - hdr->nops++; - hdr->replen += decode_release_lockowner_maxsz; } static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_LOOKUP); + encode_op_hdr(xdr, OP_LOOKUP, decode_lookup_maxsz, hdr); encode_string(xdr, name->len, name->name); - hdr->nops++; - hdr->replen += decode_lookup_maxsz; } static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) @@ -1371,8 +1352,6 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, * owner 4 = 32 */ - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_OPEN); encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->fmode); p = reserve_space(xdr, 32); @@ -1481,6 +1460,7 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) { + encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr); encode_openhdr(xdr, arg); encode_opentype(xdr, arg); switch (arg->claim) { @@ -1496,55 +1476,33 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, default: BUG(); } - hdr->nops++; - hdr->replen += decode_open_maxsz; } static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_OPEN_CONFIRM); + encode_op_hdr(xdr, OP_OPEN_CONFIRM, decode_open_confirm_maxsz, hdr); encode_nfs4_stateid(xdr, arg->stateid); encode_nfs4_seqid(xdr, arg->seqid); - hdr->nops++; - hdr->replen += decode_open_confirm_maxsz; } static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_OPEN_DOWNGRADE); + encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr); encode_nfs4_stateid(xdr, arg->stateid); encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->fmode); - hdr->nops++; - hdr->replen += decode_open_downgrade_maxsz; } static void encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_PUTFH); + encode_op_hdr(xdr, OP_PUTFH, decode_putfh_maxsz, hdr); encode_string(xdr, fh->size, fh->data); - hdr->nops++; - hdr->replen += decode_putfh_maxsz; } static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_PUTROOTFH); - hdr->nops++; - hdr->replen += decode_putrootfh_maxsz; + encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } static void encode_open_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) @@ -1564,17 +1522,13 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, { __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_READ); - + encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); encode_open_stateid(xdr, args->context, args->lock_context, hdr->minorversion); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); *p = cpu_to_be32(args->count); - hdr->nops++; - hdr->replen += decode_read_maxsz; } static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) @@ -1622,35 +1576,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_READLINK); - hdr->nops++; - hdr->replen += decode_readlink_maxsz; + encode_op_hdr(xdr, OP_READLINK, decode_readlink_maxsz, hdr); } static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_REMOVE); + encode_op_hdr(xdr, OP_REMOVE, decode_remove_maxsz, hdr); encode_string(xdr, name->len, name->name); - hdr->nops++; - hdr->replen += decode_remove_maxsz; } static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_RENAME); + encode_op_hdr(xdr, OP_RENAME, decode_rename_maxsz, hdr); encode_string(xdr, oldname->len, oldname->name); encode_string(xdr, newname->len, newname->name); - hdr->nops++; - hdr->replen += decode_rename_maxsz; } static void encode_renew(struct xdr_stream *xdr, clientid4 clid, @@ -1668,12 +1607,7 @@ static void encode_renew(struct xdr_stream *xdr, clientid4 clid, static void encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_RESTOREFH); - hdr->nops++; - hdr->replen += decode_restorefh_maxsz; + encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr); } static void @@ -1681,8 +1615,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun { __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_SETATTR); + encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr); encode_nfs4_stateid(xdr, &zero_stateid); p = reserve_space(xdr, 2*4); *p++ = cpu_to_be32(1); @@ -1691,30 +1624,18 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); - hdr->nops++; - hdr->replen += decode_setacl_maxsz; } static void encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_SAVEFH); - hdr->nops++; - hdr->replen += decode_savefh_maxsz; + encode_op_hdr(xdr, OP_SAVEFH, decode_savefh_maxsz, hdr); } static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_SETATTR); + encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); encode_nfs4_stateid(xdr, &arg->stateid); - hdr->nops++; - hdr->replen += decode_setattr_maxsz; encode_attrs(xdr, arg->iap, server); } @@ -1753,9 +1674,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg { __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_WRITE); - + encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); encode_open_stateid(xdr, args->context, args->lock_context, hdr->minorversion); @@ -1765,30 +1684,18 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg *p = cpu_to_be32(args->count); xdr_write_pages(xdr, args->pages, args->pgbase, args->count); - hdr->nops++; - hdr->replen += decode_write_maxsz; } static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_DELEGRETURN); + encode_op_hdr(xdr, OP_DELEGRETURN, decode_delegreturn_maxsz, hdr); encode_nfs4_stateid(xdr, stateid); - hdr->nops++; - hdr->replen += decode_delegreturn_maxsz; } static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_SECINFO); + encode_op_hdr(xdr, OP_SECINFO, decode_secinfo_maxsz, hdr); encode_string(xdr, name->len, name->name); - hdr->nops++; - hdr->replen += decode_secinfo_maxsz; } #if defined(CONFIG_NFS_V4_1) @@ -2132,12 +2039,8 @@ static void encode_free_stateid(struct xdr_stream *xdr, struct nfs41_free_stateid_args *args, struct compound_hdr *hdr) { - __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_FREE_STATEID); + encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr); encode_nfs4_stateid(xdr, args->stateid); - hdr->nops++; - hdr->replen += decode_free_stateid_maxsz; } #endif /* CONFIG_NFS_V4_1 */ From cd93710e8d290711ba2e08e1d1a380013aad667d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 2 Mar 2012 17:14:31 -0500 Subject: [PATCH 270/316] NFS: Fix nfs4_verifier memory alignment Clean up due to code review. The nfs4_verifier's data field is not guaranteed to be u32-aligned. Casting an array of chars to a u32 * is considered generally hazardous. Fix this by using a __be32 array to generate a verifier's contents, and then byte-copy the contents into the verifier field. The contents of a verifier, for all intents and purposes, are opaque bytes. Only local code that generates a verifier need know the actual content and format. Everyone else compares the full byte array for exact equality. Also, sizeof(nfs4_verifer) is the size of the in-core verifier data structure, but NFS4_VERIFIER_SIZE is the number of octets in an XDR'd verifier. The two are not interchangeable, even if they happen to have the same value. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 32 +++++++++++++++++++------------- fs/nfs/nfs4xdr.c | 40 ++++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e0e35288361c..1ec05222ccbc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -836,13 +836,15 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.dir_bitmask = server->cache_consistency_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; if (attrs != NULL && attrs->ia_valid != 0) { - u32 *s; + __be32 verf[2]; p->o_arg.u.attrs = &p->attrs; memcpy(&p->attrs, attrs, sizeof(p->attrs)); - s = (u32 *) p->o_arg.u.verifier.data; - s[0] = jiffies; - s[1] = current->pid; + + verf[0] = jiffies; + verf[1] = current->pid; + memcpy(p->o_arg.u.verifier.data, verf, + sizeof(p->o_arg.u.verifier.data)); } p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; @@ -3819,6 +3821,16 @@ wait_on_recovery: return -EAGAIN; } +static void nfs4_construct_boot_verifier(struct nfs_client *clp, + nfs4_verifier *bootverf) +{ + __be32 verf[2]; + + verf[0] = htonl((u32)clp->cl_boot_time.tv_sec); + verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec); + memcpy(bootverf->data, verf, sizeof(bootverf->data)); +} + int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred, struct nfs4_setclientid_res *res) @@ -3835,13 +3847,10 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; - __be32 *p; int loop = 0; int status; - p = (__be32*)sc_verifier.data; - *p++ = htonl((u32)clp->cl_boot_time.tv_sec); - *p = htonl((u32)clp->cl_boot_time.tv_nsec); + nfs4_construct_boot_verifier(clp, &sc_verifier); for(;;) { rcu_read_lock(); @@ -4933,6 +4942,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) { nfs4_verifier verifier; struct nfs41_exchange_id_args args = { + .verifier = &verifier, .client = clp, .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, }; @@ -4946,15 +4956,11 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) .rpc_resp = &res, .rpc_cred = cred, }; - __be32 *p; dprintk("--> %s\n", __func__); BUG_ON(clp == NULL); - p = (u32 *)verifier.data; - *p++ = htonl((u32)clp->cl_boot_time.tv_sec); - *p = htonl((u32)clp->cl_boot_time.tv_nsec); - args.verifier = &verifier; + nfs4_construct_boot_verifier(clp, &verifier); args.id_len = scnprintf(args.id, sizeof(args.id), "%s/%s.%s/%u", diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e9d4ac06b5d9..62effaf579c4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1538,7 +1538,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg FATTR4_WORD1_MOUNTED_ON_FILEID, }; uint32_t dircount = readdir->count >> 1; - __be32 *p; + __be32 *p, verf[2]; if (readdir->plus) { attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| @@ -1553,10 +1553,11 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) attrs[0] |= FATTR4_WORD0_FILEID; - p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); + p = reserve_space(xdr, 12); *p++ = cpu_to_be32(OP_READDIR); p = xdr_encode_hyper(p, readdir->cookie); - p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); + encode_nfs4_verifier(xdr, &readdir->verifier); + p = reserve_space(xdr, 20); *p++ = cpu_to_be32(dircount); *p++ = cpu_to_be32(readdir->count); *p++ = cpu_to_be32(2); @@ -1565,11 +1566,11 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); hdr->nops++; hdr->replen += decode_readdir_maxsz; + memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", __func__, (unsigned long long)readdir->cookie, - ((u32 *)readdir->verifier.data)[0], - ((u32 *)readdir->verifier.data)[1], + verf[0], verf[1], attrs[0] & readdir->bitmask[0], attrs[1] & readdir->bitmask[1]); } @@ -1643,9 +1644,9 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie { __be32 *p; - p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE); - *p++ = cpu_to_be32(OP_SETCLIENTID); - xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_SETCLIENTID); + encode_nfs4_verifier(xdr, setclientid->sc_verifier); encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); p = reserve_space(xdr, 4); @@ -1662,10 +1663,10 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4 { __be32 *p; - p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); + p = reserve_space(xdr, 12); *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); p = xdr_encode_hyper(p, arg->clientid); - xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE); + encode_nfs4_verifier(xdr, &arg->confirm); hdr->nops++; hdr->replen += decode_setclientid_confirm_maxsz; } @@ -1708,9 +1709,9 @@ static void encode_exchange_id(struct xdr_stream *xdr, char impl_name[NFS4_OPAQUE_LIMIT]; int len = 0; - p = reserve_space(xdr, 4 + sizeof(args->verifier->data)); - *p++ = cpu_to_be32(OP_EXCHANGE_ID); - xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data)); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_EXCHANGE_ID); + encode_nfs4_verifier(xdr, args->verifier); encode_string(xdr, args->id_len, args->id); @@ -4162,7 +4163,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) static int decode_verifier(struct xdr_stream *xdr, void *verifier) { - return decode_opaque_fixed(xdr, verifier, 8); + return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); } static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) @@ -4854,17 +4855,16 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n size_t hdrlen; u32 recvd, pglen = rcvbuf->page_len; int status; + __be32 verf[2]; status = decode_op_hdr(xdr, OP_READDIR); if (!status) status = decode_verifier(xdr, readdir->verifier.data); if (unlikely(status)) return status; + memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: verifier = %08x:%08x\n", - __func__, - ((u32 *)readdir->verifier.data)[0], - ((u32 *)readdir->verifier.data)[1]); - + __func__, verf[0], verf[1]); hdrlen = (char *) xdr->p - (char *) iov->iov_base; recvd = rcvbuf->len - hdrlen; @@ -5111,7 +5111,7 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) goto out_overflow; res->count = be32_to_cpup(p++); res->verf->committed = be32_to_cpup(p++); - memcpy(res->verf->verifier, p, 8); + memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -5455,7 +5455,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr, p += 2; /* Read verifier */ - p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8); + p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE); res->num_devs = be32_to_cpup(p); From 700195142185c05757cfd27f8070ae0e9e07710b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Mar 2012 20:49:32 -0500 Subject: [PATCH 271/316] NFSv4: Cleanup - convert more functions to use encode_op_hdr Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 62effaf579c4..f958849cb304 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1644,8 +1644,7 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie { __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_SETCLIENTID); + encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); encode_nfs4_verifier(xdr, setclientid->sc_verifier); encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); @@ -1655,8 +1654,6 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); p = reserve_space(xdr, 4); *p = cpu_to_be32(setclientid->sc_cb_ident); - hdr->nops++; - hdr->replen += decode_setclientid_maxsz; } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) @@ -1709,8 +1706,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, char impl_name[NFS4_OPAQUE_LIMIT]; int len = 0; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_EXCHANGE_ID); + encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); encode_nfs4_verifier(xdr, args->verifier); encode_string(xdr, args->id_len, args->id); @@ -1740,9 +1736,6 @@ static void encode_exchange_id(struct xdr_stream *xdr, *p = cpu_to_be32(0); } else *p = cpu_to_be32(0); /* implementation id array length=0 */ - - hdr->nops++; - hdr->replen += decode_exchange_id_maxsz; } static void encode_create_session(struct xdr_stream *xdr, From 475d4ba02c3748b69cc71fa5c11c4b281cac5928 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Mar 2012 11:27:16 -0500 Subject: [PATCH 272/316] NFSv4: More xdr cleanups Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 164 +++++++++++++++-------------------------------- 1 file changed, 50 insertions(+), 114 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f958849cb304..4ea9f50a32f2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1084,13 +1084,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_ACCESS); - *p = cpu_to_be32(access); - hdr->nops++; - hdr->replen += decode_access_maxsz; + encode_op_hdr(xdr, OP_ACCESS, decode_access_maxsz, hdr); + encode_uint32(xdr, access); } static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) @@ -1104,21 +1099,18 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar { __be32 *p; - p = reserve_space(xdr, 16); - *p++ = cpu_to_be32(OP_COMMIT); + encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr); + p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); *p = cpu_to_be32(args->count); - hdr->nops++; - hdr->replen += decode_commit_maxsz; } static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_CREATE); - *p = cpu_to_be32(create->ftype); + encode_op_hdr(xdr, OP_CREATE, decode_create_maxsz, hdr); + encode_uint32(xdr, create->ftype); switch (create->ftype) { case NF4LNK: @@ -1138,9 +1130,6 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * } encode_string(xdr, create->name->len, create->name->name); - hdr->nops++; - hdr->replen += decode_create_maxsz; - encode_attrs(xdr, create->attrs, create->server); } @@ -1148,25 +1137,21 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c { __be32 *p; - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(OP_GETATTR); + encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); + p = reserve_space(xdr, 8); *p++ = cpu_to_be32(1); *p = cpu_to_be32(bitmap); - hdr->nops++; - hdr->replen += decode_getattr_maxsz; } static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 16); - *p++ = cpu_to_be32(OP_GETATTR); + encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); + p = reserve_space(xdr, 12); *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(bm0); *p = cpu_to_be32(bm1); - hdr->nops++; - hdr->replen += decode_getattr_maxsz; } static void @@ -1261,8 +1246,8 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args { __be32 *p; - p = reserve_space(xdr, 32); - *p++ = cpu_to_be32(OP_LOCK); + encode_op_hdr(xdr, OP_LOCK, decode_lock_maxsz, hdr); + p = reserve_space(xdr, 28); *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block)); *p++ = cpu_to_be32(args->reclaim); p = xdr_encode_hyper(p, args->fl->fl_start); @@ -1278,38 +1263,31 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args encode_nfs4_stateid(xdr, args->lock_stateid); encode_nfs4_seqid(xdr, args->lock_seqid); } - hdr->nops++; - hdr->replen += decode_lock_maxsz; } static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 24); - *p++ = cpu_to_be32(OP_LOCKT); + encode_op_hdr(xdr, OP_LOCKT, decode_lockt_maxsz, hdr); + p = reserve_space(xdr, 20); *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); p = xdr_encode_hyper(p, args->fl->fl_start); p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); encode_lockowner(xdr, &args->lock_owner); - hdr->nops++; - hdr->replen += decode_lockt_maxsz; } static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_LOCKU); - *p = cpu_to_be32(nfs4_lock_type(args->fl, 0)); + encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr); + encode_uint32(xdr, nfs4_lock_type(args->fl, 0)); encode_nfs4_seqid(xdr, args->seqid); encode_nfs4_stateid(xdr, args->stateid); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->fl->fl_start); xdr_encode_hyper(p, nfs4_lock_length(args->fl)); - hdr->nops++; - hdr->replen += decode_locku_maxsz; } static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr) @@ -1553,8 +1531,8 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) attrs[0] |= FATTR4_WORD0_FILEID; - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(OP_READDIR); + encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); + p = reserve_space(xdr, 8); p = xdr_encode_hyper(p, readdir->cookie); encode_nfs4_verifier(xdr, &readdir->verifier); p = reserve_space(xdr, 20); @@ -1564,8 +1542,6 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); - hdr->nops++; - hdr->replen += decode_readdir_maxsz; memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", __func__, @@ -1598,11 +1574,9 @@ static void encode_renew(struct xdr_stream *xdr, clientid4 clid, { __be32 *p; - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(OP_RENEW); + encode_op_hdr(xdr, OP_RENEW, decode_renew_maxsz, hdr); + p = reserve_space(xdr, 8); xdr_encode_hyper(p, clid); - hdr->nops++; - hdr->replen += decode_renew_maxsz; } static void @@ -1660,12 +1634,11 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4 { __be32 *p; - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); + encode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM, + decode_setclientid_confirm_maxsz, hdr); + p = reserve_space(xdr, 8); p = xdr_encode_hyper(p, arg->clientid); encode_nfs4_verifier(xdr, &arg->confirm); - hdr->nops++; - hdr->replen += decode_setclientid_confirm_maxsz; } static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) @@ -1758,8 +1731,8 @@ static void encode_create_session(struct xdr_stream *xdr, len = scnprintf(machine_name, sizeof(machine_name), "%s", clp->cl_ipaddr); - p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); - *p++ = cpu_to_be32(OP_CREATE_SESSION); + encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr); + p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12); p = xdr_encode_hyper(p, clp->cl_clientid); *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ *p++ = cpu_to_be32(args->flags); /*flags */ @@ -1792,33 +1765,22 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* UID */ *p++ = cpu_to_be32(0); /* GID */ *p = cpu_to_be32(0); /* No more gids */ - hdr->nops++; - hdr->replen += decode_create_session_maxsz; } static void encode_destroy_session(struct xdr_stream *xdr, struct nfs4_session *session, struct compound_hdr *hdr) { - __be32 *p; - p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN); - *p++ = cpu_to_be32(OP_DESTROY_SESSION); - xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); - hdr->nops++; - hdr->replen += decode_destroy_session_maxsz; + encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr); + encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); } static void encode_reclaim_complete(struct xdr_stream *xdr, struct nfs41_reclaim_complete_args *args, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE); - *p++ = cpu_to_be32(args->one_fs); - hdr->nops++; - hdr->replen += decode_reclaim_complete_maxsz; + encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr); + encode_uint32(xdr, args->one_fs); } #endif /* CONFIG_NFS_V4_1 */ @@ -1840,8 +1802,7 @@ static void encode_sequence(struct xdr_stream *xdr, WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); slot = tp->slots + args->sa_slotid; - p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16); - *p++ = cpu_to_be32(OP_SEQUENCE); + encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr); /* * Sessionid + seqid + slotid + max slotid + cache_this @@ -1855,13 +1816,12 @@ static void encode_sequence(struct xdr_stream *xdr, ((u32 *)session->sess_id.data)[3], slot->seq_nr, args->sa_slotid, tp->highest_used_slotid, args->sa_cache_this); + p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16); p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(slot->seq_nr); *p++ = cpu_to_be32(args->sa_slotid); *p++ = cpu_to_be32(tp->highest_used_slotid); *p = cpu_to_be32(args->sa_cache_this); - hdr->nops++; - hdr->replen += decode_sequence_maxsz; #endif /* CONFIG_NFS_V4_1 */ } @@ -1876,14 +1836,12 @@ encode_getdevicelist(struct xdr_stream *xdr, .data = "dummmmmy", }; - p = reserve_space(xdr, 20); - *p++ = cpu_to_be32(OP_GETDEVICELIST); + encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); + p = reserve_space(xdr, 16); *p++ = cpu_to_be32(args->layoutclass); *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); xdr_encode_hyper(p, 0ULL); /* cookie */ encode_nfs4_verifier(xdr, &dummy); - hdr->nops++; - hdr->replen += decode_getdevicelist_maxsz; } static void @@ -1893,15 +1851,13 @@ encode_getdeviceinfo(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); - *p++ = cpu_to_be32(OP_GETDEVICEINFO); + encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); + p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE); p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, NFS4_DEVICEID4_SIZE); *p++ = cpu_to_be32(args->pdev->layout_type); *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ *p++ = cpu_to_be32(0); /* bitmap length 0 */ - hdr->nops++; - hdr->replen += decode_getdeviceinfo_maxsz; } static void @@ -1911,8 +1867,8 @@ encode_layoutget(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 40); - *p++ = cpu_to_be32(OP_LAYOUTGET); + encode_op_hdr(xdr, OP_LAYOUTGET, decode_layoutget_maxsz, hdr); + p = reserve_space(xdr, 36); *p++ = cpu_to_be32(0); /* Signal layout available */ *p++ = cpu_to_be32(args->type); *p++ = cpu_to_be32(args->range.iomode); @@ -1920,8 +1876,7 @@ encode_layoutget(struct xdr_stream *xdr, p = xdr_encode_hyper(p, args->range.length); p = xdr_encode_hyper(p, args->minlength); encode_nfs4_stateid(xdr, &args->stateid); - p = reserve_space(xdr, 4); - *p = cpu_to_be32(args->maxcount); + encode_uint32(xdr, args->maxcount); dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", __func__, @@ -1930,8 +1885,6 @@ encode_layoutget(struct xdr_stream *xdr, (unsigned long)args->range.offset, (unsigned long)args->range.length, args->maxcount); - hdr->nops++; - hdr->replen += decode_layoutget_maxsz; } static int @@ -1945,8 +1898,8 @@ encode_layoutcommit(struct xdr_stream *xdr, dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, NFS_SERVER(args->inode)->pnfs_curr_ld->id); - p = reserve_space(xdr, 24); - *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); + encode_op_hdr(xdr, OP_LAYOUTCOMMIT, decode_layoutcommit_maxsz, hdr); + p = reserve_space(xdr, 20); /* Only whole file layouts */ p = xdr_encode_hyper(p, 0); /* offset */ p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ @@ -1961,13 +1914,9 @@ encode_layoutcommit(struct xdr_stream *xdr, if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( NFS_I(inode)->layout, xdr, args); - else { - p = reserve_space(xdr, 4); - *p = cpu_to_be32(0); /* no layout-type payload */ - } + else + encode_uint32(xdr, 0); /* no layout-type payload */ - hdr->nops++; - hdr->replen += decode_layoutcommit_maxsz; return 0; } @@ -1978,8 +1927,8 @@ encode_layoutreturn(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 20); - *p++ = cpu_to_be32(OP_LAYOUTRETURN); + encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr); + p = reserve_space(xdr, 16); *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ *p++ = cpu_to_be32(args->layout_type); *p++ = cpu_to_be32(IOMODE_ANY); @@ -1993,12 +1942,8 @@ encode_layoutreturn(struct xdr_stream *xdr, if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( NFS_I(args->inode)->layout, xdr, args); - } else { - p = reserve_space(xdr, 4); - *p = cpu_to_be32(0); - } - hdr->nops++; - hdr->replen += decode_layoutreturn_maxsz; + } else + encode_uint32(xdr, 0); } static int @@ -2006,12 +1951,8 @@ encode_secinfo_no_name(struct xdr_stream *xdr, const struct nfs41_secinfo_no_name_args *args, struct compound_hdr *hdr) { - __be32 *p; - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); - *p++ = cpu_to_be32(args->style); - hdr->nops++; - hdr->replen += decode_secinfo_no_name_maxsz; + encode_op_hdr(xdr, OP_SECINFO_NO_NAME, decode_secinfo_no_name_maxsz, hdr); + encode_uint32(xdr, args->style); return 0; } @@ -2019,14 +1960,9 @@ static void encode_test_stateid(struct xdr_stream *xdr, struct nfs41_test_stateid_args *args, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_TEST_STATEID); - *p = cpu_to_be32(1); + encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr); + encode_uint32(xdr, 1); encode_nfs4_stateid(xdr, args->stateid); - hdr->nops++; - hdr->replen += decode_test_stateid_maxsz; } static void encode_free_stateid(struct xdr_stream *xdr, From ff2eb6818d0d5b2691c112f51c539a817fcc59fc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Mar 2012 11:40:12 -0500 Subject: [PATCH 273/316] NFSv4: Add a helper encode_uint64 Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4ea9f50a32f2..e4bb8e6409a7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -909,6 +909,14 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n) *p = cpu_to_be32(n); } +static void encode_uint64(struct xdr_stream *xdr, u64 n) +{ + __be32 *p; + + p = reserve_space(xdr, 8); + xdr_encode_hyper(p, n); +} + static void encode_nfs4_seqid(struct xdr_stream *xdr, const struct nfs_seqid *seqid) { @@ -1532,8 +1540,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg attrs[0] |= FATTR4_WORD0_FILEID; encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); - p = reserve_space(xdr, 8); - p = xdr_encode_hyper(p, readdir->cookie); + encode_uint64(xdr, readdir->cookie); encode_nfs4_verifier(xdr, &readdir->verifier); p = reserve_space(xdr, 20); *p++ = cpu_to_be32(dircount); @@ -1572,11 +1579,8 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co static void encode_renew(struct xdr_stream *xdr, clientid4 clid, struct compound_hdr *hdr) { - __be32 *p; - encode_op_hdr(xdr, OP_RENEW, decode_renew_maxsz, hdr); - p = reserve_space(xdr, 8); - xdr_encode_hyper(p, clid); + encode_uint64(xdr, clid); } static void @@ -1632,12 +1636,9 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) { - __be32 *p; - encode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM, decode_setclientid_confirm_maxsz, hdr); - p = reserve_space(xdr, 8); - p = xdr_encode_hyper(p, arg->clientid); + encode_uint64(xdr, arg->clientid); encode_nfs4_verifier(xdr, &arg->confirm); } From d8e0539ebdff5ff27fa7eb019715d9dfb5171a3b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 6 Mar 2012 20:46:43 -0500 Subject: [PATCH 274/316] NFS: add filehandle crc for debug display Match wireshark's CRC-32 hash for easier debugging Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 23 ++++++++++++++++++++--- include/linux/nfs_fs.h | 8 ++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ba03b7908149..0337554f1561 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -1045,7 +1046,23 @@ struct nfs_fh *nfs_alloc_fhandle(void) return fh; } -/** +#ifdef RPC_DEBUG +/* + * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle + * in the same way that wireshark does + * + * @fh: file handle + * + * For debugging only. + */ +u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) +{ + /* wireshark uses 32-bit AUTODIN crc and does a bitwise + * not on the result */ + return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size); +} + +/* * _nfs_display_fhandle - display an NFS file handle on the console * * @fh: file handle to display @@ -1053,7 +1070,6 @@ struct nfs_fh *nfs_alloc_fhandle(void) * * For debugging only. */ -#ifdef RPC_DEBUG void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) { unsigned short i; @@ -1063,7 +1079,8 @@ void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) return; } - printk(KERN_DEFAULT "%s at %p is %u bytes:\n", caption, fh, fh->size); + printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n", + caption, fh, fh->size, _nfs_display_fhandle_hash(fh)); for (i = 0; i < fh->size; i += 16) { __be32 *pos = (__be32 *)&fh->data[i]; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c07a757649dc..ce8e4361ad14 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -396,6 +396,11 @@ static inline void nfs_free_fhandle(const struct nfs_fh *fh) } #ifdef RPC_DEBUG +extern u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh); +static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh) +{ + return _nfs_display_fhandle_hash(fh); +} extern void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption); #define nfs_display_fhandle(fh, caption) \ do { \ @@ -403,6 +408,9 @@ extern void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption); _nfs_display_fhandle(fh, caption); \ } while (0) #else +static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh) +{ +} static inline void nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) { From 4f1abd226d80ef763c50e3930b369b63dffbb312 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 6 Mar 2012 21:58:20 -0500 Subject: [PATCH 275/316] NFS: add fh_crc to debug output Print the filehandle crc in two debug messages Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0337554f1561..70e25c9c5670 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -390,9 +390,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); - dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", + dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), + nfs_display_fhandle_hash(fh), atomic_read(&inode->i_count)); out: @@ -1274,8 +1275,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long now = jiffies; unsigned long save_cache_validity; - dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, + nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) From 9cb8196839ab4ec87710526e9c43ac7f5dba69d3 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 7 Mar 2012 10:49:41 -0500 Subject: [PATCH 276/316] NFSv4.1 handle DS stateid errors Handle DS READ and WRITE stateid errors by recovering the stateid on the MDS. NFS4ERR_OLD_STATEID is ignored as the client always sends a state sequenceid of zero for DS READ and WRITE stateids. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 1 + fs/nfs/nfs4filelayout.c | 29 ++++++++++++++++++++++++++++- fs/nfs/nfs4state.c | 2 ++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 87f7544f3dce..97d53574bf53 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -474,6 +474,7 @@ void nfs_remove_bad_delegation(struct inode *inode) nfs_free_delegation(delegation); } } +EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); /** * nfs_expire_all_delegation_types diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 47e8f3435d38..b2d3bb5971bb 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -36,6 +36,7 @@ #include #include "internal.h" +#include "delegation.h" #include "nfs4filelayout.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -86,12 +87,31 @@ static int filelayout_async_handle_error(struct rpc_task *task, struct nfs_client *clp, int *reset) { + struct nfs_server *mds_server = NFS_SERVER(state->inode); + struct nfs_client *mds_client = mds_server->nfs_client; + if (task->tk_status >= 0) return 0; - *reset = 0; switch (task->tk_status) { + /* MDS state errors */ + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + if (state != NULL) + nfs_remove_bad_delegation(state->inode); + case -NFS4ERR_OPENMODE: + if (state == NULL) + break; + nfs4_schedule_stateid_recovery(mds_server, state); + goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + if (state != NULL) + nfs4_schedule_stateid_recovery(mds_server, state); + nfs4_schedule_lease_recovery(mds_client); + goto wait_on_recovery; + /* DS session errors */ case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: @@ -117,8 +137,15 @@ static int filelayout_async_handle_error(struct rpc_task *task, *reset = 1; break; } +out: task->tk_status = 0; return -EAGAIN; +wait_on_recovery: + rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) + rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); + goto out; + } /* NFS_PROTO call done callback routines */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1dad5c53c7fa..a58d02a0c27d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1072,6 +1072,7 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); nfs4_schedule_state_manager(clp); } +EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); void nfs4_schedule_path_down_recovery(struct nfs_client *clp) { @@ -1109,6 +1110,7 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4 nfs4_state_mark_reclaim_nograce(clp, state); nfs4_schedule_state_manager(clp); } +EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); void nfs_inode_find_state_and_recover(struct inode *inode, const nfs4_stateid *stateid) From cf470c3e004efe16d73dc8ba9b29bdc9a5327cda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Mar 2012 13:49:12 -0500 Subject: [PATCH 277/316] NFSv4: Don't free the nfs4_lock_state until after the release_lockowner Otherwise we can end up with sequence id problems if the client reuses the owner_id before the server has processed the release_lockowner Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 +++- fs/nfs/nfs4proc.c | 31 ++++++++++++++++++++----------- fs/nfs/nfs4state.c | 8 +++++--- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 16373df96f90..026878cb2698 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -213,7 +213,7 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, boo extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); -extern void nfs4_release_lockowner(const struct nfs4_lock_state *); +extern int nfs4_release_lockowner(struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; #if defined(CONFIG_NFS_V4_1) @@ -338,6 +338,8 @@ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); +extern void nfs4_free_lock_state(struct nfs4_lock_state *lsp); + extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1ec05222ccbc..32e0d08a9771 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4745,8 +4745,15 @@ out: return err; } +struct nfs_release_lockowner_data { + struct nfs4_lock_state *lsp; + struct nfs_release_lockowner_args args; +}; + static void nfs4_release_lockowner_release(void *calldata) { + struct nfs_release_lockowner_data *data = calldata; + nfs4_free_lock_state(data->lsp); kfree(calldata); } @@ -4754,24 +4761,26 @@ const struct rpc_call_ops nfs4_release_lockowner_ops = { .rpc_release = nfs4_release_lockowner_release, }; -void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) +int nfs4_release_lockowner(struct nfs4_lock_state *lsp) { struct nfs_server *server = lsp->ls_state->owner->so_server; - struct nfs_release_lockowner_args *args; + struct nfs_release_lockowner_data *data; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER], }; if (server->nfs_client->cl_mvops->minor_version != 0) - return; - args = kmalloc(sizeof(*args), GFP_NOFS); - if (!args) - return; - args->lock_owner.clientid = server->nfs_client->cl_clientid; - args->lock_owner.id = lsp->ls_seqid.owner_id; - args->lock_owner.s_dev = server->s_dev; - msg.rpc_argp = args; - rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); + return -EINVAL; + data = kmalloc(sizeof(*data), GFP_NOFS); + if (!data) + return -ENOMEM; + data->lsp = lsp; + data->args.lock_owner.clientid = server->nfs_client->cl_clientid; + data->args.lock_owner.id = lsp->ls_seqid.owner_id; + data->args.lock_owner.s_dev = server->s_dev; + msg.rpc_argp = &data->args; + rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); + return 0; } #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a58d02a0c27d..7adc46b4c7f8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -784,7 +784,7 @@ out_free: return NULL; } -static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) +void nfs4_free_lock_state(struct nfs4_lock_state *lsp) { struct nfs_server *server = lsp->ls_state->owner->so_server; @@ -842,8 +842,10 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - if (lsp->ls_flags & NFS_LOCK_INITIALIZED) - nfs4_release_lockowner(lsp); + if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { + if (nfs4_release_lockowner(lsp) == 0) + return; + } nfs4_free_lock_state(lsp); } From 3114ea7a24d3264c090556a2444fc6d2c06176d4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Mar 2012 16:39:06 -0500 Subject: [PATCH 278/316] NFSv4: Return the delegation if the server returns NFS4ERR_OPENMODE If a setattr() fails because of an NFS4ERR_OPENMODE error, it is probably due to us holding a read delegation. Ensure that the recovery routines return that delegation in this case. Reported-by: Miklos Szeredi Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 026878cb2698..d1989e3f23c3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -182,6 +182,7 @@ struct nfs4_exception { long timeout; int retry; struct nfs4_state *state; + struct inode *inode; }; struct nfs4_state_recovery_ops { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 32e0d08a9771..a8dd04db764f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -262,18 +262,28 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; + struct inode *inode = exception->inode; int ret = errorcode; exception->retry = 0; switch(errorcode) { case 0: return 0; + case -NFS4ERR_OPENMODE: + if (nfs_have_delegation(inode, FMODE_READ)) { + nfs_inode_return_delegation(inode); + exception->retry = 1; + return 0; + } + if (state == NULL) + break; + nfs4_schedule_stateid_recovery(server, state); + goto wait_on_recovery; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: if (state != NULL) nfs_remove_bad_delegation(state->inode); - case -NFS4ERR_OPENMODE: if (state == NULL) break; nfs4_schedule_stateid_recovery(server, state); @@ -1939,6 +1949,7 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { .state = state, + .inode = inode, }; int err; do { From 2dc317565b6fd264929b41aaa9674431d75178ef Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 8 Mar 2012 11:03:53 -0500 Subject: [PATCH 279/316] NFSv4.1 cleanup DS stateid error handling The error handler nfs4_state parameter is never NULL in the pNFS case as the open_context must carry an nfs_state. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index b2d3bb5971bb..768f6f86c9f0 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -99,16 +99,12 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state != NULL) - nfs_remove_bad_delegation(state->inode); + nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: - if (state == NULL) - break; nfs4_schedule_stateid_recovery(mds_server, state); goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(mds_server, state); + nfs4_schedule_stateid_recovery(mds_server, state); nfs4_schedule_lease_recovery(mds_client); goto wait_on_recovery; /* DS session errors */ @@ -145,7 +141,6 @@ wait_on_recovery: if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); goto out; - } /* NFS_PROTO call done callback routines */ From 0032a7a749a49b2c044092a1d0af5cfd0077f35d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 8 Mar 2012 17:16:12 -0500 Subject: [PATCH 280/316] NFS: Don't copy read delegation stateids in setattr The server will just return an NFS4ERR_OPENMODE anyway. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 16 ++++++++++------ fs/nfs/delegation.h | 2 +- fs/nfs/nfs4proc.c | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 97d53574bf53..e27c0972f94e 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -694,21 +694,25 @@ int nfs_delegations_present(struct nfs_client *clp) * nfs4_copy_delegation_stateid - Copy inode's state ID information * @dst: stateid data structure to fill in * @inode: inode to check + * @flags: delegation type requirement * - * Returns one and fills in "dst->data" * if inode had a delegation, - * otherwise zero is returned. + * Returns "true" and fills in "dst->data" * if inode had a delegation, + * otherwise "false" is returned. */ -int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) +bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, + fmode_t flags) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - int ret = 0; + bool ret; + flags &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); - if (delegation != NULL) { + ret = (delegation != NULL && (delegation->type & flags) == flags); + if (ret) { nfs4_stateid_copy(dst, &delegation->stateid); - ret = 1; + nfs_mark_delegation_referenced(delegation); } rcu_read_unlock(); return ret; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 691a79609184..e193012123e7 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -54,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); -int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); +bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs_have_delegation(struct inode *inode, fmode_t flags); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a8dd04db764f..3578ad36a5b8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1929,7 +1929,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, nfs_fattr_init(fattr); - if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { + if (nfs4_copy_delegation_stateid(&arg.stateid, inode, FMODE_WRITE)) { /* Use that stateid */ } else if (state != NULL) { nfs4_select_rw_stateid(&arg.stateid, state, current->files, current->tgid); From 4fc8796d23819da814ec25b7793bde8f104f1a2a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 8 Mar 2012 17:42:01 -0500 Subject: [PATCH 281/316] NFSv4: Clean up nfs4_select_rw_stateid() Ensure that we select delegation stateids first, then lock stateids and then open stateids. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 3 ++- fs/nfs/nfs4proc.c | 8 +++++--- fs/nfs/nfs4state.c | 47 +++++++++++++++++++++++++++++++++------------- fs/nfs/nfs4xdr.c | 13 +++++++++---- 4 files changed, 50 insertions(+), 21 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d1989e3f23c3..b47bdb9c1612 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -330,7 +330,8 @@ extern void nfs41_handle_server_scope(struct nfs_client *, struct server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); +extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, + fmode_t, fl_owner_t, pid_t); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3578ad36a5b8..3bf5593741ee 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1929,10 +1929,12 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, nfs_fattr_init(fattr); - if (nfs4_copy_delegation_stateid(&arg.stateid, inode, FMODE_WRITE)) { + if (state != NULL) { + nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, + current->files, current->tgid); + } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, + FMODE_WRITE)) { /* Use that stateid */ - } else if (state != NULL) { - nfs4_select_rw_stateid(&arg.stateid, state, current->files, current->tgid); } else nfs4_stateid_copy(&arg.stateid, &zero_stateid); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7adc46b4c7f8..de44804d9864 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -886,28 +886,49 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) return 0; } -/* - * Byte-range lock aware utility to initialize the stateid of read/write - * requests. - */ -void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid) +static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, + fl_owner_t fl_owner, pid_t fl_pid) { struct nfs4_lock_state *lsp; + bool ret = false; + + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) + goto out; + + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); + if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { + nfs4_stateid_copy(dst, &lsp->ls_stateid); + ret = true; + } + spin_unlock(&state->state_lock); + nfs4_put_lock_state(lsp); +out: + return ret; +} + +static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +{ int seq; do { seq = read_seqbegin(&state->seqlock); nfs4_stateid_copy(dst, &state->stateid); } while (read_seqretry(&state->seqlock, seq)); - if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) - return; +} - spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); - if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) - nfs4_stateid_copy(dst, &lsp->ls_stateid); - spin_unlock(&state->state_lock); - nfs4_put_lock_state(lsp); +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. + */ +void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, + fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid) +{ + if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) + return; + if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid)) + return; + nfs4_copy_open_stateid(dst, state); } struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e4bb8e6409a7..f7e064d997f6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1491,12 +1491,17 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } -static void encode_open_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) +static void encode_open_stateid(struct xdr_stream *xdr, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode, + int zero_seqid) { nfs4_stateid stateid; if (ctx->state != NULL) { - nfs4_select_rw_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); + nfs4_select_rw_stateid(&stateid, ctx->state, + fmode, l_ctx->lockowner, l_ctx->pid); if (zero_seqid) stateid.seqid = 0; encode_nfs4_stateid(xdr, &stateid); @@ -1510,7 +1515,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); encode_open_stateid(xdr, args->context, args->lock_context, - hdr->minorversion); + FMODE_READ, hdr->minorversion); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); @@ -1648,7 +1653,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); encode_open_stateid(xdr, args->context, args->lock_context, - hdr->minorversion); + FMODE_WRITE, hdr->minorversion); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->offset); From ad1e3968292e3af1c49ccbd0fb7d2674010f8efc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 10 Mar 2012 11:23:15 -0500 Subject: [PATCH 282/316] NFSv4.0: Re-establish the callback channel on NFS4ERR_CB_PATHDOWN When the NFSv4.0 server tells us that it can no-longer talk to us on the callback channel, we should attempt a new SETCLIENTID in order to re-transmit the callback channel information. Note that as long as we do not change the boot verifier, this is a safe procedure; the server is required to keep our state. Also move the function nfs_handle_cb_pathdown to fs/nfs/nfs4state.c, and change the name in order to mark it as being specific to NFSv4.0. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 17 ----------------- fs/nfs/delegation.h | 1 - fs/nfs/nfs4state.c | 18 ++++++++++++++++-- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index e27c0972f94e..12de88353eeb 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -453,11 +453,6 @@ static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, rcu_read_unlock(); } -static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) -{ - nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); -} - static void nfs_delegation_run_state_manager(struct nfs_client *clp) { if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) @@ -498,18 +493,6 @@ void nfs_expire_all_delegations(struct nfs_client *clp) nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); } -/** - * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN - * @clp: client to process - * - */ -void nfs_handle_cb_pathdown(struct nfs_client *clp) -{ - if (clp == NULL) - return; - nfs_client_mark_return_all_delegations(clp); -} - static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) { struct nfs_delegation *delegation; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index e193012123e7..cd6a7a8dadae 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -42,7 +42,6 @@ void nfs_super_return_all_delegations(struct super_block *sb); void nfs_expire_all_delegations(struct nfs_client *clp); void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); -void nfs_handle_cb_pathdown(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); void nfs_remove_bad_delegation(struct inode *inode); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index de44804d9864..5fa43cd9bfc5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1097,9 +1097,23 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); +/* + * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN + * @clp: client to process + * + * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a + * resend of the SETCLIENTID and hence re-establish the + * callback channel. Then return all existing delegations. + */ +static void nfs40_handle_cb_pathdown(struct nfs_client *clp) +{ + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs_expire_all_delegations(clp); +} + void nfs4_schedule_path_down_recovery(struct nfs_client *clp) { - nfs_handle_cb_pathdown(clp); + nfs40_handle_cb_pathdown(clp); nfs4_schedule_state_manager(clp); } @@ -1444,7 +1458,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case 0: break; case -NFS4ERR_CB_PATH_DOWN: - nfs_handle_cb_pathdown(clp); + nfs40_handle_cb_pathdown(clp); break; case -NFS4ERR_NO_GRACE: nfs4_state_end_reclaim_reboot(clp); From 9994b62b5621f88828d442fcd03fe3ce4c43344b Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 8 Mar 2012 17:29:34 -0500 Subject: [PATCH 283/316] NFS: remove NFS_PAGE_TAG_LOCKED The last real use of this tag was removed by commit 7f2f12d963 NFS: Simplify nfs_wb_page() Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 32 +------------------------------- fs/nfs/write.c | 20 +++++++++----------- include/linux/nfs_page.h | 3 --- 3 files changed, 10 insertions(+), 45 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 77a184e2fe47..fc5b54b84f8f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -107,36 +107,6 @@ void nfs_unlock_request(struct nfs_page *req) nfs_release_request(req); } -/** - * nfs_set_page_tag_locked - Tag a request as locked - * @req: - */ -int nfs_set_page_tag_locked(struct nfs_page *req) -{ - if (!nfs_lock_request_dontget(req)) - return 0; - if (test_bit(PG_MAPPED, &req->wb_flags)) - radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - return 1; -} - -/** - * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers - */ -void nfs_clear_page_tag_locked(struct nfs_page *req) -{ - if (test_bit(PG_MAPPED, &req->wb_flags)) { - struct inode *inode = req->wb_context->dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - - spin_lock(&inode->i_lock); - radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - nfs_unlock_request(req); - spin_unlock(&inode->i_lock); - } else - nfs_unlock_request(req); -} - /* * nfs_clear_request - Free up all resources allocated to the request * @req: @@ -469,7 +439,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, if (req->wb_index > idx_end) goto out; idx_start = req->wb_index + 1; - if (nfs_set_page_tag_locked(req)) { + if (nfs_lock_request_dontget(req)) { kref_get(&req->wb_kref); radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, tag); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0b1831d95849..fd8a4f07bc0c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -236,10 +236,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo req = nfs_page_find_request_locked(page); if (req == NULL) break; - if (nfs_set_page_tag_locked(req)) + if (nfs_lock_request_dontget(req)) break; /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_set_page_tag_locked() will always + * then the call to nfs_lock_request_dontget() will always * succeed provided that someone hasn't already marked the * request as dirty (in which case we don't care). */ @@ -397,8 +397,6 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; kref_get(&req->wb_kref); - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, - NFS_PAGE_TAG_LOCKED); spin_unlock(&inode->i_lock); radix_tree_preload_end(); out: @@ -604,7 +602,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, || end < req->wb_offset) goto out_flushme; - if (nfs_set_page_tag_locked(req)) + if (nfs_lock_request_dontget(req)) break; /* The request is locked, so wait and then retry */ @@ -684,7 +682,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, nfs_grow_file(page, offset, count); nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); nfs_mark_request_dirty(req); - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); return 0; } @@ -777,7 +775,7 @@ static void nfs_writepage_release(struct nfs_page *req, if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) nfs_inode_remove_request(req); - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); nfs_end_page_writeback(page); } @@ -925,7 +923,7 @@ static void nfs_redirty_request(struct nfs_page *req) struct page *page = req->wb_page; nfs_mark_request_dirty(req); - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); nfs_end_page_writeback(page); } @@ -1199,7 +1197,7 @@ static void nfs_writeback_release_full(void *calldata) remove_request: nfs_inode_remove_request(req); next: - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); nfs_end_page_writeback(page); } nfs_writedata_release(calldata); @@ -1411,7 +1409,7 @@ void nfs_retry_commit(struct list_head *page_list, dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); } } EXPORT_SYMBOL_GPL(nfs_retry_commit); @@ -1486,7 +1484,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data) dprintk(" mismatch\n"); nfs_mark_request_dirty(req); next: - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); } } EXPORT_SYMBOL_GPL(nfs_commit_release_pages); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index ab465fe8c3d6..65b563f0903a 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -21,7 +21,6 @@ /* * Valid flags for the radix tree */ -#define NFS_PAGE_TAG_LOCKED 0 #define NFS_PAGE_TAG_COMMIT 1 /* @@ -106,8 +105,6 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); -extern int nfs_set_page_tag_locked(struct nfs_page *req); -extern void nfs_clear_page_tag_locked(struct nfs_page *req); /* * Lock the page of an asynchronous request without getting a new reference From d6d6dc7cdfda7c8f49a89a7b7261846f319da6d1 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 8 Mar 2012 17:29:35 -0500 Subject: [PATCH 284/316] NFS: remove nfs_inode radix tree The radix tree is only being used to compile lists of reqs needing commit. It is simpler to just put the reqs directly into a list. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 2 + fs/nfs/nfs4filelayout.c | 109 ++++++++++++++++++++++++++++------- fs/nfs/nfs4filelayout.h | 7 ++- fs/nfs/pagelist.c | 61 -------------------- fs/nfs/pnfs.h | 82 +++++++++++++------------- fs/nfs/write.c | 120 ++++++++++++++++++++++----------------- include/linux/nfs_fs.h | 6 +- include/linux/nfs_page.h | 13 +---- 9 files changed, 208 insertions(+), 194 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 70e25c9c5670..1a19f8d30c14 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1560,7 +1560,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + INIT_LIST_HEAD(&nfsi->commit_list); nfsi->npages = 0; nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 0c3648a947d1..04a914704e7b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -308,6 +308,8 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ +extern int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, + int max); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 768f6f86c9f0..716fac6bc082 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -682,14 +682,16 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, int size = (fl->stripe_type == STRIPE_SPARSE) ? fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); + fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags); if (!fl->commit_buckets) { filelayout_free_lseg(&fl->generic_hdr); return NULL; } fl->number_of_buckets = size; - for (i = 0; i < size; i++) - INIT_LIST_HEAD(&fl->commit_buckets[i]); + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&fl->commit_buckets[i].written); + INIT_LIST_HEAD(&fl->commit_buckets[i].committing); + } } return &fl->generic_hdr; } @@ -767,11 +769,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = { .pg_doio = pnfs_generic_pg_writepages, }; -static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) -{ - return !FILELAYOUT_LSEG(lseg)->commit_through_mds; -} - static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) { if (fl->stripe_type == STRIPE_SPARSE) @@ -780,13 +777,39 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) return j; } -struct list_head *filelayout_choose_commit_list(struct nfs_page *req) +/* The generic layer is about to remove the req from the commit list. + * If this will make the bucket empty, it will need to put the lseg reference. + * Note inode lock is held, so we can't do the put here. + */ +static struct pnfs_layout_segment * +filelayout_remove_commit_req(struct nfs_page *req) +{ + if (list_is_singular(&req->wb_list)) { + struct inode *inode = req->wb_context->dentry->d_inode; + struct pnfs_layout_segment *lseg; + + /* From here we can find the bucket, but for the moment, + * since there is only one relevant lseg... + */ + list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { + if (lseg->pls_range.iomode == IOMODE_RW) + return lseg; + } + } + return NULL; +} + +static struct list_head * +filelayout_choose_commit_list(struct nfs_page *req, + struct pnfs_layout_segment *lseg) { - struct pnfs_layout_segment *lseg = req->wb_commit_lseg; struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; + if (fl->commit_through_mds) + return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; + /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive * alternative is to add a field to nfs_write_data and nfs_page @@ -796,9 +819,14 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req) j = nfs4_fl_calc_j_index(lseg, (loff_t)req->wb_index << PAGE_CACHE_SHIFT); i = select_bucket_index(fl, j); - list = &fl->commit_buckets[i]; + list = &fl->commit_buckets[i].written; if (list_empty(list)) { - /* Non-empty buckets hold a reference on the lseg */ + /* Non-empty buckets hold a reference on the lseg. That ref + * is normally transferred to the COMMIT call and released + * there. It could also be released if the last req is pulled + * off due to a rewrite, in which case it will be done in + * filelayout_remove_commit_req + */ get_lseg(lseg); } return list; @@ -860,18 +888,56 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) /* * This is only useful while we are using whole file layouts. */ -static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) +static struct pnfs_layout_segment * +find_only_write_lseg_locked(struct inode *inode) { - struct pnfs_layout_segment *lseg, *rv = NULL; + struct pnfs_layout_segment *lseg; - spin_lock(&inode->i_lock); list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) if (lseg->pls_range.iomode == IOMODE_RW) - rv = get_lseg(lseg); + return get_lseg(lseg); + return NULL; +} + +static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) +{ + struct pnfs_layout_segment *rv; + + spin_lock(&inode->i_lock); + rv = find_only_write_lseg_locked(inode); spin_unlock(&inode->i_lock); return rv; } +/* Move reqs from written to committing lists, returning count of number moved. + * Note called with i_lock held. + */ +static int filelayout_scan_commit_lists(struct inode *inode, int max) +{ + struct pnfs_layout_segment *lseg; + struct nfs4_filelayout_segment *fl; + int i, rv = 0, cnt; + + lseg = find_only_write_lseg_locked(inode); + if (!lseg) + return 0; + fl = FILELAYOUT_LSEG(lseg); + if (fl->commit_through_mds) + goto out_put; + for (i = 0; i < fl->number_of_buckets; i++) { + if (list_empty(&fl->commit_buckets[i].written)) + continue; + cnt = nfs_scan_commit_list(&fl->commit_buckets[i].written, + &fl->commit_buckets[i].committing, + max); + max -= cnt; + rv += cnt; + } +out_put: + put_lseg(lseg); + return rv; +} + static int alloc_ds_commits(struct inode *inode, struct list_head *list) { struct pnfs_layout_segment *lseg; @@ -886,7 +952,7 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list) return 0; fl = FILELAYOUT_LSEG(lseg); for (i = 0; i < fl->number_of_buckets; i++) { - if (list_empty(&fl->commit_buckets[i])) + if (list_empty(&fl->commit_buckets[i].committing)) continue; data = nfs_commitdata_alloc(); if (!data) @@ -900,9 +966,9 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list) out_bad: for (j = i; j < fl->number_of_buckets; j++) { - if (list_empty(&fl->commit_buckets[i])) + if (list_empty(&fl->commit_buckets[i].committing)) continue; - nfs_retry_commit(&fl->commit_buckets[i], lseg); + nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); put_lseg(lseg); /* associated with emptying bucket */ } put_lseg(lseg); @@ -937,7 +1003,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); } else { - nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); + nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); filelayout_initiate_commit(data, how); } } @@ -967,8 +1033,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, - .mark_pnfs_commit = filelayout_mark_pnfs_commit, .choose_commit_list = filelayout_choose_commit_list, + .remove_commit_req = filelayout_remove_commit_req, + .scan_commit_lists = filelayout_scan_commit_lists, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 2e42284253fa..21190bb1f5e3 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -74,6 +74,11 @@ struct nfs4_file_layout_dsaddr { struct nfs4_pnfs_ds *ds_list[1]; }; +struct nfs4_fl_commit_bucket { + struct list_head written; + struct list_head committing; +}; + struct nfs4_filelayout_segment { struct pnfs_layout_segment generic_hdr; u32 stripe_type; @@ -84,7 +89,7 @@ struct nfs4_filelayout_segment { struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ unsigned int num_fh; struct nfs_fh **fh_array; - struct list_head *commit_buckets; /* Sort commits to ds */ + struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */ int number_of_buckets; }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fc5b54b84f8f..d21fceaa9f62 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -396,67 +396,6 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) } } -#define NFS_SCAN_MAXENTRIES 16 -/** - * nfs_scan_list - Scan a list for matching requests - * @nfsi: NFS inode - * @dst: Destination list - * @idx_start: lower bound of page->index to scan - * @npages: idx_start + npages sets the upper bound to scan. - * @tag: tag to scan for - * - * Moves elements from one of the inode request lists. - * If the number of requests is set to 0, the entire address_space - * starting at index idx_start, is scanned. - * The requests are *not* checked to ensure that they form a contiguous set. - * You must be holding the inode's i_lock when calling this function - */ -int nfs_scan_list(struct nfs_inode *nfsi, - struct list_head *dst, pgoff_t idx_start, - unsigned int npages, int tag) -{ - struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; - struct nfs_page *req; - pgoff_t idx_end; - int found, i; - int res; - struct list_head *list; - - res = 0; - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - - for (;;) { - found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, - (void **)&pgvec[0], idx_start, - NFS_SCAN_MAXENTRIES, tag); - if (found <= 0) - break; - for (i = 0; i < found; i++) { - req = pgvec[i]; - if (req->wb_index > idx_end) - goto out; - idx_start = req->wb_index + 1; - if (nfs_lock_request_dontget(req)) { - kref_get(&req->wb_kref); - radix_tree_tag_clear(&nfsi->nfs_page_tree, - req->wb_index, tag); - list = pnfs_choose_commit_list(req, dst); - nfs_list_add_request(req, list); - res++; - if (res == INT_MAX) - goto out; - } - } - /* for latency reduction */ - cond_resched_lock(&nfsi->vfs_inode.i_lock); - } -out: - return res; -} - int __init nfs_init_nfspagecache(void) { nfs_page_cachep = kmem_cache_create("nfs_page", diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8088d51f495e..ef92f676cf1e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -94,11 +94,10 @@ struct pnfs_layoutdriver_type { const struct nfs_pageio_ops *pg_read_ops; const struct nfs_pageio_ops *pg_write_ops; - /* Returns true if layoutdriver wants to divert this request to - * driver's commit routine. - */ - bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); - struct list_head * (*choose_commit_list) (struct nfs_page *req); + struct list_head * (*choose_commit_list) (struct nfs_page *req, + struct pnfs_layout_segment *lseg); + struct pnfs_layout_segment *(*remove_commit_req) (struct nfs_page *req); + int (*scan_commit_lists) (struct inode *inode, int max); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); /* @@ -262,20 +261,6 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) return nfss->pnfs_curr_ld != NULL; } -static inline void -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) -{ - if (lseg) { - struct pnfs_layoutdriver_type *ld; - - ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; - if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) { - set_bit(PG_PNFS_COMMIT, &req->wb_flags); - req->wb_commit_lseg = get_lseg(lseg); - } - } -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) { @@ -285,26 +270,38 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) } static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg) { + struct inode *inode = req->wb_context->dentry->d_inode; struct list_head *rv; - if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { - struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; - - set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); - rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); - /* matched by ref taken when PG_PNFS_COMMIT is set */ - put_lseg(req->wb_commit_lseg); - } else - rv = mds; + if (lseg && NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list) + rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req, lseg); + else + rv = &NFS_I(inode)->commit_list; return rv; } -static inline void pnfs_clear_request_commit(struct nfs_page *req) +static inline struct pnfs_layout_segment * +pnfs_clear_request_commit(struct nfs_page *req) { - if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) - put_lseg(req->wb_commit_lseg); + struct inode *inode = req->wb_context->dentry->d_inode; + + if (NFS_SERVER(inode)->pnfs_curr_ld && + NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req) + return NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req(req); + else + return NULL; +} + +static inline int +pnfs_scan_commit_lists(struct inode *inode, int max) +{ + if (NFS_SERVER(inode)->pnfs_curr_ld && + NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists) + return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(inode, max); + else + return 0; } /* Should the pNFS client commit and return the layout upon a setattr */ @@ -400,11 +397,6 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st return false; } -static inline void -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) -{ -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) { @@ -412,13 +404,23 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) } static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg) { - return mds; + struct inode *inode = req->wb_context->dentry->d_inode; + + return &NFS_I(inode)->commit_list; } -static inline void pnfs_clear_request_commit(struct nfs_page *req) +static inline struct pnfs_layout_segment * +pnfs_clear_request_commit(struct nfs_page *req) { + return NULL; +} + +static inline int +pnfs_scan_commit_lists(struct inode *inode, int max) +{ + return 0; } static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fd8a4f07bc0c..a630ad65d64c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -375,21 +375,14 @@ out_err: /* * Insert a write request into an inode */ -static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); - int error; - - error = radix_tree_preload(GFP_NOFS); - if (error != 0) - goto out; /* Lock the request! */ nfs_lock_request_dontget(req); spin_lock(&inode->i_lock); - error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); - BUG_ON(error); if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) inode->i_version++; set_bit(PG_MAPPED, &req->wb_flags); @@ -398,11 +391,10 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfsi->npages++; kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); - radix_tree_preload_end(); -out: - return error; } +static struct pnfs_layout_segment *nfs_clear_request_commit(struct nfs_page *req); + /* * Remove a write request from an inode */ @@ -410,16 +402,18 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); + struct pnfs_layout_segment *lseg; BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&inode->i_lock); + lseg = nfs_clear_request_commit(req); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); clear_bit(PG_MAPPED, &req->wb_flags); - radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; spin_unlock(&inode->i_lock); + put_lseg(lseg); nfs_release_request(req); } @@ -438,31 +432,38 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); + struct list_head *clist; + clist = pnfs_choose_commit_list(req, lseg); spin_lock(&inode->i_lock); set_bit(PG_CLEAN, &(req)->wb_flags); - radix_tree_tag_set(&nfsi->nfs_page_tree, - req->wb_index, - NFS_PAGE_TAG_COMMIT); + nfs_list_add_request(req, clist); nfsi->ncommit++; spin_unlock(&inode->i_lock); - pnfs_mark_request_commit(req, lseg); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } -static int +static void +nfs_clear_page_commit(struct page *page) +{ + dec_zone_page_state(page, NR_UNSTABLE_NFS); + dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); +} + +static struct pnfs_layout_segment * nfs_clear_request_commit(struct nfs_page *req) { - struct page *page = req->wb_page; + struct pnfs_layout_segment *lseg = NULL; if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { - dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); - return 1; + nfs_clear_page_commit(req->wb_page); + lseg = pnfs_clear_request_commit(req); + NFS_I(req->wb_context->dentry->d_inode)->ncommit--; + list_del(&req->wb_list); } - return 0; + return lseg; } static inline @@ -494,10 +495,10 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { } -static inline int +static inline struct pnfs_layout_segment * nfs_clear_request_commit(struct nfs_page *req) { - return 0; + return NULL; } static inline @@ -518,46 +519,67 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, static int nfs_need_commit(struct nfs_inode *nfsi) { - return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); + return nfsi->ncommit > 0; } +/* i_lock held by caller */ +int +nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max) +{ + struct nfs_page *req, *tmp; + int ret = 0; + + list_for_each_entry_safe(req, tmp, src, wb_list) { + if (nfs_lock_request_dontget(req)) { + kref_get(&req->wb_kref); + list_move_tail(&req->wb_list, dst); + clear_bit(PG_CLEAN, &(req)->wb_flags); + ret++; + if (ret == max) + break; + } + } + return ret; +} +EXPORT_SYMBOL_GPL(nfs_scan_commit_list); + /* * nfs_scan_commit - Scan an inode for commit requests * @inode: NFS inode to scan * @dst: destination list - * @idx_start: lower bound of page->index to scan. - * @npages: idx_start + npages sets the upper bound to scan. * * Moves requests from the inode's 'commit' request list. * The requests are *not* checked to ensure that they form a contiguous set. */ static int -nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) +nfs_scan_commit(struct inode *inode, struct list_head *dst) { struct nfs_inode *nfsi = NFS_I(inode); - int ret; - - if (!nfs_need_commit(nfsi)) - return 0; + int ret = 0; spin_lock(&inode->i_lock); - ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); - if (ret > 0) + if (nfsi->ncommit > 0) { + int pnfs_ret; + + ret = nfs_scan_commit_list(&nfsi->commit_list, dst, INT_MAX); + pnfs_ret = pnfs_scan_commit_lists(inode, INT_MAX - ret); + if (pnfs_ret) { + ret += pnfs_ret; + set_bit(NFS_INO_PNFS_COMMIT, &nfsi->flags); + } nfsi->ncommit -= ret; + } spin_unlock(&inode->i_lock); - - if (nfs_need_commit(NFS_I(inode))) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return ret; } + #else static inline int nfs_need_commit(struct nfs_inode *nfsi) { return 0; } -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) { return 0; } @@ -579,6 +601,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, unsigned int rqend; unsigned int end; int error; + struct pnfs_layout_segment *lseg = NULL; if (!PagePrivate(page)) return NULL; @@ -614,12 +637,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - if (nfs_clear_request_commit(req) && - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) { - NFS_I(inode)->ncommit--; - pnfs_clear_request_commit(req); - } + lseg = nfs_clear_request_commit(req); /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { @@ -632,6 +650,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = rqend - req->wb_offset; out_unlock: spin_unlock(&inode->i_lock); + put_lseg(lseg); return req; out_flushme: spin_unlock(&inode->i_lock); @@ -653,7 +672,6 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, { struct inode *inode = page->mapping->host; struct nfs_page *req; - int error; req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) @@ -661,11 +679,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(req)) goto out; - error = nfs_inode_add_request(inode, req); - if (error != 0) { - nfs_release_request(req); - req = ERR_PTR(error); - } + nfs_inode_add_request(inode, req); out: return req; } @@ -1458,7 +1472,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - nfs_clear_request_commit(req); + nfs_clear_page_commit(req->wb_page); dprintk("NFS: commit (%s/%lld %d@%lld)", req->wb_context->dentry->d_sb->s_id, @@ -1515,7 +1529,7 @@ int nfs_commit_inode(struct inode *inode, int how) res = nfs_commit_set_lock(NFS_I(inode), may_wait); if (res <= 0) goto out_mark_dirty; - res = nfs_scan_commit(inode, &head, 0, 0); + res = nfs_scan_commit(inode, &head); if (res) { int error; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ce8e4361ad14..0a63ab2b5a76 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -171,13 +171,9 @@ struct nfs_inode { */ __be32 cookieverf[2]; - /* - * This is the list of dirty unwritten pages. - */ - struct radix_tree_root nfs_page_tree; - unsigned long npages; unsigned long ncommit; + struct list_head commit_list; /* Open contexts for shared mmap writes */ struct list_head open_files; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 65b563f0903a..50856e9c1e5f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -18,11 +18,6 @@ #include -/* - * Valid flags for the radix tree - */ -#define NFS_PAGE_TAG_COMMIT 1 - /* * Valid flags for a dirty buffer */ @@ -32,16 +27,12 @@ enum { PG_CLEAN, PG_NEED_COMMIT, PG_NEED_RESCHED, - PG_PNFS_COMMIT, PG_PARTIAL_READ_FAILED, }; struct nfs_inode; struct nfs_page { - union { - struct list_head wb_list; /* Defines state of page: */ - struct pnfs_layout_segment *wb_commit_lseg; /* Used when PG_PNFS_COMMIT set */ - }; + struct list_head wb_list; /* Defines state of page: */ struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ @@ -89,8 +80,6 @@ extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, extern void nfs_release_request(struct nfs_page *req); -extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, - pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, From cb9c1c4a880bc734c2848f8647be2cfa336ee346 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Sun, 11 Mar 2012 18:20:23 +0400 Subject: [PATCH 285/316] NFS: replace global bl_mount_reply with per-net one This global variable is used for blocklayout downcall and thus can be corrupted if case of existence of multiple networks namespaces. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.h | 5 ----- fs/nfs/blocklayout/blocklayoutdev.c | 9 +++++---- fs/nfs/netns.h | 6 ++++++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 0966b39bbcfb..58ac8614c4c4 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -153,11 +153,6 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg) return BLK_LO2EXT(lseg->pls_layout); } -struct bl_dev_msg { - int32_t status; - uint32_t major, minor; -}; - struct bl_msg_hdr { u8 type; u16 totallen; /* length of entire message, including hdr itself */ diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index b48f782a94ad..1d58642b1530 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -79,15 +79,16 @@ int nfs4_blkdev_put(struct block_device *bdev) return blkdev_put(bdev, FMODE_READ); } -static struct bl_dev_msg bl_mount_reply; - ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { + struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, + nfs_net_id); + if (mlen != sizeof (struct bl_dev_msg)) return -EINVAL; - if (copy_from_user(&bl_mount_reply, src, mlen) != 0) + if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) return -EFAULT; wake_up(&bl_wq); @@ -118,10 +119,10 @@ nfs4_blk_decode_device(struct nfs_server *server, }; uint8_t *dataptr; DECLARE_WAITQUEUE(wq, current); - struct bl_dev_msg *reply = &bl_mount_reply; int offset, len, i, rc; struct net *net = server->nfs_client->net; struct nfs_net *nn = net_generic(net, nfs_net_id); + struct bl_dev_msg *reply = &nn->bl_mount_reply; dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 7baad89ae60e..73425f555cde 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -4,9 +4,15 @@ #include #include +struct bl_dev_msg { + int32_t status; + uint32_t major, minor; +}; + struct nfs_net { struct cache_detail *nfs_dns_resolve; struct rpc_pipe *bl_device_pipe; + struct bl_dev_msg bl_mount_reply; struct list_head nfs_client_list; struct list_head nfs_volume_list; #ifdef CONFIG_NFS_V4 From 5ffaf8554163d9f3873988ce2f9977f6c6f408d2 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Sun, 11 Mar 2012 18:20:31 +0400 Subject: [PATCH 286/316] NFS: replace global bl_wq with per-net one This queue is used for sleeping in kernel and it have to be per-net since we don't want to wake any other waiters except in out network nemespace. BTW, move wq to per-net data is easy. But some way to handle upcall timeouts have to be provided. On message destroy in case of timeout, tasks, waiting for message to be delivered, should be awakened. Thus, some data required to located the right wait queue. Chosen solution replaces rpc_pipe_msg object with new introduced bl_pipe_msg object, containing rpc_pipe_msg and proper wq. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 4 +--- fs/nfs/blocklayout/blocklayout.h | 7 +++++-- fs/nfs/blocklayout/blocklayoutdev.c | 32 ++++++++++++++++------------- fs/nfs/blocklayout/blocklayoutdm.c | 26 ++++++++++++----------- fs/nfs/netns.h | 1 + 5 files changed, 39 insertions(+), 31 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 783ebd51bd5f..61501346324e 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -46,8 +46,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andy Adamson "); MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); -wait_queue_head_t bl_wq; - static void print_page(struct page *page) { dprintk("PRINTPAGE page %p\n", page); @@ -1117,6 +1115,7 @@ static int nfs4blocklayout_net_init(struct net *net) struct nfs_net *nn = net_generic(net, nfs_net_id); struct dentry *dentry; + init_waitqueue_head(&nn->bl_wq); nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); if (IS_ERR(nn->bl_device_pipe)) return PTR_ERR(nn->bl_device_pipe); @@ -1153,7 +1152,6 @@ static int __init nfs4blocklayout_init(void) if (ret) goto out; - init_waitqueue_head(&bl_wq); ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); if (ret) goto out_remove; diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 58ac8614c4c4..03350690118e 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -153,13 +153,16 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg) return BLK_LO2EXT(lseg->pls_layout); } +struct bl_pipe_msg { + struct rpc_pipe_msg msg; + wait_queue_head_t *bl_wq; +}; + struct bl_msg_hdr { u8 type; u16 totallen; /* length of entire message, including hdr itself */ }; -extern wait_queue_head_t bl_wq; - #define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ #define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/ #define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index 1d58642b1530..a5c88a554d92 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -91,16 +91,18 @@ ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) return -EFAULT; - wake_up(&bl_wq); + wake_up(&nn->bl_wq); return mlen; } void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) { + struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg); + if (msg->errno >= 0) return; - wake_up(&bl_wq); + wake_up(bl_pipe_msg->bl_wq); } /* @@ -112,7 +114,8 @@ nfs4_blk_decode_device(struct nfs_server *server, { struct pnfs_block_dev *rv; struct block_device *bd = NULL; - struct rpc_pipe_msg msg; + struct bl_pipe_msg bl_pipe_msg; + struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; struct bl_msg_hdr bl_msg = { .type = BL_DEVICE_MOUNT, .totallen = dev->mincount, @@ -128,15 +131,16 @@ nfs4_blk_decode_device(struct nfs_server *server, dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, dev->mincount); - memset(&msg, 0, sizeof(msg)); - msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); - if (!msg.data) { + bl_pipe_msg.bl_wq = &nn->bl_wq; + memset(msg, 0, sizeof(*msg)); + msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); + if (!msg->data) { rv = ERR_PTR(-ENOMEM); goto out; } - memcpy(msg.data, &bl_msg, sizeof(bl_msg)); - dataptr = (uint8_t *) msg.data; + memcpy(msg->data, &bl_msg, sizeof(bl_msg)); + dataptr = (uint8_t *) msg->data; len = dev->mincount; offset = sizeof(bl_msg); for (i = 0; len > 0; i++) { @@ -145,13 +149,13 @@ nfs4_blk_decode_device(struct nfs_server *server, len -= PAGE_CACHE_SIZE; offset += PAGE_CACHE_SIZE; } - msg.len = sizeof(bl_msg) + dev->mincount; + msg->len = sizeof(bl_msg) + dev->mincount; dprintk("%s CALLING USERSPACE DAEMON\n", __func__); - add_wait_queue(&bl_wq, &wq); - rc = rpc_queue_upcall(nn->bl_device_pipe, &msg); + add_wait_queue(&nn->bl_wq, &wq); + rc = rpc_queue_upcall(nn->bl_device_pipe, msg); if (rc < 0) { - remove_wait_queue(&bl_wq, &wq); + remove_wait_queue(&nn->bl_wq, &wq); rv = ERR_PTR(rc); goto out; } @@ -159,7 +163,7 @@ nfs4_blk_decode_device(struct nfs_server *server, set_current_state(TASK_UNINTERRUPTIBLE); schedule(); __set_current_state(TASK_RUNNING); - remove_wait_queue(&bl_wq, &wq); + remove_wait_queue(&nn->bl_wq, &wq); if (reply->status != BL_DEVICE_REQUEST_PROC) { dprintk("%s failed to open device: %d\n", @@ -191,7 +195,7 @@ nfs4_blk_decode_device(struct nfs_server *server, bd->bd_block_size); out: - kfree(msg.data); + kfree(msg->data); return rv; } diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index a0f588fa49c1..30fc22af7bbb 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -40,7 +40,8 @@ static void dev_remove(struct net *net, dev_t dev) { - struct rpc_pipe_msg msg; + struct bl_pipe_msg bl_pipe_msg; + struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; struct bl_dev_msg bl_umount_request; struct bl_msg_hdr bl_msg = { .type = BL_DEVICE_UMOUNT, @@ -52,33 +53,34 @@ static void dev_remove(struct net *net, dev_t dev) dprintk("Entering %s\n", __func__); - memset(&msg, 0, sizeof(msg)); - msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); - if (!msg.data) + bl_pipe_msg.bl_wq = &nn->bl_wq; + memset(&msg, 0, sizeof(*msg)); + msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); + if (!msg->data) goto out; memset(&bl_umount_request, 0, sizeof(bl_umount_request)); bl_umount_request.major = MAJOR(dev); bl_umount_request.minor = MINOR(dev); - memcpy(msg.data, &bl_msg, sizeof(bl_msg)); - dataptr = (uint8_t *) msg.data; + memcpy(msg->data, &bl_msg, sizeof(bl_msg)); + dataptr = (uint8_t *) msg->data; memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); - msg.len = sizeof(bl_msg) + bl_msg.totallen; + msg->len = sizeof(bl_msg) + bl_msg.totallen; - add_wait_queue(&bl_wq, &wq); - if (rpc_queue_upcall(nn->bl_device_pipe, &msg) < 0) { - remove_wait_queue(&bl_wq, &wq); + add_wait_queue(&nn->bl_wq, &wq); + if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { + remove_wait_queue(&nn->bl_wq, &wq); goto out; } set_current_state(TASK_UNINTERRUPTIBLE); schedule(); __set_current_state(TASK_RUNNING); - remove_wait_queue(&bl_wq, &wq); + remove_wait_queue(&nn->bl_wq, &wq); out: - kfree(msg.data); + kfree(msg->data); } /* diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 73425f555cde..aa14ec303e94 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -13,6 +13,7 @@ struct nfs_net { struct cache_detail *nfs_dns_resolve; struct rpc_pipe *bl_device_pipe; struct bl_dev_msg bl_mount_reply; + wait_queue_head_t bl_wq; struct list_head nfs_client_list; struct list_head nfs_volume_list; #ifdef CONFIG_NFS_V4 From 17280175c587469b34757263c7cfc608f0ea2334 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 11 Mar 2012 13:11:00 -0400 Subject: [PATCH 287/316] NFS: Fix a number of sparse warnings Fix a number of "warning: symbol 'foo' was not declared. Should it be static?" conditions. Fix 2 cases of "warning: Using plain integer as NULL pointer" fs/nfs/delegation.c:263:31: warning: restricted fmode_t degrades to integer - We want to allow upgrades to a WRITE delegation, but should otherwise consider servers that hand out duplicate delegations to be borken. Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 4 ++-- fs/nfs/client.c | 5 ++--- fs/nfs/delegation.c | 5 ++++- fs/nfs/dns_resolve.c | 1 + fs/nfs/idmap.c | 8 ++++---- fs/nfs/nfs3acl.c | 2 +- fs/nfs/nfs4filelayout.c | 10 +++++----- fs/nfs/nfs4filelayoutdev.c | 4 ++-- fs/nfs/nfs4proc.c | 23 ++++++++++++----------- fs/nfs/nfs4state.c | 3 ++- fs/nfs/objlayout/objlayout.c | 2 +- fs/nfs/pnfs_dev.c | 2 +- fs/nfs/unlink.c | 2 +- 13 files changed, 38 insertions(+), 33 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 2afe23349c7b..eb95f5091c1a 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -101,7 +101,7 @@ nfs4_callback_svc(void *vrqstp) /* * Prepare to bring up the NFSv4 callback service */ -struct svc_rqst * +static struct svc_rqst * nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { int ret; @@ -172,7 +172,7 @@ nfs41_callback_svc(void *vrqstp) /* * Bring up the NFSv4.1 callback service */ -struct svc_rqst * +static struct svc_rqst * nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { struct svc_rqst *rqstp; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d30dcbfb6b20..f1f047c376d9 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -443,9 +443,8 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, } /* Common match routine for v4.0 and v4.1 callback services */ -bool -nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, - u32 minorversion) +static bool nfs4_cb_match_client(const struct sockaddr *addr, + struct nfs_client *clp, u32 minorversion) { struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 12de88353eeb..89af1d269274 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -256,11 +256,14 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct /* * Deal with broken servers that hand out two * delegations for the same file. + * Allow for upgrades to a WRITE delegation, but + * nothing else. */ dfprintk(FILE, "%s: server %s handed out " "a duplicate delegation!\n", __func__, clp->cl_hostname); - if (delegation->type <= old_delegation->type) { + if (delegation->type == old_delegation->type || + !(delegation->type & FMODE_WRITE)) { freeme = delegation; delegation = NULL; goto out; diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index fcd8f1d7430f..b3924b8a6000 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -10,6 +10,7 @@ #include #include +#include "dns_resolve.h" ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, struct sockaddr *sa, size_t salen) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index f72c1fc074e1..f9f89fc83ee0 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -54,8 +54,8 @@ /* Default cache timeout is 10 minutes */ unsigned int nfs_idmap_cache_timeout = 600; -const struct cred *id_resolver_cache; -struct key_type key_type_id_resolver_legacy; +static const struct cred *id_resolver_cache; +static struct key_type key_type_id_resolver_legacy; /** @@ -160,7 +160,7 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) return snprintf(buf, buflen, "%u", id); } -struct key_type key_type_id_resolver = { +static struct key_type key_type_id_resolver = { .name = "id_resolver", .instantiate = user_instantiate, .match = user_match, @@ -381,7 +381,7 @@ static const struct rpc_pipe_ops idmap_upcall_ops = { .destroy_msg = idmap_pipe_destroy_msg, }; -struct key_type key_type_id_resolver_legacy = { +static struct key_type key_type_id_resolver_legacy = { .name = "id_resolver", .instantiate = user_instantiate, .match = user_match, diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 7ef23979896d..e4498dc351a8 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -192,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) .pages = pages, }; struct nfs3_getaclres res = { - 0 + NULL, }; struct rpc_message msg = { .rpc_argp = &args, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 716fac6bc082..379a085f8f25 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -323,21 +323,21 @@ static void filelayout_commit_release(void *data) nfs_commitdata_release(wdata); } -struct rpc_call_ops filelayout_read_call_ops = { +static const struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, .rpc_count_stats = filelayout_read_count_stats, .rpc_release = filelayout_read_release, }; -struct rpc_call_ops filelayout_write_call_ops = { +static const struct rpc_call_ops filelayout_write_call_ops = { .rpc_call_prepare = filelayout_write_prepare, .rpc_call_done = filelayout_write_call_done, .rpc_count_stats = filelayout_write_count_stats, .rpc_release = filelayout_write_release, }; -struct rpc_call_ops filelayout_commit_call_ops = { +static const struct rpc_call_ops filelayout_commit_call_ops = { .rpc_call_prepare = filelayout_write_prepare, .rpc_call_done = filelayout_write_call_done, .rpc_count_stats = filelayout_write_count_stats, @@ -723,7 +723,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } -void +static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { @@ -740,7 +740,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, nfs_pageio_reset_read_mds(pgio); } -void +static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 41677f0bf792..a866bbd2890a 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -45,7 +45,7 @@ * - incremented when a device id maps a data server already in the cache. * - decremented when deviceid is removed from the cache. */ -DEFINE_SPINLOCK(nfs4_ds_cache_lock); +static DEFINE_SPINLOCK(nfs4_ds_cache_lock); static LIST_HEAD(nfs4_data_server_cache); /* Debug routines */ @@ -108,7 +108,7 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) return false; } -bool +static bool _same_data_server_addrs_locked(const struct list_head *dsaddrs1, const struct list_head *dsaddrs2) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3bf5593741ee..36a7cda03445 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -677,12 +677,12 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) nfs41_sequence_done(task, data->seq_res); } -struct rpc_call_ops nfs41_call_sync_ops = { +static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_prepare = nfs41_call_sync_prepare, .rpc_call_done = nfs41_call_sync_done, }; -struct rpc_call_ops nfs41_call_priv_sync_ops = { +static const struct rpc_call_ops nfs41_call_priv_sync_ops = { .rpc_call_prepare = nfs41_call_priv_sync_prepare, .rpc_call_done = nfs41_call_sync_done, }; @@ -4770,7 +4770,7 @@ static void nfs4_release_lockowner_release(void *calldata) kfree(calldata); } -const struct rpc_call_ops nfs4_release_lockowner_ops = { +static const struct rpc_call_ops nfs4_release_lockowner_ops = { .rpc_release = nfs4_release_lockowner_release, }; @@ -4910,7 +4910,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct return status; } -int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) +static int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, + struct nfs4_secinfo_flavors *flavors) { struct nfs4_exception exception = { }; int err; @@ -5096,7 +5097,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) dprintk("<-- %s\n", __func__); } -struct rpc_call_ops nfs4_get_lease_time_ops = { +static const struct rpc_call_ops nfs4_get_lease_time_ops = { .rpc_call_prepare = nfs4_get_lease_time_prepare, .rpc_call_done = nfs4_get_lease_time_done, }; @@ -6319,7 +6320,7 @@ static bool nfs4_match_stateid(const nfs4_stateid *s1, } -struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { +static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, @@ -6329,7 +6330,7 @@ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { }; #if defined(CONFIG_NFS_V4_1) -struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { +static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, @@ -6340,7 +6341,7 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { }; #endif /* CONFIG_NFS_V4_1 */ -struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { +static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs4_open_expired, @@ -6350,7 +6351,7 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { }; #if defined(CONFIG_NFS_V4_1) -struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { +static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs41_open_expired, @@ -6360,14 +6361,14 @@ struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { }; #endif /* CONFIG_NFS_V4_1 */ -struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { +static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { .sched_state_renewal = nfs4_proc_async_renew, .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, .renew_lease = nfs4_proc_renew, }; #if defined(CONFIG_NFS_V4_1) -struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { +static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { .sched_state_renewal = nfs41_proc_async_sequence, .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, .renew_lease = nfs4_proc_sequence, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5fa43cd9bfc5..7c586070d028 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -876,7 +876,8 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) if (fl->fl_flags & FL_POSIX) lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); else if (fl->fl_flags & FL_FLOCK) - lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE); + lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid, + NFS4_FLOCK_LOCK_TYPE); else return -EINVAL; if (lsp == NULL) diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 2bd185277adb..157c47e277e0 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -156,7 +156,7 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1 : NFS4_MAX_UINT64; } -void _fix_verify_io_params(struct pnfs_layout_segment *lseg, +static void _fix_verify_io_params(struct pnfs_layout_segment *lseg, struct page ***p_pages, unsigned *p_pgbase, u64 offset, unsigned long count) { diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 4f359d2a26eb..6b4cd3849306 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -92,7 +92,7 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, * @clp nfs_client associated with deviceid * @id deviceid to look up */ -struct nfs4_deviceid_node * +static struct nfs4_deviceid_node * _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, const struct nfs_client *clp, const struct nfs4_deviceid *id, long hash) diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 490613b709b6..fae71c9f5050 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -108,7 +108,7 @@ static void nfs_async_unlink_release(void *calldata) } #if defined(CONFIG_NFS_V4_1) -void nfs_unlink_prepare(struct rpc_task *task, void *calldata) +static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; struct nfs_server *server = NFS_SERVER(data->dir); From 09acfea5d8de419ebe84be43b08f7b79c965215f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 11 Mar 2012 15:22:54 -0400 Subject: [PATCH 288/316] SUNRPC: Fix a few sparse warnings net/sunrpc/svcsock.c:412:22: warning: incorrect type in assignment (different address spaces) - svc_partial_recvfrom now takes a struct kvec, so the variable save_iovbase needs to be an ordinary (void *) Make a bunch of variables in net/sunrpc/xprtsock.c static Fix a couple of "warning: symbol 'foo' was not declared. Should it be static?" reports. Fix a couple of conflicting function declarations. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 2 +- include/linux/sunrpc/svcauth.h | 3 +++ include/linux/sunrpc/xprtsock.h | 12 ------------ net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- net/sunrpc/auth_gss/gss_krb5_seal.c | 2 +- net/sunrpc/backchannel_rqst.c | 1 + net/sunrpc/rpc_pipe.c | 2 +- net/sunrpc/sunrpc_syms.c | 3 --- net/sunrpc/svcsock.c | 2 +- net/sunrpc/xprtsock.c | 10 +++++----- 10 files changed, 14 insertions(+), 25 deletions(-) diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index f7f3ce340c08..969c0a671dbf 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -35,7 +35,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); -void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs); +void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); int bc_send(struct rpc_rqst *req); /* diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 25d333c1b571..548790e9113b 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -135,6 +135,9 @@ extern void svcauth_unix_purge(void); extern void svcauth_unix_info_release(struct svc_xprt *xpt); extern int svcauth_unix_set_client(struct svc_rqst *rqstp); +extern int unix_gid_cache_create(struct net *net); +extern void unix_gid_cache_destroy(struct net *net); + static inline unsigned long hash_str(char *name, int bits) { unsigned long hash = 0; diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 3f14a02e9cc0..1ad36cc25b2e 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -12,18 +12,6 @@ int init_socket_xprt(void); void cleanup_socket_xprt(void); -/* - * RPC slot table sizes for UDP, TCP transports - */ -extern unsigned int xprt_udp_slot_table_entries; -extern unsigned int xprt_tcp_slot_table_entries; - -/* - * Parameters for choosing a free port - */ -extern unsigned int xprt_min_resvport; -extern unsigned int xprt_max_resvport; - #define RPC_MIN_RESVPORT (1U) #define RPC_MAX_RESVPORT (65535U) #define RPC_DEF_MIN_RESVPORT (665U) diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 8c67890de427..8eff8c32d1b9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -344,7 +344,7 @@ out_err: return PTR_ERR(p); } -struct crypto_blkcipher * +static struct crypto_blkcipher * context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key) { struct crypto_blkcipher *cp; diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index d7941eab7796..62ae3273186c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -159,7 +159,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } -u32 +static u32 gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token) { diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 3ad435a14ada..31def68a0f6e 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -25,6 +25,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_TRANS diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7d96e3cd57cc..8584ec068e97 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1149,7 +1149,7 @@ rpc_mount(struct file_system_type *fs_type, return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super); } -void rpc_kill_sb(struct super_block *sb) +static void rpc_kill_sb(struct super_block *sb) { struct net *net = sb->s_fs_info; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 21d106e2ca06..8adfc88e793a 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -27,9 +27,6 @@ int sunrpc_net_id; EXPORT_SYMBOL_GPL(sunrpc_net_id); -extern int unix_gid_cache_create(struct net *net); -extern int unix_gid_cache_destroy(struct net *net); - static __net_init int sunrpc_init_net(struct net *net) { int err; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e088b1633d36..40ae884db865 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -396,7 +396,7 @@ static int svc_partial_recvfrom(struct svc_rqst *rqstp, int buflen, unsigned int base) { size_t save_iovlen; - void __user *save_iovbase; + void *save_iovbase; unsigned int i; int ret; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4c8281d29e2b..92bc5181dbeb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -53,12 +53,12 @@ static void xs_close(struct rpc_xprt *xprt); /* * xprtsock tunables */ -unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; -unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; -unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; +static unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; +static unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; +static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; -unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; -unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; +static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; #define XS_TCP_LINGER_TO (15U * HZ) static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; From 4b7c8dd205d6df1629ccde9f6dcf6a85d34c37ff Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 12 Mar 2012 11:28:24 -0400 Subject: [PATCH 289/316] NFS: Only define some function when v4.1 is enabled Now that the nfs4_cb_match_client() function is static, gcc notices that it is only used when CONFIG_NFS_V4_1 is enabled. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f1f047c376d9..2f378487ccde 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -404,6 +404,7 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, (sin1->sin_port == sin2->sin_port); } +#if defined(CONFIG_NFS_V4_1) /* * Test if two socket addresses represent the same actual socket, * by comparing (only) relevant fields, excluding the port number. @@ -422,6 +423,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, } return 0; } +#endif /* CONFIG_NFS_V4_1 */ /* * Test if two socket addresses represent the same actual socket, @@ -442,6 +444,7 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, return 0; } +#if defined(CONFIG_NFS_V4_1) /* Common match routine for v4.0 and v4.1 callback services */ static bool nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, u32 minorversion) @@ -464,6 +467,7 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr, return true; } +#endif /* CONFIG_NFS_V4_1 */ /* * Find an nfs_client on the list that matches the initialisation data From 11588f493a2441f09ceb2088d07cc012b53cbf75 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 12 Mar 2012 11:33:00 -0400 Subject: [PATCH 290/316] NFS: Check return value from rpc_queue_upcall() This function could fail to queue the upcall if rpc.idmapd is not running, causing a warning message to be printed. Instead, I want to check the return value and revoke the key if the upcall can't be run. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index f9f89fc83ee0..a701a83047d3 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -656,14 +656,19 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, idmap->idmap_key_cons = cons; - return rpc_queue_upcall(idmap->idmap_pipe, msg); + ret = rpc_queue_upcall(idmap->idmap_pipe, msg); + if (ret < 0) + goto out2; + + return ret; out2: kfree(im); out1: kfree(msg); out0: - complete_request_key(cons, ret); + key_revoke(cons->key); + key_revoke(cons->authkey); return ret; } From 0097143c12e279f5d454e0f636a02afff102cc6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Mar 2012 13:29:05 -0400 Subject: [PATCH 291/316] SUNRPC: Don't use variable length automatic arrays in kernel code Replace the variable length array in the RPCSEC_GSS crypto code with a fixed length one. The size should be bounded by the variable GSS_KRB5_MAX_BLOCKSIZE, so use that. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 9576f35ab701..0f43e894bc0a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -600,11 +600,14 @@ gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf, u32 ret; struct scatterlist sg[1]; struct blkcipher_desc desc = { .tfm = cipher, .info = iv }; - u8 data[crypto_blkcipher_blocksize(cipher) * 2]; + u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2]; struct page **save_pages; u32 len = buf->len - offset; - BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2); + if (len > ARRAY_SIZE(data)) { + WARN_ON(0); + return -ENOMEM; + } /* * For encryption, we want to read from the cleartext From 9a3ba432330e504ac61ff0043dbdaba7cea0e35a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Mar 2012 18:01:48 -0400 Subject: [PATCH 292/316] NFSv4: Rate limit the state manager warning messages Prevent the state manager from filling up system logs when recovery fails on the server. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/callback_xdr.c | 4 +++- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index fd6cfdb917da..95bfc243992c 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include #include "nfs4_fs.h" @@ -167,7 +169,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (hdr->minorversion <= 1) { hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ } else { - printk(KERN_WARNING "NFS: %s: NFSv4 server callback with " + pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " "illegal minor version %u!\n", __func__, hdr->minorversion); return htonl(NFS4ERR_MINOR_VERS_MISMATCH); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 36a7cda03445..5e0961acfef4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1876,7 +1876,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, * the user though... */ if (status == -NFS4ERR_BAD_SEQID) { - printk(KERN_WARNING "NFS: v4 server %s " + pr_warn_ratelimited("NFS: v4 server %s " " returned a bad sequence-id error!\n", NFS_SERVER(dir)->nfs_client->cl_hostname); exception.retry = 1; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7c586070d028..cb708b20a775 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -984,7 +984,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_BAD_SEQID: if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) return; - printk(KERN_WARNING "NFS: v4 server returned a bad" + pr_warn_ratelimited("NFS: v4 server returned a bad" " sequence-id error on an" " unconfirmed sequence %p!\n", seqid->sequence); @@ -1840,7 +1840,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } while (atomic_read(&clp->cl_count) > 1); return; out_error: - printk(KERN_WARNING "NFS: state manager failed on NFSv4 server %s" + pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s" " with error %d\n", clp->cl_hostname, -status); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); From e138ead73f872559778bb0c326e795206f96d3ce Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 13 Mar 2012 20:18:48 +0300 Subject: [PATCH 293/316] NFS: null dereference in dev_remove() In commit 5ffaf85541 "NFS: replace global bl_wq with per-net one" we made "msg" a pointer instead of a struct stored in stack memory. But we forgot to change the memset() here so we're still clearing stack memory instead clearing the struct like we intended. It will lead to a kernel crash. Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayoutdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 30fc22af7bbb..737d839bc17b 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -54,7 +54,7 @@ static void dev_remove(struct net *net, dev_t dev) dprintk("Entering %s\n", __func__); bl_pipe_msg.bl_wq = &nn->bl_wq; - memset(&msg, 0, sizeof(*msg)); + memset(msg, 0, sizeof(*msg)); msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); if (!msg->data) goto out; From 5318a29c1943e9719e71495db6efb6fc084a45a9 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 13 Mar 2012 20:44:26 -0700 Subject: [PATCH 294/316] pnfs-obj: Uglify objio_segment allocation for the sake of the principle :-( At some past instance Linus Trovalds wrote: > From: Linus Torvalds > commit a84a79e4d369a73c0130b5858199e949432da4c6 upstream. > > The size is always valid, but variable-length arrays generate worse code > for no good reason (unless the function happens to be inlined and the > compiler sees the length for the simple constant it is). > > Also, there seems to be some code generation problem on POWER, where > Henrik Bakken reports that register r28 can get corrupted under some > subtle circumstances (interrupt happening at the wrong time?). That all > indicates some seriously broken compiler issues, but since variable > length arrays are bad regardless, there's little point in trying to > chase it down. > > "Just don't do that, then". Since then any use of "variable length arrays" has become blasphemous. Even in perfectly good, beautiful, perfectly safe code like the one below where the variable length arrays are only used as a sizeof() parameter, for type-safe dynamic structure allocations. GCC is not executing any stack allocation code. I have produced a small file which defines two functions main1(unsigned numdevs) and main2(unsigned numdevs). main1 uses code as before with call to malloc and main2 uses code as of after this patch. I compiled it as: gcc -O2 -S see_asm.c and here is what I get: main1: .LFB7: .cfi_startproc mov %edi, %edi leaq 4(%rdi,%rdi), %rdi salq $3, %rdi jmp malloc .cfi_endproc .LFE7: .size main1, .-main1 .p2align 4,,15 .globl main2 .type main2, @function main2: .LFB8: .cfi_startproc mov %edi, %edi addq $2, %rdi salq $4, %rdi jmp malloc .cfi_endproc .LFE8: .size main2, .-main2 .section .text.startup,"ax",@progbits .p2align 4,,15 *Exact* same code !!! So please seriously consider not accepting this patch and leave the perfectly good code intact. CC: Linus Torvalds Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 37 +++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 405a62bdb9b4..3a621a2fd321 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -205,25 +205,36 @@ static void copy_single_comp(struct ore_components *oc, unsigned c, int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, struct objio_segment **pseg) { - struct __alloc_objio_segment { - struct objio_segment olseg; - struct ore_dev *ods[numdevs]; - struct ore_comp comps[numdevs]; - } *aolseg; +/* This is the in memory structure of the objio_segment + * + * struct __alloc_objio_segment { + * struct objio_segment olseg; + * struct ore_dev *ods[numdevs]; + * struct ore_comp comps[numdevs]; + * } *aolseg; + * NOTE: The code as above compiles and runs perfectly. It is elegant, + * type safe and compact. At some Past time Linus has decided he does not + * like variable length arrays, For the sake of this principal we uglify + * the code as below. + */ + struct objio_segment *lseg; + size_t lseg_size = sizeof(*lseg) + + numdevs * sizeof(lseg->oc.ods[0]) + + numdevs * sizeof(*lseg->oc.comps); - aolseg = kzalloc(sizeof(*aolseg), gfp_flags); - if (unlikely(!aolseg)) { + lseg = kzalloc(lseg_size, gfp_flags); + if (unlikely(!lseg)) { dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, - numdevs, sizeof(*aolseg)); + numdevs, lseg_size); return -ENOMEM; } - aolseg->olseg.oc.numdevs = numdevs; - aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; - aolseg->olseg.oc.comps = aolseg->comps; - aolseg->olseg.oc.ods = aolseg->ods; + lseg->oc.numdevs = numdevs; + lseg->oc.single_comp = EC_MULTPLE_COMPS; + lseg->oc.ods = (void *)(lseg + 1); + lseg->oc.comps = (void *)(lseg->oc.ods + numdevs); - *pseg = &aolseg->olseg; + *pseg = lseg; return 0; } From 96dcadc2fdd111dca90d559f189a30c65394451a Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Wed, 14 Mar 2012 12:32:04 +0100 Subject: [PATCH 295/316] NFSv4: Rate limit the state manager for lock reclaim warning messages Adding rate limit on `Lock reclaim failed` messages since it could fill up system logs Signed-off-by: William Dauchy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cb708b20a775..119006b0815a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1261,7 +1261,8 @@ restart: spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) - printk("NFS: %s: Lock reclaim " + pr_warn_ratelimited("NFS: " + "%s: Lock reclaim " "failed!\n", __func__); } spin_unlock(&state->state_lock); From 95a13f7b33be87d85d8e6652126a3f4d64d164db Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 Mar 2012 21:55:01 -0400 Subject: [PATCH 296/316] NFS: Fix a compile error when !defined NFS_DEBUG We should use the 'ifdebug' wrapper rather than trying to inline tests of nfs_debug, so that the code compiles correctly when we don't define NFS_DEBUG. Reported-by: Paul Gortmaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f7e064d997f6..c74fdb114b48 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3440,7 +3440,7 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) status = decode_opaque_inline(xdr, &component->len, &component->data); if (unlikely(status != 0)) goto out_eio; - if (unlikely(nfs_debug & NFSDBG_XDR)) + ifdebug (XDR) pr_cont("%s%.*s ", (path->ncomponents != n ? "/ " : ""), component->len, component->data); From 8dd3775889345850ecddd689b5c200cdd91bd8c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 15 Mar 2012 17:16:40 -0400 Subject: [PATCH 297/316] NFSv4.1: Clean ups and bugfixes for the pNFS read/writeback/commit code Move more pnfs-isms out of the generic commit code. Bugfixes: - filelayout_scan_commit_lists doesn't need to get/put the lseg. In fact since it is run under the inode->i_lock, the lseg_put() can deadlock. - Ensure that we distinguish between what needs to be done for commit-to-data server and what needs to be done for commit-to-MDS using the new flag PG_COMMIT_TO_DS. Otherwise we may end up calling put_lseg() on a bucket for a struct nfs_page that got written through the MDS. - Fix a case where we were using list_del() on an nfs_page->wb_list instead of list_del_init(). - filelayout_initiate_commit needs to call filelayout_commit_release on error instead of the mds_ops->rpc_release(). Otherwise it won't clear the commit lock. Cleanups: - Let the files layout manage the commit lists for the pNFS case. Don't expose stuff like pnfs_choose_commit_list, and the fact that the commit buckets hold references to the layout segment in common code. - Cast out the put_lseg() calls for the struct nfs_read/write_data->lseg into the pNFS layer from whence they came. - Let the pNFS layer manage the NFS_INO_PNFS_COMMIT bit. Signed-off-by: Trond Myklebust Cc: Fred Isaman --- fs/nfs/internal.h | 4 +- fs/nfs/nfs4filelayout.c | 82 ++++++++++++++++++------- fs/nfs/pnfs.c | 3 + fs/nfs/pnfs.h | 55 ++++++++--------- fs/nfs/read.c | 1 - fs/nfs/write.c | 126 ++++++++++++++++++++++++--------------- include/linux/nfs_page.h | 11 ++++ 7 files changed, 184 insertions(+), 98 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 04a914704e7b..2476dc69365f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -308,8 +308,6 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ -extern int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, - int max); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, @@ -334,6 +332,8 @@ void nfs_retry_commit(struct list_head *page_list, void nfs_commit_clear_lock(struct nfs_inode *nfsi); void nfs_commitdata_release(void *data); void nfs_commit_release_pages(struct nfs_write_data *data); +void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); +void nfs_request_remove_commit_list(struct nfs_page *req); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 379a085f8f25..c24e077c2820 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -224,6 +224,7 @@ static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = (struct nfs_read_data *)data; + put_lseg(rdata->lseg); rdata->mds_ops->rpc_release(data); } @@ -310,6 +311,7 @@ static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = (struct nfs_write_data *)data; + put_lseg(wdata->lseg); wdata->mds_ops->rpc_release(data); } @@ -320,6 +322,7 @@ static void filelayout_commit_release(void *data) nfs_commit_release_pages(wdata); if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) nfs_commit_clear_lock(NFS_I(wdata->inode)); + put_lseg(wdata->lseg); nfs_commitdata_release(wdata); } @@ -779,11 +782,16 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) /* The generic layer is about to remove the req from the commit list. * If this will make the bucket empty, it will need to put the lseg reference. - * Note inode lock is held, so we can't do the put here. */ -static struct pnfs_layout_segment * -filelayout_remove_commit_req(struct nfs_page *req) +static void +filelayout_clear_request_commit(struct nfs_page *req) { + struct pnfs_layout_segment *freeme = NULL; + struct inode *inode = req->wb_context->dentry->d_inode; + + spin_lock(&inode->i_lock); + if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) + goto out; if (list_is_singular(&req->wb_list)) { struct inode *inode = req->wb_context->dentry->d_inode; struct pnfs_layout_segment *lseg; @@ -792,11 +800,16 @@ filelayout_remove_commit_req(struct nfs_page *req) * since there is only one relevant lseg... */ list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { - if (lseg->pls_range.iomode == IOMODE_RW) - return lseg; + if (lseg->pls_range.iomode == IOMODE_RW) { + freeme = lseg; + break; + } } } - return NULL; +out: + nfs_request_remove_commit_list(req); + spin_unlock(&inode->i_lock); + put_lseg(freeme); } static struct list_head * @@ -829,9 +842,20 @@ filelayout_choose_commit_list(struct nfs_page *req, */ get_lseg(lseg); } + set_bit(PG_COMMIT_TO_DS, &req->wb_flags); return list; } +static void +filelayout_mark_request_commit(struct nfs_page *req, + struct pnfs_layout_segment *lseg) +{ + struct list_head *list; + + list = filelayout_choose_commit_list(req, lseg); + nfs_request_add_commit_list(req, list); +} + static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) { struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); @@ -872,7 +896,7 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); prepare_to_resend_writes(data); - data->mds_ops->rpc_release(data); + filelayout_commit_release(data); return -EAGAIN; } dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); @@ -895,7 +919,7 @@ find_only_write_lseg_locked(struct inode *inode) list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) if (lseg->pls_range.iomode == IOMODE_RW) - return get_lseg(lseg); + return lseg; return NULL; } @@ -905,10 +929,33 @@ static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) spin_lock(&inode->i_lock); rv = find_only_write_lseg_locked(inode); + if (rv) + get_lseg(rv); spin_unlock(&inode->i_lock); return rv; } +static int +filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max) +{ + struct list_head *src = &bucket->written; + struct list_head *dst = &bucket->committing; + struct nfs_page *req, *tmp; + int ret = 0; + + list_for_each_entry_safe(req, tmp, src, wb_list) { + if (!nfs_lock_request(req)) + continue; + nfs_request_remove_commit_list(req); + clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); + nfs_list_add_request(req, dst); + ret++; + if (ret == max) + break; + } + return ret; +} + /* Move reqs from written to committing lists, returning count of number moved. * Note called with i_lock held. */ @@ -920,21 +967,16 @@ static int filelayout_scan_commit_lists(struct inode *inode, int max) lseg = find_only_write_lseg_locked(inode); if (!lseg) - return 0; + goto out_done; fl = FILELAYOUT_LSEG(lseg); if (fl->commit_through_mds) - goto out_put; - for (i = 0; i < fl->number_of_buckets; i++) { - if (list_empty(&fl->commit_buckets[i].written)) - continue; - cnt = nfs_scan_commit_list(&fl->commit_buckets[i].written, - &fl->commit_buckets[i].committing, - max); + goto out_done; + for (i = 0; i < fl->number_of_buckets && max != 0; i++) { + cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], max); max -= cnt; rv += cnt; } -out_put: - put_lseg(lseg); +out_done: return rv; } @@ -1033,8 +1075,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, - .choose_commit_list = filelayout_choose_commit_list, - .remove_commit_req = filelayout_remove_commit_req, + .mark_request_commit = filelayout_mark_request_commit, + .clear_request_commit = filelayout_clear_request_commit, .scan_commit_lists = filelayout_scan_commit_lists, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6f1c1e3d12bc..b5d451586943 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1210,6 +1210,7 @@ void pnfs_ld_write_done(struct nfs_write_data *data) } data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); } + put_lseg(data->lseg); data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); @@ -1223,6 +1224,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_list_add_request(data->req, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; + put_lseg(data->lseg); nfs_writedata_release(data); } @@ -1323,6 +1325,7 @@ void pnfs_ld_read_done(struct nfs_read_data *data) data->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_read_error(data); + put_lseg(data->lseg); data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index ef92f676cf1e..e98ff3027d3a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -94,9 +94,9 @@ struct pnfs_layoutdriver_type { const struct nfs_pageio_ops *pg_read_ops; const struct nfs_pageio_ops *pg_write_ops; - struct list_head * (*choose_commit_list) (struct nfs_page *req, + void (*mark_request_commit) (struct nfs_page *req, struct pnfs_layout_segment *lseg); - struct pnfs_layout_segment *(*remove_commit_req) (struct nfs_page *req); + void (*clear_request_commit) (struct nfs_page *req); int (*scan_commit_lists) (struct inode *inode, int max); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); @@ -269,39 +269,42 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); } -static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg) +static inline bool +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { struct inode *inode = req->wb_context->dentry->d_inode; - struct list_head *rv; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - if (lseg && NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list) - rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req, lseg); - else - rv = &NFS_I(inode)->commit_list; - return rv; + if (lseg == NULL || ld->mark_request_commit == NULL) + return false; + ld->mark_request_commit(req, lseg); + return true; } -static inline struct pnfs_layout_segment * +static inline bool pnfs_clear_request_commit(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - if (NFS_SERVER(inode)->pnfs_curr_ld && - NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req) - return NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req(req); - else - return NULL; + if (ld == NULL || ld->clear_request_commit == NULL) + return false; + ld->clear_request_commit(req); + return true; } static inline int pnfs_scan_commit_lists(struct inode *inode, int max) { - if (NFS_SERVER(inode)->pnfs_curr_ld && - NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists) - return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(inode, max); - else + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + int ret; + + if (ld == NULL || ld->scan_commit_lists == NULL) return 0; + ret = ld->scan_commit_lists(inode, max); + if (ret != 0) + set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); + return ret; } /* Should the pNFS client commit and return the layout upon a setattr */ @@ -403,18 +406,16 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) return PNFS_NOT_ATTEMPTED; } -static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg) +static inline bool +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { - struct inode *inode = req->wb_context->dentry->d_inode; - - return &NFS_I(inode)->commit_list; + return false; } -static inline struct pnfs_layout_segment * +static inline bool pnfs_clear_request_commit(struct nfs_page *req) { - return NULL; + return false; } static inline int diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 3c2540d532c7..2662c0298dd0 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -66,7 +66,6 @@ void nfs_readdata_free(struct nfs_read_data *p) void nfs_readdata_release(struct nfs_read_data *rdata) { - put_lseg(rdata->lseg); put_nfs_open_context(rdata->args.context); nfs_readdata_free(rdata); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a630ad65d64c..0de19f413f92 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -100,7 +100,6 @@ void nfs_writedata_free(struct nfs_write_data *p) void nfs_writedata_release(struct nfs_write_data *wdata) { - put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } @@ -393,8 +392,6 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) spin_unlock(&inode->i_lock); } -static struct pnfs_layout_segment *nfs_clear_request_commit(struct nfs_page *req); - /* * Remove a write request from an inode */ @@ -402,18 +399,15 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - struct pnfs_layout_segment *lseg; BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&inode->i_lock); - lseg = nfs_clear_request_commit(req); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); clear_bit(PG_MAPPED, &req->wb_flags); nfsi->npages--; spin_unlock(&inode->i_lock); - put_lseg(lseg); nfs_release_request(req); } @@ -424,6 +418,57 @@ nfs_mark_request_dirty(struct nfs_page *req) } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +/** + * nfs_request_add_commit_list - add request to a commit list + * @req: pointer to a struct nfs_page + * @head: commit list head + * + * This sets the PG_CLEAN bit, updates the inode global count of + * number of outstanding requests requiring a commit as well as + * the MM page stats. + * + * The caller must _not_ hold the inode->i_lock, but must be + * holding the nfs_page lock. + */ +void +nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + + set_bit(PG_CLEAN, &(req)->wb_flags); + spin_lock(&inode->i_lock); + nfs_list_add_request(req, head); + NFS_I(inode)->ncommit++; + spin_unlock(&inode->i_lock); + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); +} +EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); + +/** + * nfs_request_remove_commit_list - Remove request from a commit list + * @req: pointer to a nfs_page + * + * This clears the PG_CLEAN bit, and updates the inode global count of + * number of outstanding requests requiring a commit + * It does not update the MM page stats. + * + * The caller _must_ hold the inode->i_lock and the nfs_page lock. + */ +void +nfs_request_remove_commit_list(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + + if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) + return; + nfs_list_remove_request(req); + NFS_I(inode)->ncommit--; +} +EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); + + /* * Add a request to the inode's commit list. */ @@ -431,18 +476,10 @@ static void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { struct inode *inode = req->wb_context->dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - struct list_head *clist; - clist = pnfs_choose_commit_list(req, lseg); - spin_lock(&inode->i_lock); - set_bit(PG_CLEAN, &(req)->wb_flags); - nfs_list_add_request(req, clist); - nfsi->ncommit++; - spin_unlock(&inode->i_lock); - inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + if (pnfs_mark_request_commit(req, lseg)) + return; + nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); } static void @@ -452,18 +489,19 @@ nfs_clear_page_commit(struct page *page) dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); } -static struct pnfs_layout_segment * +static void nfs_clear_request_commit(struct nfs_page *req) { - struct pnfs_layout_segment *lseg = NULL; + if (test_bit(PG_CLEAN, &req->wb_flags)) { + struct inode *inode = req->wb_context->dentry->d_inode; - if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { + if (!pnfs_clear_request_commit(req)) { + spin_lock(&inode->i_lock); + nfs_request_remove_commit_list(req); + spin_unlock(&inode->i_lock); + } nfs_clear_page_commit(req->wb_page); - lseg = pnfs_clear_request_commit(req); - NFS_I(req->wb_context->dentry->d_inode)->ncommit--; - list_del(&req->wb_list); } - return lseg; } static inline @@ -490,15 +528,14 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, return 0; } #else -static inline void +static void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { } -static inline struct pnfs_layout_segment * +static void nfs_clear_request_commit(struct nfs_page *req) { - return NULL; } static inline @@ -523,25 +560,23 @@ nfs_need_commit(struct nfs_inode *nfsi) } /* i_lock held by caller */ -int +static int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max) { struct nfs_page *req, *tmp; int ret = 0; list_for_each_entry_safe(req, tmp, src, wb_list) { - if (nfs_lock_request_dontget(req)) { - kref_get(&req->wb_kref); - list_move_tail(&req->wb_list, dst); - clear_bit(PG_CLEAN, &(req)->wb_flags); - ret++; - if (ret == max) - break; - } + if (!nfs_lock_request(req)) + continue; + nfs_request_remove_commit_list(req); + nfs_list_add_request(req, dst); + ret++; + if (ret == max) + break; } return ret; } -EXPORT_SYMBOL_GPL(nfs_scan_commit_list); /* * nfs_scan_commit - Scan an inode for commit requests @@ -559,14 +594,12 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst) spin_lock(&inode->i_lock); if (nfsi->ncommit > 0) { + const int max = INT_MAX; int pnfs_ret; - ret = nfs_scan_commit_list(&nfsi->commit_list, dst, INT_MAX); - pnfs_ret = pnfs_scan_commit_lists(inode, INT_MAX - ret); - if (pnfs_ret) { - ret += pnfs_ret; - set_bit(NFS_INO_PNFS_COMMIT, &nfsi->flags); - } + ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max); + pnfs_ret = pnfs_scan_commit_lists(inode, max - ret); + ret += pnfs_ret; nfsi->ncommit -= ret; } spin_unlock(&inode->i_lock); @@ -601,7 +634,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, unsigned int rqend; unsigned int end; int error; - struct pnfs_layout_segment *lseg = NULL; if (!PagePrivate(page)) return NULL; @@ -637,8 +669,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - lseg = nfs_clear_request_commit(req); - /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { req->wb_offset = offset; @@ -650,7 +680,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = rqend - req->wb_offset; out_unlock: spin_unlock(&inode->i_lock); - put_lseg(lseg); + nfs_clear_request_commit(req); return req; out_flushme: spin_unlock(&inode->i_lock); @@ -1337,7 +1367,6 @@ void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; - put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } @@ -1647,6 +1676,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) if (req == NULL) break; if (nfs_lock_request_dontget(req)) { + nfs_clear_request_commit(req); nfs_inode_remove_request(req); /* * In case nfs_inode_remove_request has marked the diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 50856e9c1e5f..eac30d6bec17 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -28,6 +28,7 @@ enum { PG_NEED_COMMIT, PG_NEED_RESCHED, PG_PARTIAL_READ_FAILED, + PG_COMMIT_TO_DS, }; struct nfs_inode; @@ -104,6 +105,16 @@ nfs_lock_request_dontget(struct nfs_page *req) return !test_and_set_bit(PG_BUSY, &req->wb_flags); } +static inline int +nfs_lock_request(struct nfs_page *req) +{ + if (test_and_set_bit(PG_BUSY, &req->wb_flags)) + return 0; + kref_get(&req->wb_kref); + return 1; +} + + /** * nfs_list_add_request - Insert a request into a list * @req: request From 9390f42546339cf111edd23c16d6cf74ca41974c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Mar 2012 13:52:45 -0400 Subject: [PATCH 298/316] NFSv4.1: Fix a few issues in filelayout_commit_pagelist - Fix a race in which NFS_I(inode)->commits_outstanding could potentially go to zero (triggering a call to nfs_commit_clear_lock()) before we're done sending out all the commit RPC calls. - If nfs_commitdata_alloc fails, there is no reason why we shouldn't try to send off all the commits-to-ds. - Simplify the error handling. - Change pnfs_commit_list() to always return either PNFS_ATTEMPTED or PNFS_NOT_ATTEMPTED. Signed-off-by: Trond Myklebust Cc: Fred Isaman --- fs/nfs/nfs4filelayout.c | 47 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index c24e077c2820..e0bdbf4fe454 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -980,12 +980,14 @@ out_done: return rv; } -static int alloc_ds_commits(struct inode *inode, struct list_head *list) +static unsigned int +alloc_ds_commits(struct inode *inode, struct list_head *list) { struct pnfs_layout_segment *lseg; struct nfs4_filelayout_segment *fl; struct nfs_write_data *data; int i, j; + unsigned int nreq = 0; /* Won't need this when non-whole file layout segments are supported * instead we will use a pnfs_layout_hdr structure */ @@ -998,15 +1000,14 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list) continue; data = nfs_commitdata_alloc(); if (!data) - goto out_bad; + break; data->ds_commit_index = i; data->lseg = lseg; list_add(&data->pages, list); + nreq++; } - put_lseg(lseg); - return 0; -out_bad: + /* Clean up on error */ for (j = i; j < fl->number_of_buckets; j++) { if (list_empty(&fl->commit_buckets[i].committing)) continue; @@ -1015,7 +1016,7 @@ out_bad: } put_lseg(lseg); /* Caller will clean up entries put on list */ - return -ENOMEM; + return nreq; } /* This follows nfs_commit_list pretty closely */ @@ -1025,21 +1026,29 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, { struct nfs_write_data *data, *tmp; LIST_HEAD(list); + unsigned int nreq = 0; if (!list_empty(mds_pages)) { data = nfs_commitdata_alloc(); - if (!data) - goto out_bad; - data->lseg = NULL; - list_add(&data->pages, &list); + if (data != NULL) { + data->lseg = NULL; + list_add(&data->pages, &list); + nreq++; + } else + nfs_retry_commit(mds_pages, NULL); } - if (alloc_ds_commits(inode, &list)) - goto out_bad; + nreq += alloc_ds_commits(inode, &list); + + if (nreq == 0) { + nfs_commit_clear_lock(NFS_I(inode)); + goto out; + } + + atomic_add(nreq, &NFS_I(inode)->commits_outstanding); list_for_each_entry_safe(data, tmp, &list, pages) { list_del_init(&data->pages); - atomic_inc(&NFS_I(inode)->commits_outstanding); if (!data->lseg) { nfs_init_commit(data, mds_pages, NULL); nfs_initiate_commit(data, NFS_CLIENT(inode), @@ -1049,16 +1058,8 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, filelayout_initiate_commit(data, how); } } - return 0; - out_bad: - list_for_each_entry_safe(data, tmp, &list, pages) { - nfs_retry_commit(&data->pages, data->lseg); - list_del_init(&data->pages); - nfs_commit_free(data); - } - nfs_retry_commit(mds_pages, NULL); - nfs_commit_clear_lock(NFS_I(inode)); - return -ENOMEM; +out: + return PNFS_ATTEMPTED; } static void From e49a29bd0eacce9d4956c4daf777a330115b369d Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 16 Mar 2012 19:25:52 +0000 Subject: [PATCH 299/316] Try using machine credentials for RENEW calls Using user credentials for RENEW calls will fail when the user credentials have expired. To avoid this, try using the machine credentials when making RENEW calls. If no machine credentials have been set, fall back to using user credentials as before. Signed-off-by: Sachin Prabhu Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 119006b0815a..12b068f2ec91 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -146,6 +146,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) struct rpc_cred *cred = NULL; struct nfs_server *server; + /* Use machine credentials if available */ + cred = nfs4_get_machine_cred_locked(clp); + if (cred != NULL) + goto out; + rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { cred = nfs4_get_renew_cred_server_locked(server); @@ -153,6 +158,8 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) break; } rcu_read_unlock(); + +out: return cred; } From 540a0f7584169651f485e8ab67461fcb06934e38 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Mar 2012 13:39:35 -0400 Subject: [PATCH 300/316] SUNRPC: We must not use list_for_each_entry_safe() in rpc_wake_up() The problem is that for the case of priority queues, we have to assume that __rpc_remove_wait_queue_priority will move new elements from the tk_wait.links lists into the queue->tasks[] list. We therefore cannot use list_for_each_entry_safe() on queue->tasks[], since that will skip these new tasks that __rpc_remove_wait_queue_priority is adding. Without this fix, rpc_wake_up and rpc_wake_up_status will both fail to wake up all functions on priority wait queues, which can result in some nasty hangs. Reported-by: Andy Adamson Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- net/sunrpc/sched.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 1c570a81096a..994cfea2bad6 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -534,14 +534,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next); */ void rpc_wake_up(struct rpc_wait_queue *queue) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); rpc_wake_up_task_queue_locked(queue, task); + } if (head == &queue->tasks[0]) break; head--; @@ -559,13 +563,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up); */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) { + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); task->tk_status = status; rpc_wake_up_task_queue_locked(queue, task); } From c4f1b62a4b50a01e8d820717906b674807ef9ca3 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 20 Mar 2012 12:51:24 -0400 Subject: [PATCH 301/316] NFS: ncommit count is being double decremented The decrement is handled by each call to nfs_request_remove_commit_list, no need to do it again in nfs_scan_commit. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0de19f413f92..628d9a69d0a2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -595,12 +595,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst) spin_lock(&inode->i_lock); if (nfsi->ncommit > 0) { const int max = INT_MAX; - int pnfs_ret; ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max); - pnfs_ret = pnfs_scan_commit_lists(inode, max - ret); - ret += pnfs_ret; - nfsi->ncommit -= ret; + ret += pnfs_scan_commit_lists(inode, max - ret); } spin_unlock(&inode->i_lock); return ret; From 5ae67c4fee869c9b3c87b727a9ea511b6326b834 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Mar 2012 16:17:18 -0400 Subject: [PATCH 302/316] NFSv4: It is not safe to dereference lsp->ls_state in release_lockowner It is quite possible for the release_lockowner RPC call to race with the close RPC call, in which case, we cannot dereference lsp->ls_state in order to find the nfs_server. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 4 +++- fs/nfs/nfs4state.c | 8 +++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b47bdb9c1612..97ecc863dd76 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -340,7 +340,7 @@ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); -extern void nfs4_free_lock_state(struct nfs4_lock_state *lsp); +extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); extern const nfs4_stateid zero_stateid; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5e0961acfef4..d41d97fb4cb9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4760,13 +4760,14 @@ out: struct nfs_release_lockowner_data { struct nfs4_lock_state *lsp; + struct nfs_server *server; struct nfs_release_lockowner_args args; }; static void nfs4_release_lockowner_release(void *calldata) { struct nfs_release_lockowner_data *data = calldata; - nfs4_free_lock_state(data->lsp); + nfs4_free_lock_state(data->server, data->lsp); kfree(calldata); } @@ -4788,6 +4789,7 @@ int nfs4_release_lockowner(struct nfs4_lock_state *lsp) if (!data) return -ENOMEM; data->lsp = lsp; + data->server = server; data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->args.lock_owner.id = lsp->ls_seqid.owner_id; data->args.lock_owner.s_dev = server->s_dev; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 12b068f2ec91..0f43414eb25a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -791,10 +791,8 @@ out_free: return NULL; } -void nfs4_free_lock_state(struct nfs4_lock_state *lsp) +void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { - struct nfs_server *server = lsp->ls_state->owner->so_server; - ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id); nfs4_destroy_seqid_counter(&lsp->ls_seqid); kfree(lsp); @@ -828,7 +826,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ } spin_unlock(&state->state_lock); if (new != NULL) - nfs4_free_lock_state(new); + nfs4_free_lock_state(state->owner->so_server, new); return lsp; } @@ -853,7 +851,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (nfs4_release_lockowner(lsp) == 0) return; } - nfs4_free_lock_state(lsp); + nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp); } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) From 3b3be88d67cc17d0f0ab6edaf131516793fc947e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 17 Mar 2012 11:59:30 -0400 Subject: [PATCH 303/316] NFS: Use cond_resched_lock() to reduce latencies in the commit scans Ensure that we conditionally drop the inode->i_lock when it is safe to do so in the commit loops. We do so after locking the nfs_page, but before removing it from the commit list. We can then use list_safe_reset_next to recover the loop after the lock is retaken. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 11 ++++++++--- fs/nfs/pnfs.h | 8 ++++---- fs/nfs/write.c | 11 ++++++++--- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index e0bdbf4fe454..634c0bcb4fd6 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -936,7 +936,8 @@ static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) } static int -filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max) +filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, + spinlock_t *lock) { struct list_head *src = &bucket->written; struct list_head *dst = &bucket->committing; @@ -946,6 +947,8 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max) list_for_each_entry_safe(req, tmp, src, wb_list) { if (!nfs_lock_request(req)) continue; + if (cond_resched_lock(lock)) + list_safe_reset_next(req, tmp, wb_list); nfs_request_remove_commit_list(req); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); nfs_list_add_request(req, dst); @@ -959,7 +962,8 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max) /* Move reqs from written to committing lists, returning count of number moved. * Note called with i_lock held. */ -static int filelayout_scan_commit_lists(struct inode *inode, int max) +static int filelayout_scan_commit_lists(struct inode *inode, int max, + spinlock_t *lock) { struct pnfs_layout_segment *lseg; struct nfs4_filelayout_segment *fl; @@ -972,7 +976,8 @@ static int filelayout_scan_commit_lists(struct inode *inode, int max) if (fl->commit_through_mds) goto out_done; for (i = 0; i < fl->number_of_buckets && max != 0; i++) { - cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], max); + cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], + max, lock); max -= cnt; rv += cnt; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index e98ff3027d3a..07802652f5a3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -97,7 +97,7 @@ struct pnfs_layoutdriver_type { void (*mark_request_commit) (struct nfs_page *req, struct pnfs_layout_segment *lseg); void (*clear_request_commit) (struct nfs_page *req); - int (*scan_commit_lists) (struct inode *inode, int max); + int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); /* @@ -294,14 +294,14 @@ pnfs_clear_request_commit(struct nfs_page *req) } static inline int -pnfs_scan_commit_lists(struct inode *inode, int max) +pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; int ret; if (ld == NULL || ld->scan_commit_lists == NULL) return 0; - ret = ld->scan_commit_lists(inode, max); + ret = ld->scan_commit_lists(inode, max, lock); if (ret != 0) set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); return ret; @@ -419,7 +419,7 @@ pnfs_clear_request_commit(struct nfs_page *req) } static inline int -pnfs_scan_commit_lists(struct inode *inode, int max) +pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) { return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 628d9a69d0a2..bd93d40099f9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -561,7 +561,8 @@ nfs_need_commit(struct nfs_inode *nfsi) /* i_lock held by caller */ static int -nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max) +nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, + spinlock_t *lock) { struct nfs_page *req, *tmp; int ret = 0; @@ -569,6 +570,8 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max) list_for_each_entry_safe(req, tmp, src, wb_list) { if (!nfs_lock_request(req)) continue; + if (cond_resched_lock(lock)) + list_safe_reset_next(req, tmp, wb_list); nfs_request_remove_commit_list(req); nfs_list_add_request(req, dst); ret++; @@ -596,8 +599,10 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst) if (nfsi->ncommit > 0) { const int max = INT_MAX; - ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max); - ret += pnfs_scan_commit_lists(inode, max - ret); + ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, + &inode->i_lock); + ret += pnfs_scan_commit_lists(inode, max - ret, + &inode->i_lock); } spin_unlock(&inode->i_lock); return ret; From e27d359e9b7e446190362cd5c8fe281d02194896 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 18 Mar 2012 14:07:42 -0400 Subject: [PATCH 304/316] SUNRPC/NFS: Add Kbuild dependencies for NFS_DEBUG/RPC_DEBUG This allows us to turn on/off the dprintk() debugging interfaces for those distributions that don't ship the 'rpcdebug' utility. It also allows us to add Kbuild dependencies. Specifically, we already know that dprintk() in general relies on CONFIG_SYSCTL. Now it turns out that the NFS dprintks depend on CONFIG_CRC32 after we added support for the filehandle hash. Reported-by: Paul Gortmaker Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 6 ++++++ fs/nfs/inode.c | 2 +- fs/nfs/mount_clnt.c | 2 +- fs/nfs/nfsroot.c | 2 +- include/linux/nfs_fs.h | 17 ++++++++--------- include/linux/sunrpc/debug.h | 2 +- net/sunrpc/Kconfig | 13 +++++++++++++ 7 files changed, 31 insertions(+), 13 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 7bce64c7060e..2a0e6c599147 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -144,3 +144,9 @@ config NFS_USE_KERNEL_DNS depends on NFS_V4 && !NFS_USE_LEGACY_DNS select DNS_RESOLVER default y + +config NFS_DEBUG + bool + depends on NFS_FS && SUNRPC_DEBUG + select CRC32 + default y diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1a19f8d30c14..7bb4d13c1cd5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1047,7 +1047,7 @@ struct nfs_fh *nfs_alloc_fhandle(void) return fh; } -#ifdef RPC_DEBUG +#ifdef NFS_DEBUG /* * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle * in the same way that wireshark does diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index b37ca34af903..8e65c7f1f87c 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -16,7 +16,7 @@ #include #include "internal.h" -#ifdef RPC_DEBUG +#ifdef NFS_DEBUG # define NFSDBG_FACILITY NFSDBG_MOUNT #endif diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index c4744e1d513c..cd3c910d2d12 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -104,7 +104,7 @@ static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = ""; /* server:export path string passed to super.c */ static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = ""; -#ifdef RPC_DEBUG +#ifdef NFS_DEBUG /* * When the "nfsrootdebug" kernel command line option is specified, * enable debugging messages for NFSROOT. diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0a63ab2b5a76..8f27c2e36ddf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -38,6 +38,13 @@ #ifdef __KERNEL__ +/* + * Enable dprintk() debugging support for nfs client. + */ +#ifdef CONFIG_NFS_DEBUG +# define NFS_DEBUG +#endif + #include #include #include @@ -391,7 +398,7 @@ static inline void nfs_free_fhandle(const struct nfs_fh *fh) kfree(fh); } -#ifdef RPC_DEBUG +#ifdef NFS_DEBUG extern u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh); static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh) { @@ -650,14 +657,6 @@ nfs_fileid_to_ino_t(u64 fileid) #ifdef __KERNEL__ -/* - * Enable debugging support for nfs client. - * Requires RPC_DEBUG. - */ -#ifdef RPC_DEBUG -# define NFS_DEBUG -#endif - # undef ifdebug # ifdef NFS_DEBUG # define ifdebug(fac) if (unlikely(nfs_debug & NFSDBG_##fac)) diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 6cb2517bcf75..9448eb5e426c 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -31,7 +31,7 @@ /* * Enable RPC debugging/profiling. */ -#ifdef CONFIG_SYSCTL +#ifdef CONFIG_SUNRPC_DEBUG #define RPC_DEBUG #endif #ifdef CONFIG_TRACEPOINTS diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index ffd243d09188..9fe8857d8d59 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -39,3 +39,16 @@ config RPCSEC_GSS_KRB5 Kerberos support should be installed. If unsure, say Y. + +config SUNRPC_DEBUG + bool "RPC: Enable dprintk debugging" + depends on SUNRPC && SYSCTL + help + This option enables a sysctl-based debugging interface + that is be used by the 'rpcdebug' utility to turn on or off + logging of different aspects of the kernel RPC activity. + + Disabling this option will make your kernel slightly smaller, + but makes troubleshooting NFS issues significantly harder. + + If unsure, say Y. From b3b536a1230a14e3ef97e5331ba7ad036c6258a9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 20 Mar 2012 19:20:53 -0400 Subject: [PATCH 305/316] SUNRPC: Kill compiler warning when RPC_DEBUG is unset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Loads of these: linux/net/sunrpc/rpcb_clnt.c:942:2: warning: suggest braces around empty body in ‘do’ statement [-Wempty-body] show up when I unset CONFIG_PROC_SYSCTL. Seen with gcc (GCC) 4.6.1 20110908 (Red Hat 4.6.1-9) Reported-by: Andrew Morton Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 9448eb5e426c..a76cc20d98ce 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -74,8 +74,8 @@ extern unsigned int nlm_debug; # define RPC_IFDEBUG(x) x #else # define ifdebug(fac) if (0) -# define dfprintk(fac, args...) do ; while (0) -# define dfprintk_rcu(fac, args...) do ; while (0) +# define dfprintk(fac, args...) do {} while (0) +# define dfprintk_rcu(fac, args...) do {} while (0) # define RPC_IFDEBUG(x) #endif From 9304a8120a6ac06d08874d2aec76f52d3376dfe4 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 20 Mar 2012 19:26:42 +1100 Subject: [PATCH 306/316] nfs: non void functions must return a value Signed-off-by: Stephen Rothwell Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8f27c2e36ddf..69ec9cb3a867 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -413,6 +413,7 @@ extern void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption); #else static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh) { + return 0; } static inline void nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) From ffa94db6042e6fd014ae0bed8832ac707ef2afe9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2012 09:22:00 -0400 Subject: [PATCH 307/316] SUNRPC/LOCKD: Fix build warnings when CONFIG_SUNRPC_DEBUG is undefined Stephen Rothwell reports: net/sunrpc/rpcb_clnt.c: In function 'rpcb_enc_mapping': net/sunrpc/rpcb_clnt.c:820:19: warning: unused variable 'task' [-Wunused-variable] net/sunrpc/rpcb_clnt.c: In function 'rpcb_dec_getport': net/sunrpc/rpcb_clnt.c:837:19: warning: unused variable 'task' [-Wunused-variable] net/sunrpc/rpcb_clnt.c: In function 'rpcb_dec_set': net/sunrpc/rpcb_clnt.c:860:19: warning: unused variable 'task' [-Wunused-variable] net/sunrpc/rpcb_clnt.c: In function 'rpcb_enc_getaddr': net/sunrpc/rpcb_clnt.c:892:19: warning: unused variable 'task' [-Wunused-variable] net/sunrpc/rpcb_clnt.c: In function 'rpcb_dec_getaddr': net/sunrpc/rpcb_clnt.c:914:19: warning: unused variable 'task' [-Wunused-variable] fs/lockd/svclock.c:49:20: warning: 'nlmdbg_cookie2a' declared 'static' but never defined [-Wunused-function] Reported-by: Stephen Rothwell Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 59 +++++++++++++++++++++--------------------- net/sunrpc/rpcb_clnt.c | 27 ++++++++++--------- 2 files changed, 42 insertions(+), 44 deletions(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index f0179c3745d2..e46353f41a42 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -46,7 +46,6 @@ static void nlmsvc_remove_block(struct nlm_block *block); static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); static void nlmsvc_freegrantargs(struct nlm_rqst *call); static const struct rpc_call_ops nlmsvc_grant_ops; -static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie); /* * The list of blocked locks to retry @@ -54,6 +53,35 @@ static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie); static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); +#ifdef LOCKD_DEBUG +static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) +{ + /* + * We can get away with a static buffer because we're only + * called with BKL held. + */ + static char buf[2*NLM_MAXCOOKIELEN+1]; + unsigned int i, len = sizeof(buf); + char *p = buf; + + len--; /* allow for trailing \0 */ + if (len < 3) + return "???"; + for (i = 0 ; i < cookie->len ; i++) { + if (len < 2) { + strcpy(p-3, "..."); + break; + } + sprintf(p, "%02x", cookie->data[i]); + p += 2; + len -= 2; + } + *p = '\0'; + + return buf; +} +#endif + /* * Insert a blocked lock into the global list */ @@ -935,32 +963,3 @@ nlmsvc_retry_blocked(void) return timeout; } - -#ifdef RPC_DEBUG -static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) -{ - /* - * We can get away with a static buffer because we're only - * called with BKL held. - */ - static char buf[2*NLM_MAXCOOKIELEN+1]; - unsigned int i, len = sizeof(buf); - char *p = buf; - - len--; /* allow for trailing \0 */ - if (len < 3) - return "???"; - for (i = 0 ; i < cookie->len ; i++) { - if (len < 2) { - strcpy(p-3, "..."); - break; - } - sprintf(p, "%02x", cookie->data[i]); - p += 2; - len -= 2; - } - *p = '\0'; - - return buf; -} -#endif diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e699ff0ce909..207a74696c9f 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -817,11 +817,11 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb) { - struct rpc_task *task = req->rq_task; __be32 *p; dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name, + req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2); @@ -834,7 +834,6 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, struct rpcbind_args *rpcb) { - struct rpc_task *task = req->rq_task; unsigned long port; __be32 *p; @@ -845,8 +844,8 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, return -EIO; port = be32_to_cpup(p); - dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, - task->tk_msg.rpc_proc->p_name, port); + dprintk("RPC: %5u PMAP_%s result: %lu\n", req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, port); if (unlikely(port > USHRT_MAX)) return -EIO; @@ -857,7 +856,6 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, unsigned int *boolp) { - struct rpc_task *task = req->rq_task; __be32 *p; p = xdr_inline_decode(xdr, 4); @@ -869,7 +867,8 @@ static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, *boolp = 1; dprintk("RPC: %5u RPCB_%s call %s\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name, + req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, (*boolp ? "succeeded" : "failed")); return 0; } @@ -889,11 +888,11 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string, static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb) { - struct rpc_task *task = req->rq_task; __be32 *p; dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name, + req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_netid, rpcb->r_addr); @@ -911,7 +910,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, { struct sockaddr_storage address; struct sockaddr *sap = (struct sockaddr *)&address; - struct rpc_task *task = req->rq_task; __be32 *p; u32 len; @@ -928,7 +926,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, */ if (len == 0) { dprintk("RPC: %5u RPCB reply: program not registered\n", - task->tk_pid); + req->rq_task->tk_pid); return 0; } @@ -938,8 +936,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) goto out_fail; - dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, - task->tk_msg.rpc_proc->p_name, (char *)p); + dprintk("RPC: %5u RPCB_%s reply: %s\n", req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, (char *)p); if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len, sap, sizeof(address)) == 0) @@ -950,7 +948,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, out_fail: dprintk("RPC: %5u malformed RPCB_%s reply\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name); + req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name); return -EIO; } From 6f00866ddd15724eb20eac4ddf6e2c6c1a6cfcdc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2012 14:12:46 -0400 Subject: [PATCH 308/316] NFS: Fix more NFS debug related build warnings Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 5 ++--- fs/nfs/fscache.c | 2 +- fs/nfs/pnfs.h | 8 +++++++- fs/nfs/pnfs_dev.c | 2 ++ include/linux/nfs_fs.h | 2 ++ 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 61501346324e..9c94297bb70e 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -233,12 +233,11 @@ bl_read_pagelist(struct nfs_read_data *rdata) sector_t isect, extent_length = 0; struct parallel_io *par; loff_t f_offset = rdata->args.offset; - size_t count = rdata->args.count; struct page **pages = rdata->args.pages; int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; - dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__, - rdata->npages, f_offset, count); + dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, + rdata->npages, f_offset, (unsigned int)rdata->args.count); par = alloc_parallel(rdata); if (!par) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 419119c371bf..ae65c16b3670 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -327,7 +327,7 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_server *nfss = NFS_SERVER(inode); - struct fscache_cookie *old = nfsi->fscache; + NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache); nfs_fscache_inode_lock(inode); if (nfsi->fscache) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 07802652f5a3..442ebf68eeec 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -228,7 +228,6 @@ struct nfs4_deviceid_node { atomic_t ref; }; -void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, @@ -328,6 +327,13 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +#ifdef NFS_DEBUG +void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); +#else +static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id) +{ +} +#endif /* NFS_DEBUG */ #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 6b4cd3849306..73f701f1f4d3 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -43,6 +43,7 @@ static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; static DEFINE_SPINLOCK(nfs4_deviceid_lock); +#ifdef NFS_DEBUG void nfs4_print_deviceid(const struct nfs4_deviceid *id) { @@ -52,6 +53,7 @@ nfs4_print_deviceid(const struct nfs4_deviceid *id) p[0], p[1], p[2], p[3]); } EXPORT_SYMBOL_GPL(nfs4_print_deviceid); +#endif static inline u32 nfs4_deviceid_hash(const struct nfs4_deviceid *id) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 69ec9cb3a867..52a1bdb4ee2b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -661,8 +661,10 @@ nfs_fileid_to_ino_t(u64 fileid) # undef ifdebug # ifdef NFS_DEBUG # define ifdebug(fac) if (unlikely(nfs_debug & NFSDBG_##fac)) +# define NFS_IFDEBUG(x) x # else # define ifdebug(fac) if (0) +# define NFS_IFDEBUG(x) # endif #endif /* __KERNEL */ From c6cb80d00be42f30716ec817b963bcec094433b5 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 19 Mar 2012 14:54:39 -0400 Subject: [PATCH 309/316] NFS: Remove nfs4_setup_sequence from generic write code This is an NFS v4 specific operation, so it belongs in the NFS v4 code and not the generic client. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ---- fs/nfs/nfs3proc.c | 6 ++++++ fs/nfs/nfs4proc.c | 11 +++++++++++ fs/nfs/proc.c | 6 ++++++ fs/nfs/write.c | 15 +-------------- include/linux/nfs_xdr.h | 1 + 6 files changed, 25 insertions(+), 18 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1940f1a56a5f..c4bdaf15289a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -554,9 +554,7 @@ static void nfs_direct_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_direct_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_commit_result, .rpc_release = nfs_direct_commit_release, }; @@ -696,9 +694,7 @@ out_unlock: } static const struct rpc_call_ops nfs_write_direct_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_write_result, .rpc_release = nfs_direct_write_release, }; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 91943953a370..3bcf722800f3 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -828,6 +828,11 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } +static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +{ + rpc_call_start(task); +} + static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) @@ -881,6 +886,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .read_setup = nfs3_proc_read_setup, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, + .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, .commit_done = nfs3_commit_done, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d41d97fb4cb9..dc910691acc0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3373,6 +3373,16 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } +static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + rpc_call_start(task); +} + static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; @@ -6449,6 +6459,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .read_setup = nfs4_proc_read_setup, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, + .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, .commit_done = nfs4_commit_done, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0c672588fe5a..8069b41e7f2d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -668,6 +668,11 @@ static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } +static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +{ + rpc_call_start(task); +} + static void nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) { @@ -738,6 +743,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .read_setup = nfs_proc_read_setup, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, + .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, .lock = nfs_proc_lock, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bd93d40099f9..2c68818f68ac 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1172,23 +1172,14 @@ out: nfs_writedata_release(calldata); } -#if defined(CONFIG_NFS_V4_1) void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - - if (nfs4_setup_sequence(NFS_SERVER(data->inode), - &data->args.seq_args, - &data->res.seq_res, task)) - return; - rpc_call_start(task); + NFS_PROTO(data->inode)->write_rpc_prepare(task, data); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_write_partial_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_partial, .rpc_release = nfs_writeback_release_partial, }; @@ -1250,9 +1241,7 @@ remove_request: } static const struct rpc_call_ops nfs_write_full_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_full, .rpc_release = nfs_writeback_release_full, }; @@ -1544,9 +1533,7 @@ static void nfs_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c1cf86cceee4..b02a5f9eb217 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1246,6 +1246,7 @@ struct nfs_rpc_ops { void (*read_setup) (struct nfs_read_data *, struct rpc_message *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); + void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, struct rpc_message *); int (*commit_done) (struct rpc_task *, struct nfs_write_data *); From ea7c330362257c072791aeaf03bae2cebf9fb984 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 19 Mar 2012 14:54:40 -0400 Subject: [PATCH 310/316] NFS: Remove nfs4_setup_sequence from generic read code This is an NFS v4 specific operation, so it belongs in the NFS v4 code and not the generic client. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 -- fs/nfs/nfs3proc.c | 6 ++++++ fs/nfs/nfs4proc.c | 11 +++++++++++ fs/nfs/proc.c | 6 ++++++ fs/nfs/read.c | 13 +------------ include/linux/nfs_xdr.h | 1 + 6 files changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index c4bdaf15289a..9c7f66ac6cc2 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -265,9 +265,7 @@ static void nfs_direct_read_release(void *calldata) } static const struct rpc_call_ops nfs_read_direct_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_read_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_read_result, .rpc_release = nfs_direct_read_release, }; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3bcf722800f3..9d9b239329dc 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -814,6 +814,11 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } +static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +{ + rpc_call_start(task); +} + static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) @@ -884,6 +889,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, + .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, .write_rpc_prepare = nfs3_proc_write_rpc_prepare, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dc910691acc0..915385fcf532 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3299,6 +3299,16 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); } +static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + rpc_call_start(task); +} + /* Reset the the nfs_read_data to send the read to the MDS. */ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) { @@ -6457,6 +6467,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, + .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, .write_rpc_prepare = nfs4_proc_write_rpc_prepare, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 8069b41e7f2d..a8df70742d00 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -651,6 +651,11 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message * msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } +static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +{ + rpc_call_start(task); +} + static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs_async_handle_expired_key(task)) @@ -741,6 +746,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, .read_setup = nfs_proc_read_setup, + .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, .write_rpc_prepare = nfs_proc_write_rpc_prepare, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2662c0298dd0..cc1f758a7ee1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -464,23 +464,14 @@ static void nfs_readpage_release_partial(void *calldata) nfs_readdata_release(calldata); } -#if defined(CONFIG_NFS_V4_1) void nfs_read_prepare(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - - if (nfs4_setup_sequence(NFS_SERVER(data->inode), - &data->args.seq_args, &data->res.seq_res, - task)) - return; - rpc_call_start(task); + NFS_PROTO(data->inode)->read_rpc_prepare(task, data); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_read_partial_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_read_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_partial, .rpc_release = nfs_readpage_release_partial, }; @@ -544,9 +535,7 @@ static void nfs_readpage_release_full(void *calldata) } static const struct rpc_call_ops nfs_read_full_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_read_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_full, .rpc_release = nfs_readpage_release_full, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b02a5f9eb217..286d74dde053 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1244,6 +1244,7 @@ struct nfs_rpc_ops { int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); void (*read_setup) (struct nfs_read_data *, struct rpc_message *); + void (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); From 34e137cc7e3b63c254875e59cd48dcbe6757fe6c Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 19 Mar 2012 14:54:41 -0400 Subject: [PATCH 311/316] NFS: Remove nfs4_setup_sequence from generic unlink code This is an NFS v4 specific operation, so it belongs in the NFS v4 code and not the generic client. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 6 ++++++ fs/nfs/nfs4proc.c | 11 +++++++++++ fs/nfs/proc.c | 6 ++++++ fs/nfs/unlink.c | 20 +------------------- include/linux/nfs_xdr.h | 10 ++++++++++ 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 9d9b239329dc..7f3f957f677c 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -428,6 +428,11 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; } +static void nfs3_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) +{ + rpc_call_start(task); +} + static int nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) { @@ -874,6 +879,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .create = nfs3_proc_create, .remove = nfs3_proc_remove, .unlink_setup = nfs3_proc_unlink_setup, + .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, .unlink_done = nfs3_proc_unlink_done, .rename = nfs3_proc_rename, .rename_setup = nfs3_proc_rename_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 915385fcf532..9c247fa7915a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2779,6 +2779,16 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); } +static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->dir), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + rpc_call_start(task); +} + static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) { struct nfs_removeres *res = task->tk_msg.rpc_resp; @@ -6451,6 +6461,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .create = nfs4_proc_create, .remove = nfs4_proc_remove, .unlink_setup = nfs4_proc_unlink_setup, + .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, .unlink_done = nfs4_proc_unlink_done, .rename = nfs4_proc_rename, .rename_setup = nfs4_proc_rename_setup, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index a8df70742d00..528b9a2fae05 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -358,6 +358,11 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; } +static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) +{ + rpc_call_start(task); +} + static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) { if (nfs_async_handle_expired_key(task)) @@ -731,6 +736,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .create = nfs_proc_create, .remove = nfs_proc_remove, .unlink_setup = nfs_proc_unlink_setup, + .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, .unlink_done = nfs_proc_unlink_done, .rename = nfs_proc_rename, .rename_setup = nfs_proc_rename_setup, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index fae71c9f5050..9c5a7980e244 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -20,15 +20,6 @@ #include "iostat.h" #include "delegation.h" -struct nfs_unlinkdata { - struct hlist_node list; - struct nfs_removeargs args; - struct nfs_removeres res; - struct inode *dir; - struct rpc_cred *cred; - struct nfs_fattr dir_attr; -}; - /** * nfs_free_unlinkdata - release data from a sillydelete operation. * @data: pointer to unlink structure. @@ -107,25 +98,16 @@ static void nfs_async_unlink_release(void *calldata) nfs_sb_deactive(sb); } -#if defined(CONFIG_NFS_V4_1) static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; - struct nfs_server *server = NFS_SERVER(data->dir); - - if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, task)) - return; - rpc_call_start(task); + NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_done = nfs_async_unlink_done, .rpc_release = nfs_async_unlink_release, -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_unlink_prepare, -#endif /* CONFIG_NFS_V4_1 */ }; static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 286d74dde053..8d93e688188d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1192,6 +1192,15 @@ struct nfs_write_data { struct page *page_array[NFS_PAGEVEC_SIZE]; }; +struct nfs_unlinkdata { + struct hlist_node list; + struct nfs_removeargs args; + struct nfs_removeres res; + struct inode *dir; + struct rpc_cred *cred; + struct nfs_fattr dir_attr; +}; + struct nfs_access_entry; struct nfs_client; struct rpc_timeout; @@ -1221,6 +1230,7 @@ struct nfs_rpc_ops { struct iattr *, int, struct nfs_open_context *); int (*remove) (struct inode *, struct qstr *); void (*unlink_setup) (struct rpc_message *, struct inode *dir); + void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); int (*unlink_done) (struct rpc_task *, struct inode *); int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); From c6bfa1a16377b42496ecc0490a33516c0e414e7b Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 19 Mar 2012 14:54:42 -0400 Subject: [PATCH 312/316] NFS: Remove nfs4_setup_sequence from generic rename code This is an NFS v4 specific operation, so it belongs in the NFS v4 code and not the generic client. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 6 ++++++ fs/nfs/nfs4proc.c | 11 +++++++++++ fs/nfs/proc.c | 6 ++++++ fs/nfs/unlink.c | 23 +---------------------- include/linux/nfs_xdr.h | 13 +++++++++++++ 5 files changed, 37 insertions(+), 22 deletions(-) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7f3f957f677c..5242eae6711a 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -450,6 +450,11 @@ nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; } +static void nfs3_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) +{ + rpc_call_start(task); +} + static int nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) @@ -883,6 +888,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .unlink_done = nfs3_proc_unlink_done, .rename = nfs3_proc_rename, .rename_setup = nfs3_proc_rename_setup, + .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, .rename_done = nfs3_proc_rename_done, .link = nfs3_proc_link, .symlink = nfs3_proc_symlink, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9c247fa7915a..b76dd0efae75 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2814,6 +2814,16 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); } +static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->old_dir), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + rpc_call_start(task); +} + static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) { @@ -6465,6 +6475,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .unlink_done = nfs4_proc_unlink_done, .rename = nfs4_proc_rename, .rename_setup = nfs4_proc_rename_setup, + .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, .rename_done = nfs4_proc_rename_done, .link = nfs4_proc_link, .symlink = nfs4_proc_symlink, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 528b9a2fae05..b63b6f4d14fb 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -377,6 +377,11 @@ nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; } +static void nfs_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) +{ + rpc_call_start(task); +} + static int nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) @@ -740,6 +745,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .unlink_done = nfs_proc_unlink_done, .rename = nfs_proc_rename, .rename_setup = nfs_proc_rename_setup, + .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, .rename_done = nfs_proc_rename_done, .link = nfs_proc_link, .symlink = nfs_proc_symlink, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 9c5a7980e244..3210a03342f9 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -323,18 +323,6 @@ nfs_cancel_async_unlink(struct dentry *dentry) spin_unlock(&dentry->d_lock); } -struct nfs_renamedata { - struct nfs_renameargs args; - struct nfs_renameres res; - struct rpc_cred *cred; - struct inode *old_dir; - struct dentry *old_dentry; - struct nfs_fattr old_fattr; - struct inode *new_dir; - struct dentry *new_dentry; - struct nfs_fattr new_fattr; -}; - /** * nfs_async_rename_done - Sillyrename post-processing * @task: rpc_task of the sillyrename @@ -385,25 +373,16 @@ static void nfs_async_rename_release(void *calldata) kfree(data); } -#if defined(CONFIG_NFS_V4_1) static void nfs_rename_prepare(struct rpc_task *task, void *calldata) { struct nfs_renamedata *data = calldata; - struct nfs_server *server = NFS_SERVER(data->old_dir); - - if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, task)) - return; - rpc_call_start(task); + NFS_PROTO(data->old_dir)->rename_rpc_prepare(task, data); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_rename_ops = { .rpc_call_done = nfs_async_rename_done, .rpc_release = nfs_async_rename_release, -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_rename_prepare, -#endif /* CONFIG_NFS_V4_1 */ }; /** diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8d93e688188d..bfd0d1bf6707 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1201,6 +1201,18 @@ struct nfs_unlinkdata { struct nfs_fattr dir_attr; }; +struct nfs_renamedata { + struct nfs_renameargs args; + struct nfs_renameres res; + struct rpc_cred *cred; + struct inode *old_dir; + struct dentry *old_dentry; + struct nfs_fattr old_fattr; + struct inode *new_dir; + struct dentry *new_dentry; + struct nfs_fattr new_fattr; +}; + struct nfs_access_entry; struct nfs_client; struct rpc_timeout; @@ -1235,6 +1247,7 @@ struct nfs_rpc_ops { int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); void (*rename_setup) (struct rpc_message *msg, struct inode *dir); + void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, struct qstr *); int (*symlink) (struct inode *, struct dentry *, struct page *, From 18d98f6c04991dd3c12acf6f39cea40e9510640a Mon Sep 17 00:00:00 2001 From: Sachin Bhamare Date: Mon, 19 Mar 2012 20:47:58 -0700 Subject: [PATCH 313/316] pnfs-obj: autologin: Add support for protocol autologin The pnfs-objects protocol mandates that we autologin into devices not present in the system, according to information specified in the get_device_info returned from the server. The Protocol specifies two login hints. 1. An IP address:port combination 2. A string URI which is constructed as a URL with a protocol prefix followed by :// and a string as address. For each protocol prefix the string-address format might be different. We only support the second option. The first option is just redundant to the second one. NOTE: The Kernel part of autologin does not parse the URI string. It just channels it to a user-mode script. So any new login protocols should only update the user-mode script which is a part of the nfs-utils package, but the Kernel need not change. We implement the autologin by using the call_usermodehelper() API. (Thanks to Steve Dickson for pointing it out) So there is no running daemon needed, and/or special setup. We Add the osd_login_prog Kernel module parameters which defaults to: /sbin/osd_login Kernel try's to upcall the program specified in osd_login_prog. If the file is not found or the execution fails Kernel will disable any farther upcalls, by zeroing out osd_login_prog, Until Admin re-enables it by setting the osd_login_prog parameter to a proper program. Also add text about the osd_login program command line API to: Documentation/filesystems/nfs/pnfs.txt and documentation of the new osd_login_prog module parameter to: Documentation/kernel-parameters.txt TODO: Add timeout option in the case osd_login program gets stuck Signed-off-by: Sachin Bhamare Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- Documentation/filesystems/nfs/pnfs.txt | 54 ++++++++++ Documentation/kernel-parameters.txt | 6 ++ fs/nfs/objlayout/objio_osd.c | 9 ++ fs/nfs/objlayout/objlayout.c | 134 +++++++++++++++++++++++++ fs/nfs/objlayout/objlayout.h | 2 + 5 files changed, 205 insertions(+) diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt index 983e14abe7e9..c7919c6e3bea 100644 --- a/Documentation/filesystems/nfs/pnfs.txt +++ b/Documentation/filesystems/nfs/pnfs.txt @@ -53,3 +53,57 @@ lseg maintains an extra reference corresponding to the NFS_LSEG_VALID bit which holds it in the pnfs_layout_hdr's list. When the final lseg is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED bit is set, preventing any new lsegs from being added. + +layout drivers +-------------- + +PNFS utilizes what is called layout drivers. The STD defines 3 basic +layout types: "files" "objects" and "blocks". For each of these types +there is a layout-driver with a common function-vectors table which +are called by the nfs-client pnfs-core to implement the different layout +types. + +Files-layout-driver code is in: fs/nfs/nfs4filelayout.c && nfs4filelayoutdev.c +Objects-layout-deriver code is in: fs/nfs/objlayout/.. directory +Blocks-layout-deriver code is in: fs/nfs/blocklayout/.. directory + +objects-layout setup +-------------------- + +As part of the full STD implementation the objlayoutdriver.ko needs, at times, +to automatically login to yet undiscovered iscsi/osd devices. For this the +driver makes up-calles to a user-mode script called *osd_login* + +The path_name of the script to use is by default: + /sbin/osd_login. +This name can be overridden by the Kernel module parameter: + objlayoutdriver.osd_login_prog + +If Kernel does not find the osd_login_prog path it will zero it out +and will not attempt farther logins. An admin can then write new value +to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it. + +The /sbin/osd_login is part of the nfs-utils package, and should usually +be installed on distributions that support this Kernel version. + +The API to the login script is as follows: + Usage: $0 -u -o -s + Options: + -u target uri e.g. iscsi://: + (allways exists) + (More protocols can be defined in the future. + The client does not interpret this string it is + passed unchanged as recieved from the Server) + -o osdname of the requested target OSD + (Might be empty) + (A string which denotes the OSD name, there is a + limit of 64 chars on this string) + -s systemid of the requested target OSD + (Might be empty) + (This string, if not empty is always an hex + representation of the 20 bytes osd_system_id) + +blocks-layout setup +------------------- + +TODO: Document the setup needs of the blocks layout driver diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7bae0fd3b63c..0d79a88f4de9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1687,6 +1687,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. information. + objlayoutdriver.osd_login_prog= + [NFS] [OBJLAYOUT] sets the pathname to the program which + is used to automatically discover and login into new + osd-targets. Please see: + Documentation/filesystems/pnfs.txt for more explanations + nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take when a NMI is triggered. Format: [state][,regs][,debounce][,die] diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 3a621a2fd321..4bff4a3dab46 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -137,6 +137,7 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, struct objio_dev_ent *ode; struct osd_dev *od; struct osd_dev_info odi; + bool retry_flag = true; int err; ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); @@ -171,10 +172,18 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, goto out; } +retry_lookup: od = osduld_info_lookup(&odi); if (unlikely(IS_ERR(od))) { err = PTR_ERR(od); dprintk("%s: osduld_info_lookup => %d\n", __func__, err); + if (err == -ENODEV && retry_flag) { + err = objlayout_autologin(deviceaddr); + if (likely(!err)) { + retry_flag = false; + goto retry_lookup; + } + } goto out; } diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 157c47e277e0..8d45f1c318ce 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -37,6 +37,9 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include +#include +#include #include #include "objlayout.h" @@ -651,3 +654,134 @@ void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) __free_page(odi->page); kfree(odi); } + +enum { + OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, + OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, + OSD_LOGIN_UPCALL_PATHLEN = 256 +}; + +static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login"; + +module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog), + 0600); +MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program"); + +struct __auto_login { + char uri[OBJLAYOUT_MAX_URI_LEN]; + char osdname[OBJLAYOUT_MAX_OSDNAME_LEN]; + char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN]; +}; + +static int __objlayout_upcall(struct __auto_login *login) +{ + static char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL + }; + char *argv[8]; + int ret; + + if (unlikely(!osd_login_prog[0])) { + dprintk("%s: osd_login_prog is disabled\n", __func__); + return -EACCES; + } + + dprintk("%s uri: %s\n", __func__, login->uri); + dprintk("%s osdname %s\n", __func__, login->osdname); + dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex); + + argv[0] = (char *)osd_login_prog; + argv[1] = "-u"; + argv[2] = login->uri; + argv[3] = "-o"; + argv[4] = login->osdname; + argv[5] = "-s"; + argv[6] = login->systemid_hex; + argv[7] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + /* + * Disable the upcall mechanism if we're getting an ENOENT or + * EACCES error. The admin can re-enable it on the fly by using + * sysfs to set the objlayoutdriver.osd_login_prog module parameter once + * the problem has been fixed. + */ + if (ret == -ENOENT || ret == -EACCES) { + printk(KERN_ERR "PNFS-OBJ: %s was not found please set " + "objlayoutdriver.osd_login_prog kernel parameter!\n", + osd_login_prog); + osd_login_prog[0] = '\0'; + } + dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret); + + return ret; +} + +/* Assume dest is all zeros */ +static void __copy_nfsS_and_zero_terminate(struct nfs4_string s, + char *dest, int max_len, + const char *var_name) +{ + if (!s.len) + return; + + if (s.len >= max_len) { + pr_warn_ratelimited( + "objlayout_autologin: %s: s.len(%d) >= max_len(%d)", + var_name, s.len, max_len); + s.len = max_len - 1; /* space for null terminator */ + } + + memcpy(dest, s.data, s.len); +} + +/* Assume sysid is all zeros */ +static void _sysid_2_hex(struct nfs4_string s, + char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN]) +{ + int i; + char *cur; + + if (!s.len) + return; + + if (s.len != OSD_SYSTEMID_LEN) { + pr_warn_ratelimited( + "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN", + s.len); + if (s.len > OSD_SYSTEMID_LEN) + s.len = OSD_SYSTEMID_LEN; + } + + cur = sysid; + for (i = 0; i < s.len; i++) + cur = hex_byte_pack(cur, s.data[i]); +} + +int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr) +{ + int rc; + struct __auto_login login; + + if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len) + return -ENODEV; + + memset(&login, 0, sizeof(login)); + __copy_nfsS_and_zero_terminate( + deviceaddr->oda_targetaddr.ota_netaddr.r_addr, + login.uri, sizeof(login.uri), "URI"); + + __copy_nfsS_and_zero_terminate( + deviceaddr->oda_osdname, + login.osdname, sizeof(login.osdname), "OSDNAME"); + + _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex); + + rc = __objlayout_upcall(&login); + if (rc > 0) /* script returns positive values */ + rc = -ENODEV; + + return rc; +} diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 8ec34727ed21..880ba086be94 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -184,4 +184,6 @@ extern void objlayout_encode_layoutreturn( struct xdr_stream *, const struct nfs4_layoutreturn_args *); +extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr); + #endif /* _OBJLAYOUT_H */ From 4a6862b3649d705bf41a36e3c7943d0322a9ee27 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Mon, 20 Feb 2012 13:07:42 -0600 Subject: [PATCH 314/316] xprtrdma: The transport should not bug-check when a dup reply is received The client side RDMA transport will bug check if it receives a duplicate reply, instead we should simply drop the duplicate reply. Signed-off-by: Tom Tucker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/rpc_rdma.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 554d0814c875..12de982b7a50 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -771,13 +771,18 @@ repost: /* get request object */ req = rpcr_to_rdmar(rqst); + if (req->rl_reply) { + spin_unlock(&xprt->transport_lock); + dprintk("RPC: %s: duplicate reply 0x%p to RPC " + "request 0x%p: xid 0x%08x\n", __func__, rep, req, + headerp->rm_xid); + goto repost; + } dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" " RPC request 0x%p xid 0x%08x\n", __func__, rep, req, rqst, headerp->rm_xid); - BUG_ON(!req || req->rl_reply); - /* from here on, the reply is no longer an orphan */ req->rl_reply = rep; From 9b78145c0f280d4f01c460d6251eab2584181fa9 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Mon, 20 Feb 2012 13:07:57 -0600 Subject: [PATCH 315/316] xprtrdma: Remove assumption that each segment is <= PAGE_SIZE The xprtrdma FRMR mapping logic assumes that a segment is <= PAGE_SIZE. This is not true for NFS4. Signed-off-by: Tom Tucker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 28236bab57f9..745973b729af 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1490,6 +1490,9 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, u8 key; int len, pageoff; int i, rc; + int seg_len; + u64 pa; + int page_no; pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ @@ -1497,11 +1500,15 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, len = -pageoff; if (*nsegs > RPCRDMA_MAX_DATA_SEGS) *nsegs = RPCRDMA_MAX_DATA_SEGS; - for (i = 0; i < *nsegs;) { + for (page_no = i = 0; i < *nsegs;) { rpcrdma_map_one(ia, seg, writing); - seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; + pa = seg->mr_dma; + for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { + seg1->mr_chunk.rl_mw->r.frmr.fr_pgl-> + page_list[page_no++] = pa; + pa += PAGE_SIZE; + } len += seg->mr_len; - BUG_ON(seg->mr_len > PAGE_SIZE); ++seg; ++i; /* Check for holes */ @@ -1540,9 +1547,9 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.send_flags = IB_SEND_SIGNALED; frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; - frmr_wr.wr.fast_reg.page_list_len = i; + frmr_wr.wr.fast_reg.page_list_len = page_no; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; + frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; BUG_ON(frmr_wr.wr.fast_reg.length < len); frmr_wr.wr.fast_reg.access_flags = (writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : From 5a7c9eec9fde1da0e3adf0a4ddb64ff2a324a492 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi Date: Thu, 15 Mar 2012 23:58:52 +0530 Subject: [PATCH 316/316] NFS: fix sb->s_id in nfs debug prints NFS bdi flush thread in ps output is printed like "flush-:" For example: $ ps aux | grep flush 2079 root 0 SW [flush-0:18] ^^^^ nfs_bdi_register() ==> bdi_register_dev() ==> bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev)); ^^^^^ However, NFS sb->s_id store major:minor number in hex: nfs_initialise_sb() ==> snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); ^^^^^ If we enable nfs debug prints using command: $ rpcdebug -m nfs -s all write to a file: $ dd if=/dev/zero of=/testfile.txt bs=32768 count=1 Without Patch: [ 2431.032000] NFS: 0 initiated write call (req 0:12/40, 32768 bytes @ offset 0) ^^^^ With Patch: [ 2431.032000] NFS: 0 initiated write call (req 0:18/40, 32768 bytes @ offset 0) ^^^^ We should store NFS "s->s_id" in decimal to avoid confusion between NFS flush thread name(in ps output) and NFS debug prints. Signed-off-by: Vivek Trivedi Signed-off-by: Namjae Jeon Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index aac403085be5..ccc4cdb1efe9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2115,7 +2115,7 @@ static inline void nfs_initialise_sb(struct super_block *sb) /* We probably want something more informative here */ snprintf(sb->s_id, sizeof(sb->s_id), - "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); + "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); if (sb->s_blocksize == 0) sb->s_blocksize = nfs_block_bits(server->wsize,