From 1fed17df7e501b170f876b87eec11f930e3f1df5 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 14 Jun 2019 18:42:17 +0000 Subject: [PATCH 01/13] hv_balloon: Use a static page for the balloon_up send buffer It's unnecessary to dynamically allocate the buffer. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/hv_balloon.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 6fb4ea5f0304..4b06f076374a 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -494,7 +494,7 @@ enum hv_dm_state { static __u8 recv_buffer[PAGE_SIZE]; -static __u8 *send_buffer; +static __u8 balloon_up_send_buffer[PAGE_SIZE]; #define PAGES_IN_2M 512 #define HA_CHUNK (32 * 1024) @@ -1292,8 +1292,8 @@ static void balloon_up(struct work_struct *dummy) } while (!done) { - bl_resp = (struct dm_balloon_response *)send_buffer; - memset(send_buffer, 0, PAGE_SIZE); + memset(balloon_up_send_buffer, 0, PAGE_SIZE); + bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer; bl_resp->hdr.type = DM_BALLOON_RESPONSE; bl_resp->hdr.size = sizeof(struct dm_balloon_response); bl_resp->more_pages = 1; @@ -1578,19 +1578,11 @@ static int balloon_probe(struct hv_device *dev, do_hot_add = false; #endif - /* - * First allocate a send buffer. - */ - - send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!send_buffer) - return -ENOMEM; - ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0, balloon_onchannelcallback, dev); if (ret) - goto probe_error0; + return ret; dm_device.dev = dev; dm_device.state = DM_INITIALIZING; @@ -1716,8 +1708,6 @@ probe_error2: probe_error1: vmbus_close(dev->channel); -probe_error0: - kfree(send_buffer); return ret; } @@ -1736,7 +1726,6 @@ static int balloon_remove(struct hv_device *dev) vmbus_close(dev->channel); kthread_stop(dm->thread); - kfree(send_buffer); #ifdef CONFIG_MEMORY_HOTPLUG restore_online_page_callback(&hv_online_page); unregister_memory_notifier(&hv_memory_nb); From 221f6df008ab39151600d4b09c85fcc730716bcb Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 14 Jun 2019 18:42:30 +0000 Subject: [PATCH 02/13] hv_balloon: Reorganize the probe function Move the code that negotiates with the host to a new function balloon_connect_vsp() and improve the error handling. This makes the code more readable and paves the way for the support of hibernation in future. Makes no real logic change here. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/hv_balloon.c | 124 +++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 58 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 4b06f076374a..34bd73526afd 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1564,50 +1564,18 @@ static void balloon_onchannelcallback(void *context) } -static int balloon_probe(struct hv_device *dev, - const struct hv_vmbus_device_id *dev_id) +static int balloon_connect_vsp(struct hv_device *dev) { - int ret; - unsigned long t; struct dm_version_request version_req; struct dm_capabilities cap_msg; - -#ifdef CONFIG_MEMORY_HOTPLUG - do_hot_add = hot_add; -#else - do_hot_add = false; -#endif + unsigned long t; + int ret; ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0, - balloon_onchannelcallback, dev); - + balloon_onchannelcallback, dev); if (ret) return ret; - dm_device.dev = dev; - dm_device.state = DM_INITIALIZING; - dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8; - init_completion(&dm_device.host_event); - init_completion(&dm_device.config_event); - INIT_LIST_HEAD(&dm_device.ha_region_list); - spin_lock_init(&dm_device.ha_lock); - INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); - INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req); - dm_device.host_specified_ha_region = false; - - dm_device.thread = - kthread_run(dm_thread_func, &dm_device, "hv_balloon"); - if (IS_ERR(dm_device.thread)) { - ret = PTR_ERR(dm_device.thread); - goto probe_error1; - } - -#ifdef CONFIG_MEMORY_HOTPLUG - set_online_page_callback(&hv_online_page); - register_memory_notifier(&hv_memory_nb); -#endif - - hv_set_drvdata(dev, &dm_device); /* * Initiate the hand shake with the host and negotiate * a version that the host can support. We start with the @@ -1623,16 +1591,15 @@ static int balloon_probe(struct hv_device *dev, dm_device.version = version_req.version.version; ret = vmbus_sendpacket(dev->channel, &version_req, - sizeof(struct dm_version_request), - (unsigned long)NULL, - VM_PKT_DATA_INBAND, 0); + sizeof(struct dm_version_request), + (unsigned long)NULL, VM_PKT_DATA_INBAND, 0); if (ret) - goto probe_error2; + goto out; t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); if (t == 0) { ret = -ETIMEDOUT; - goto probe_error2; + goto out; } /* @@ -1640,8 +1607,8 @@ static int balloon_probe(struct hv_device *dev, * fail the probe function. */ if (dm_device.state == DM_INIT_ERROR) { - ret = -ETIMEDOUT; - goto probe_error2; + ret = -EPROTO; + goto out; } pr_info("Using Dynamic Memory protocol version %u.%u\n", @@ -1674,16 +1641,15 @@ static int balloon_probe(struct hv_device *dev, cap_msg.max_page_number = -1; ret = vmbus_sendpacket(dev->channel, &cap_msg, - sizeof(struct dm_capabilities), - (unsigned long)NULL, - VM_PKT_DATA_INBAND, 0); + sizeof(struct dm_capabilities), + (unsigned long)NULL, VM_PKT_DATA_INBAND, 0); if (ret) - goto probe_error2; + goto out; t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); if (t == 0) { ret = -ETIMEDOUT; - goto probe_error2; + goto out; } /* @@ -1691,23 +1657,65 @@ static int balloon_probe(struct hv_device *dev, * fail the probe function. */ if (dm_device.state == DM_INIT_ERROR) { - ret = -ETIMEDOUT; - goto probe_error2; + ret = -EPROTO; + goto out; } + return 0; +out: + vmbus_close(dev->channel); + return ret; +} + +static int balloon_probe(struct hv_device *dev, + const struct hv_vmbus_device_id *dev_id) +{ + int ret; + +#ifdef CONFIG_MEMORY_HOTPLUG + do_hot_add = hot_add; +#else + do_hot_add = false; +#endif + dm_device.dev = dev; + dm_device.state = DM_INITIALIZING; + dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8; + init_completion(&dm_device.host_event); + init_completion(&dm_device.config_event); + INIT_LIST_HEAD(&dm_device.ha_region_list); + spin_lock_init(&dm_device.ha_lock); + INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); + INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req); + dm_device.host_specified_ha_region = false; + +#ifdef CONFIG_MEMORY_HOTPLUG + set_online_page_callback(&hv_online_page); + register_memory_notifier(&hv_memory_nb); +#endif + + hv_set_drvdata(dev, &dm_device); + + ret = balloon_connect_vsp(dev); + if (ret != 0) + return ret; + dm_device.state = DM_INITIALIZED; - last_post_time = jiffies; + + dm_device.thread = + kthread_run(dm_thread_func, &dm_device, "hv_balloon"); + if (IS_ERR(dm_device.thread)) { + ret = PTR_ERR(dm_device.thread); + goto probe_error; + } return 0; -probe_error2: +probe_error: + vmbus_close(dev->channel); #ifdef CONFIG_MEMORY_HOTPLUG + unregister_memory_notifier(&hv_memory_nb); restore_online_page_callback(&hv_online_page); #endif - kthread_stop(dm_device.thread); - -probe_error1: - vmbus_close(dev->channel); return ret; } @@ -1724,11 +1732,11 @@ static int balloon_remove(struct hv_device *dev) cancel_work_sync(&dm->balloon_wrk.wrk); cancel_work_sync(&dm->ha_wrk.wrk); - vmbus_close(dev->channel); kthread_stop(dm->thread); + vmbus_close(dev->channel); #ifdef CONFIG_MEMORY_HOTPLUG - restore_online_page_callback(&hv_online_page); unregister_memory_notifier(&hv_memory_nb); + restore_online_page_callback(&hv_online_page); #endif spin_lock_irqsave(&dm_device.ha_lock, flags); list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) { From 9b543419652917f048310d0863c47c107c26fb0c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Aug 2019 15:41:00 +0300 Subject: [PATCH 03/13] Tools: hv: move to tools buildsystem There is a nice buildsystem dedicated for userspace tools in Linux kernel tree. Switch Hyper-V daemons to be built by it. Cc: Vitaly Kuznetsov Signed-off-by: Andy Shevchenko Tested-by: Vitaly Kuznetsov Signed-off-by: Sasha Levin --- tools/hv/Build | 3 +++ tools/hv/Makefile | 51 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 tools/hv/Build diff --git a/tools/hv/Build b/tools/hv/Build new file mode 100644 index 000000000000..6cf51fa4b306 --- /dev/null +++ b/tools/hv/Build @@ -0,0 +1,3 @@ +hv_kvp_daemon-y += hv_kvp_daemon.o +hv_vss_daemon-y += hv_vss_daemon.o +hv_fcopy_daemon-y += hv_fcopy_daemon.o diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 5db5e62cebda..b57143d9459c 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -1,28 +1,55 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for Hyper-V tools - -WARNINGS = -Wall -Wextra -CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS) - -CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include +include ../scripts/Makefile.include sbindir ?= /usr/sbin libexecdir ?= /usr/libexec sharedstatedir ?= /var/lib -ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r + +override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include + +ALL_TARGETS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon +ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh all: $(ALL_PROGRAMS) -%: %.c - $(CC) $(CFLAGS) -o $@ $^ +export srctree OUTPUT CC LD CFLAGS +include $(srctree)/tools/build/Makefile.include + +HV_KVP_DAEMON_IN := $(OUTPUT)hv_kvp_daemon-in.o +$(HV_KVP_DAEMON_IN): FORCE + $(Q)$(MAKE) $(build)=hv_kvp_daemon +$(OUTPUT)hv_kvp_daemon: $(HV_KVP_DAEMON_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + +HV_VSS_DAEMON_IN := $(OUTPUT)hv_vss_daemon-in.o +$(HV_VSS_DAEMON_IN): FORCE + $(Q)$(MAKE) $(build)=hv_vss_daemon +$(OUTPUT)hv_vss_daemon: $(HV_VSS_DAEMON_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + +HV_FCOPY_DAEMON_IN := $(OUTPUT)hv_fcopy_daemon-in.o +$(HV_FCOPY_DAEMON_IN): FORCE + $(Q)$(MAKE) $(build)=hv_fcopy_daemon +$(OUTPUT)hv_fcopy_daemon: $(HV_FCOPY_DAEMON_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ clean: - $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon + rm -f $(ALL_PROGRAMS) + find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -install: all +install: $(ALL_PROGRAMS) install -d -m 755 $(DESTDIR)$(sbindir); \ install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \ install -d -m 755 $(DESTDIR)$(sharedstatedir); \ @@ -33,3 +60,7 @@ install: all for script in $(ALL_SCRIPTS); do \ install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \ done + +FORCE: + +.PHONY: all install clean FORCE prepare From 345f0254e5b2f4090e4a00ebc996e07e9bdcd070 Mon Sep 17 00:00:00 2001 From: Maya Nakamura Date: Fri, 12 Jul 2019 08:27:38 +0000 Subject: [PATCH 04/13] HID: hv: Remove dependencies on PAGE_SIZE for ring buffer Define the ring buffer size as a constant expression because it should not depend on the guest page size. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley Acked-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-hyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 7795831d37c2..cc5b09b87ab0 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -104,8 +104,8 @@ struct synthhid_input_report { #pragma pack(pop) -#define INPUTVSC_SEND_RING_BUFFER_SIZE (10*PAGE_SIZE) -#define INPUTVSC_RECV_RING_BUFFER_SIZE (10*PAGE_SIZE) +#define INPUTVSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define INPUTVSC_RECV_RING_BUFFER_SIZE (40 * 1024) enum pipe_prot_msg_type { From dba61cda30469a6c4fed0f8d5bf2b6001ca80a51 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 5 Sep 2019 23:01:15 +0000 Subject: [PATCH 05/13] Drivers: hv: vmbus: Break out synic enable and disable operations Break out synic enable and disable operations into separate hv_synic_disable_regs() and hv_synic_enable_regs() functions for use by a later patch to support hibernation. There is no functional change except the unnecessary check "if (sctrl.enable != 1) return -EFAULT;" which is removed, because when we're in hv_synic_cleanup(), we're absolutely sure sctrl.enable must be 1. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/hv.c | 66 ++++++++++++++++++++++----------------- drivers/hv/hyperv_vmbus.h | 2 ++ 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 6188fb7dda42..fcc52797c169 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -154,7 +154,7 @@ void hv_synic_free(void) * retrieve the initialized message and event pages. Otherwise, we create and * initialize the message and event pages. */ -int hv_synic_init(unsigned int cpu) +void hv_synic_enable_regs(unsigned int cpu) { struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); @@ -196,6 +196,11 @@ int hv_synic_init(unsigned int cpu) sctrl.enable = 1; hv_set_synic_state(sctrl.as_uint64); +} + +int hv_synic_init(unsigned int cpu) +{ + hv_synic_enable_regs(cpu); hv_stimer_init(cpu); @@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu) /* * hv_synic_cleanup - Cleanup routine for hv_synic_init(). */ -int hv_synic_cleanup(unsigned int cpu) +void hv_synic_disable_regs(unsigned int cpu) { union hv_synic_sint shared_sint; union hv_synic_simp simp; union hv_synic_siefp siefp; union hv_synic_scontrol sctrl; + + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + + shared_sint.masked = 1; + + /* Need to correctly cleanup in the case of SMP!!! */ + /* Disable the interrupt */ + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + + hv_get_simp(simp.as_uint64); + simp.simp_enabled = 0; + simp.base_simp_gpa = 0; + + hv_set_simp(simp.as_uint64); + + hv_get_siefp(siefp.as_uint64); + siefp.siefp_enabled = 0; + siefp.base_siefp_gpa = 0; + + hv_set_siefp(siefp.as_uint64); + + /* Disable the global synic bit */ + hv_get_synic_state(sctrl.as_uint64); + sctrl.enable = 0; + hv_set_synic_state(sctrl.as_uint64); +} + +int hv_synic_cleanup(unsigned int cpu) +{ struct vmbus_channel *channel, *sc; bool channel_found = false; unsigned long flags; - hv_get_synic_state(sctrl.as_uint64); - if (sctrl.enable != 1) - return -EFAULT; - /* * Search for channels which are bound to the CPU we're about to * cleanup. In case we find one and vmbus is still connected we need to @@ -249,29 +279,7 @@ int hv_synic_cleanup(unsigned int cpu) hv_stimer_cleanup(cpu); - hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); - - shared_sint.masked = 1; - - /* Need to correctly cleanup in the case of SMP!!! */ - /* Disable the interrupt */ - hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); - - hv_get_simp(simp.as_uint64); - simp.simp_enabled = 0; - simp.base_simp_gpa = 0; - - hv_set_simp(simp.as_uint64); - - hv_get_siefp(siefp.as_uint64); - siefp.siefp_enabled = 0; - siefp.base_siefp_gpa = 0; - - hv_set_siefp(siefp.as_uint64); - - /* Disable the global synic bit */ - sctrl.enable = 0; - hv_set_synic_state(sctrl.as_uint64); + hv_synic_disable_regs(cpu); return 0; } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 362e70e9d145..26ea161a6876 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -171,8 +171,10 @@ extern int hv_synic_alloc(void); extern void hv_synic_free(void); +extern void hv_synic_enable_regs(unsigned int cpu); extern int hv_synic_init(unsigned int cpu); +extern void hv_synic_disable_regs(unsigned int cpu); extern int hv_synic_cleanup(unsigned int cpu); /* Interface */ From 63ecc6d22ce46643165c391a9c90ba67e22e1c0f Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 5 Sep 2019 23:01:16 +0000 Subject: [PATCH 06/13] Drivers: hv: vmbus: Suspend/resume the synic for hibernation This is needed when we resume the old kernel from the "current" kernel. Note: when hv_synic_suspend() and hv_synic_resume() run, all the non-boot CPUs have been offlined, and interrupts are disabled on CPU0. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/vmbus_drv.c | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index ebd35fc35290..2ef375ce58ac 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "hyperv_vmbus.h" @@ -2086,6 +2087,47 @@ static void hv_crash_handler(struct pt_regs *regs) hyperv_cleanup(); }; +static int hv_synic_suspend(void) +{ + /* + * When we reach here, all the non-boot CPUs have been offlined, and + * the stimers on them have been unbound in hv_synic_cleanup() -> + * hv_stimer_cleanup() -> clockevents_unbind_device(). + * + * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here + * we do not unbind the stimer on CPU0 because: 1) it's unnecessary + * because the interrupts remain disabled between syscore_suspend() + * and syscore_resume(): see create_image() and resume_target_kernel(); + * 2) the stimer on CPU0 is automatically disabled later by + * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ... + * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning + * would be triggered if we call clockevents_unbind_device(), which + * may sleep, in an interrupts-disabled context. So, we intentionally + * don't call hv_stimer_cleanup(0) here. + */ + + hv_synic_disable_regs(0); + + return 0; +} + +static void hv_synic_resume(void) +{ + hv_synic_enable_regs(0); + + /* + * Note: we don't need to call hv_stimer_init(0), because the timer + * on CPU0 is not unbound in hv_synic_suspend(), and the timer is + * automatically re-enabled in timekeeping_resume(). + */ +} + +/* The callbacks run only on CPU0, with irqs_disabled. */ +static struct syscore_ops hv_synic_syscore_ops = { + .suspend = hv_synic_suspend, + .resume = hv_synic_resume, +}; + static int __init hv_acpi_init(void) { int ret, t; @@ -2116,6 +2158,8 @@ static int __init hv_acpi_init(void) hv_setup_kexec_handler(hv_kexec_handler); hv_setup_crash_handler(hv_crash_handler); + register_syscore_ops(&hv_synic_syscore_ops); + return 0; cleanup: @@ -2128,6 +2172,8 @@ static void __exit vmbus_exit(void) { int cpu; + unregister_syscore_ops(&hv_synic_syscore_ops); + hv_remove_kexec_handler(); hv_remove_crash_handler(); vmbus_connection.conn_state = DISCONNECTED; From ed56ef675ae6ef0e6f7d42b9c42dae61172f0960 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 5 Sep 2019 23:01:16 +0000 Subject: [PATCH 07/13] Drivers: hv: vmbus: Add a helper function is_sub_channel() The existing method of telling if a channel is sub-channel in vmbus_process_offer() is cumbersome. This new simple helper function is preferred in future. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- include/linux/hyperv.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6256cc34c4a6..2d39248cff96 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -245,7 +245,10 @@ struct vmbus_channel_offer { } pipe; } u; /* - * The sub_channel_index is defined in win8. + * The sub_channel_index is defined in Win8: a value of zero means a + * primary channel and a value of non-zero means a sub-channel. + * + * Before Win8, the field is reserved, meaning it's always zero. */ u16 sub_channel_index; u16 reserved3; @@ -934,6 +937,11 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c) VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER); } +static inline bool is_sub_channel(const struct vmbus_channel *c) +{ + return c->offermsg.offer.sub_channel_index != 0; +} + static inline void set_channel_affinity_state(struct vmbus_channel *c, enum hv_numa_policy policy) { From 271b2224d42f88870e6b060924ee374871c131fc Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 5 Sep 2019 23:01:17 +0000 Subject: [PATCH 08/13] Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation The high-level VSC drivers will implement device-specific callbacks. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/vmbus_drv.c | 46 ++++++++++++++++++++++++++++++++++++++++++ include/linux/hyperv.h | 3 +++ 2 files changed, 49 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 2ef375ce58ac..a30c70adf9a0 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -911,6 +911,43 @@ static void vmbus_shutdown(struct device *child_device) drv->shutdown(dev); } +/* + * vmbus_suspend - Suspend a vmbus device + */ +static int vmbus_suspend(struct device *child_device) +{ + struct hv_driver *drv; + struct hv_device *dev = device_to_hv_device(child_device); + + /* The device may not be attached yet */ + if (!child_device->driver) + return 0; + + drv = drv_to_hv_drv(child_device->driver); + if (!drv->suspend) + return -EOPNOTSUPP; + + return drv->suspend(dev); +} + +/* + * vmbus_resume - Resume a vmbus device + */ +static int vmbus_resume(struct device *child_device) +{ + struct hv_driver *drv; + struct hv_device *dev = device_to_hv_device(child_device); + + /* The device may not be attached yet */ + if (!child_device->driver) + return 0; + + drv = drv_to_hv_drv(child_device->driver); + if (!drv->resume) + return -EOPNOTSUPP; + + return drv->resume(dev); +} /* * vmbus_device_release - Final callback release of the vmbus child device @@ -926,6 +963,14 @@ static void vmbus_device_release(struct device *device) kfree(hv_dev); } +/* + * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than + * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm. + */ +static const struct dev_pm_ops vmbus_pm = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume) +}; + /* The one and only one */ static struct bus_type hv_bus = { .name = "vmbus", @@ -936,6 +981,7 @@ static struct bus_type hv_bus = { .uevent = vmbus_uevent, .dev_groups = vmbus_dev_groups, .drv_groups = vmbus_drv_groups, + .pm = &vmbus_pm, }; struct onmessage_work_context { diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2d39248cff96..8a60e7766037 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1157,6 +1157,9 @@ struct hv_driver { int (*remove)(struct hv_device *); void (*shutdown)(struct hv_device *); + int (*suspend)(struct hv_device *); + int (*resume)(struct hv_device *); + }; /* Base device object */ From e3ede02add7e6df315afc6b4120520f7d0c5a258 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 5 Sep 2019 23:01:18 +0000 Subject: [PATCH 09/13] Drivers: hv: vmbus: Ignore the offers when resuming from hibernation When the VM resumes, the host re-sends the offers. We should not add the offers to the global vmbus_connection.chn_list again. This patch assumes the RELIDs of the channels don't change across hibernation. Actually this is not always true, especially in the case of NIC SR-IOV the VF vmbus device's RELID sometimes can change. A later patch will address this issue by mapping the new offers to the old channels and fixing up the old channels, if necessary. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/channel_mgmt.c | 58 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index addcef50df7a..44b92fa1b7fa 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -847,6 +847,37 @@ void vmbus_initiate_unload(bool crash) vmbus_wait_for_unload(); } +/* + * find_primary_channel_by_offer - Get the channel object given the new offer. + * This is only used in the resume path of hibernation. + */ +static struct vmbus_channel * +find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer) +{ + struct vmbus_channel *channel = NULL, *iter; + const guid_t *inst1, *inst2; + + /* Ignore sub-channel offers. */ + if (offer->offer.sub_channel_index != 0) + return NULL; + + mutex_lock(&vmbus_connection.channel_mutex); + + list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) { + inst1 = &iter->offermsg.offer.if_instance; + inst2 = &offer->offer.if_instance; + + if (guid_equal(inst1, inst2)) { + channel = iter; + break; + } + } + + mutex_unlock(&vmbus_connection.channel_mutex); + + return channel; +} + /* * vmbus_onoffer - Handler for channel offers from vmbus in parent partition. * @@ -854,12 +885,37 @@ void vmbus_initiate_unload(bool crash) static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) { struct vmbus_channel_offer_channel *offer; - struct vmbus_channel *newchannel; + struct vmbus_channel *oldchannel, *newchannel; + size_t offer_sz; offer = (struct vmbus_channel_offer_channel *)hdr; trace_vmbus_onoffer(offer); + oldchannel = find_primary_channel_by_offer(offer); + + if (oldchannel != NULL) { + atomic_dec(&vmbus_connection.offer_in_progress); + + /* + * We're resuming from hibernation: we expect the host to send + * exactly the same offers that we had before the hibernation. + */ + offer_sz = sizeof(*offer); + if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) + return; + + pr_debug("Mismatched offer from the host (relid=%d)\n", + offer->child_relid); + + print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET, + 16, 4, &oldchannel->offermsg, offer_sz, + false); + print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET, + 16, 4, offer, offer_sz, false); + return; + } + /* Allocate the channel object and save this offer. */ newchannel = alloc_channel(); if (!newchannel) { From f53335e3289f9ac3a6a8faf6c2f819eee508bd39 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 5 Sep 2019 23:01:19 +0000 Subject: [PATCH 10/13] Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation Before Linux enters hibernation, it sends the CHANNELMSG_UNLOAD message to the host so all the offers are gone. After hibernation, Linux needs to re-negotiate with the host using the same vmbus protocol version (which was in use before hibernation), and ask the host to re-offer the vmbus devices. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/connection.c | 3 +- drivers/hv/hyperv_vmbus.h | 2 ++ drivers/hv/vmbus_drv.c | 59 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 09829e15d4a0..806319cd5ccf 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -59,8 +59,7 @@ static __u32 vmbus_get_next_version(__u32 current_version) } } -static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, - __u32 version) +int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) { int ret = 0; unsigned int cur_cpu; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 26ea161a6876..e657197a027a 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -274,6 +274,8 @@ struct vmbus_msginfo { extern struct vmbus_connection vmbus_connection; +int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version); + static inline void vmbus_send_interrupt(u32 relid) { sync_set_bit(relid, vmbus_connection.send_int_page); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a30c70adf9a0..ce9974bf683f 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2089,6 +2089,51 @@ acpi_walk_err: return ret_val; } +static int vmbus_bus_suspend(struct device *dev) +{ + vmbus_initiate_unload(false); + + vmbus_connection.conn_state = DISCONNECTED; + + return 0; +} + +static int vmbus_bus_resume(struct device *dev) +{ + struct vmbus_channel_msginfo *msginfo; + size_t msgsize; + int ret; + + /* + * We only use the 'vmbus_proto_version', which was in use before + * hibernation, to re-negotiate with the host. + */ + if (vmbus_proto_version == VERSION_INVAL || + vmbus_proto_version == 0) { + pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version); + return -EINVAL; + } + + msgsize = sizeof(*msginfo) + + sizeof(struct vmbus_channel_initiate_contact); + + msginfo = kzalloc(msgsize, GFP_KERNEL); + + if (msginfo == NULL) + return -ENOMEM; + + ret = vmbus_negotiate_version(msginfo, vmbus_proto_version); + + kfree(msginfo); + + if (ret != 0) + return ret; + + vmbus_request_offers(); + + return 0; +} + static const struct acpi_device_id vmbus_acpi_device_ids[] = { {"VMBUS", 0}, {"VMBus", 0}, @@ -2096,6 +2141,19 @@ static const struct acpi_device_id vmbus_acpi_device_ids[] = { }; MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); +/* + * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than + * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the + * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the + * pci "noirq" restore callback runs before "non-noirq" callbacks (see + * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() -> + * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's + * resume callback must also run via the "noirq" callbacks. + */ +static const struct dev_pm_ops vmbus_bus_pm = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume) +}; + static struct acpi_driver vmbus_acpi_driver = { .name = "vmbus", .ids = vmbus_acpi_device_ids, @@ -2103,6 +2161,7 @@ static struct acpi_driver vmbus_acpi_driver = { .add = vmbus_acpi_add, .remove = vmbus_acpi_remove, }, + .drv.pm = &vmbus_bus_pm, }; static void hv_kexec_handler(void) From 1f48dcf180e5422b1a633b24680dd0f5c3f540f5 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 5 Sep 2019 23:01:20 +0000 Subject: [PATCH 11/13] Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for hibernation. There is no better method to clean up the channels since some of the channels may still be referenced by the userspace apps when hibernation is triggered: in this case, with this patch, the "rescind" fields of the channels are set, and the apps will thoroughly destroy the channels after hibernation. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/vmbus_drv.c | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index ce9974bf683f..45b976ec6e79 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -1069,6 +1070,41 @@ msg_handled: vmbus_signal_eom(msg, message_type); } +/* + * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for + * hibernation, because hv_sock connections can not persist across hibernation. + */ +static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) +{ + struct onmessage_work_context *ctx; + struct vmbus_channel_rescind_offer *rescind; + + WARN_ON(!is_hvsock_channel(channel)); + + /* + * sizeof(*ctx) is small and the allocation should really not fail, + * otherwise the state of the hv_sock connections ends up in limbo. + */ + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL); + + /* + * So far, these are not really used by Linux. Just set them to the + * reasonable values conforming to the definitions of the fields. + */ + ctx->msg.header.message_type = 1; + ctx->msg.header.payload_size = sizeof(*rescind); + + /* These values are actually used by Linux. */ + rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload; + rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER; + rescind->child_relid = channel->offermsg.child_relid; + + INIT_WORK(&ctx->work, vmbus_onmessage_work); + + queue_work_on(vmbus_connection.connect_cpu, + vmbus_connection.work_queue, + &ctx->work); +} /* * Direct callback for channels using other deferred processing @@ -2091,6 +2127,25 @@ acpi_walk_err: static int vmbus_bus_suspend(struct device *dev) { + struct vmbus_channel *channel; + + while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { + /* + * We wait here until the completion of any channel + * offers that are currently in progress. + */ + msleep(1); + } + + mutex_lock(&vmbus_connection.channel_mutex); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (!is_hvsock_channel(channel)) + continue; + + vmbus_force_channel_rescinded(channel); + } + mutex_unlock(&vmbus_connection.channel_mutex); + vmbus_initiate_unload(false); vmbus_connection.conn_state = DISCONNECTED; From b307b38962eb0f22d1aa6dcf53cb7d3c2ed5eec7 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 5 Sep 2019 23:01:21 +0000 Subject: [PATCH 12/13] Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels Before suspend, Linux must make sure all the hv_sock channels have been properly cleaned up, because a hv_sock connection can not persist across hibernation, and the user-space app must be properly notified of the state change of the connection. Before suspend, Linux also must make sure all the sub-channels have been destroyed, i.e. the related channel structs of the sub-channels must be properly removed, otherwise they would cause a conflict when the sub-channels are recreated upon resume. Add a counter to track such channels, and vmbus_bus_suspend() should wait for the counter to drop to zero. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/channel_mgmt.c | 26 +++++++++++++++++++++++ drivers/hv/connection.c | 3 +++ drivers/hv/hyperv_vmbus.h | 12 +++++++++++ drivers/hv/vmbus_drv.c | 44 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 84 insertions(+), 1 deletion(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 44b92fa1b7fa..5518d031f62a 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -545,6 +545,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) mutex_lock(&vmbus_connection.channel_mutex); + /* Remember the channels that should be cleaned up upon suspend. */ + if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel)) + atomic_inc(&vmbus_connection.nr_chan_close_on_suspend); + /* * Now that we have acquired the channel_mutex, * we can release the potentially racing rescind thread. @@ -944,6 +948,16 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) vmbus_process_offer(newchannel); } +static void check_ready_for_suspend_event(void) +{ + /* + * If all the sub-channels or hv_sock channels have been cleaned up, + * then it's safe to suspend. + */ + if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend)) + complete(&vmbus_connection.ready_for_suspend_event); +} + /* * vmbus_onoffer_rescind - Rescind offer handler. * @@ -954,6 +968,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) struct vmbus_channel_rescind_offer *rescind; struct vmbus_channel *channel; struct device *dev; + bool clean_up_chan_for_suspend; rescind = (struct vmbus_channel_rescind_offer *)hdr; @@ -993,6 +1008,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) return; } + clean_up_chan_for_suspend = is_hvsock_channel(channel) || + is_sub_channel(channel); /* * Before setting channel->rescind in vmbus_rescind_cleanup(), we * should make sure the channel callback is not running any more. @@ -1018,6 +1035,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) if (channel->device_obj) { if (channel->chn_rescind_callback) { channel->chn_rescind_callback(channel); + + if (clean_up_chan_for_suspend) + check_ready_for_suspend_event(); + return; } /* @@ -1050,6 +1071,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) } mutex_unlock(&vmbus_connection.channel_mutex); } + + /* The "channel" may have been freed. Do not access it any longer. */ + + if (clean_up_chan_for_suspend) + check_ready_for_suspend_event(); } void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 806319cd5ccf..99851ea682eb 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -26,6 +26,9 @@ struct vmbus_connection vmbus_connection = { .conn_state = DISCONNECTED, .next_gpadl_handle = ATOMIC_INIT(0xE1E10), + + .ready_for_suspend_event= COMPLETION_INITIALIZER( + vmbus_connection.ready_for_suspend_event), }; EXPORT_SYMBOL_GPL(vmbus_connection); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index e657197a027a..974b747ca1fc 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -260,6 +260,18 @@ struct vmbus_connection { struct workqueue_struct *work_queue; struct workqueue_struct *handle_primary_chan_wq; struct workqueue_struct *handle_sub_chan_wq; + + /* + * The number of sub-channels and hv_sock channels that should be + * cleaned up upon suspend: sub-channels will be re-created upon + * resume, and hv_sock channels should not survive suspend. + */ + atomic_t nr_chan_close_on_suspend; + /* + * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to + * drop to zero. + */ + struct completion ready_for_suspend_event; }; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 45b976ec6e79..32ec951d334f 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2127,7 +2127,8 @@ acpi_walk_err: static int vmbus_bus_suspend(struct device *dev) { - struct vmbus_channel *channel; + struct vmbus_channel *channel, *sc; + unsigned long flags; while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { /* @@ -2146,6 +2147,44 @@ static int vmbus_bus_suspend(struct device *dev) } mutex_unlock(&vmbus_connection.channel_mutex); + /* + * Wait until all the sub-channels and hv_sock channels have been + * cleaned up. Sub-channels should be destroyed upon suspend, otherwise + * they would conflict with the new sub-channels that will be created + * in the resume path. hv_sock channels should also be destroyed, but + * a hv_sock channel of an established hv_sock connection can not be + * really destroyed since it may still be referenced by the userspace + * application, so we just force the hv_sock channel to be rescinded + * by vmbus_force_channel_rescinded(), and the userspace application + * will thoroughly destroy the channel after hibernation. + * + * Note: the counter nr_chan_close_on_suspend may never go above 0 if + * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM. + */ + if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) + wait_for_completion(&vmbus_connection.ready_for_suspend_event); + + mutex_lock(&vmbus_connection.channel_mutex); + + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (is_hvsock_channel(channel)) { + if (!channel->rescind) { + pr_err("hv_sock channel not rescinded!\n"); + WARN_ON_ONCE(1); + } + continue; + } + + spin_lock_irqsave(&channel->lock, flags); + list_for_each_entry(sc, &channel->sc_list, sc_list) { + pr_err("Sub-channel not deleted!\n"); + WARN_ON_ONCE(1); + } + spin_unlock_irqrestore(&channel->lock, flags); + } + + mutex_unlock(&vmbus_connection.channel_mutex); + vmbus_initiate_unload(false); vmbus_connection.conn_state = DISCONNECTED; @@ -2186,6 +2225,9 @@ static int vmbus_bus_resume(struct device *dev) vmbus_request_offers(); + /* Reset the event for the next suspend. */ + reinit_completion(&vmbus_connection.ready_for_suspend_event); + return 0; } From d8bd2d442bb2688b428ac7164e5dc6d95d4fa65b Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 5 Sep 2019 23:01:22 +0000 Subject: [PATCH 13/13] Drivers: hv: vmbus: Resume after fixing up old primary channels When the host re-offers the primary channels upon resume, the host only guarantees the Instance GUID doesn't change, so vmbus_bus_suspend() should invalidate channel->offermsg.child_relid and figure out the number of primary channels that need to be fixed up upon resume. Upon resume, vmbus_onoffer() finds the old channel structs, and maps the new offers to the old channels, and fixes up the old structs, and finally the resume callbacks of the VSC drivers will re-open the channels. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/channel_mgmt.c | 89 +++++++++++++++++++++++++++++---------- drivers/hv/connection.c | 2 + drivers/hv/hyperv_vmbus.h | 14 ++++++ drivers/hv/vmbus_drv.c | 17 ++++++++ include/linux/hyperv.h | 3 ++ 5 files changed, 103 insertions(+), 22 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 5518d031f62a..8eb167540b4f 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel) cpumask_clear_cpu(channel->target_cpu, &primary_channel->alloced_cpus_in_node); - vmbus_release_relid(channel->offermsg.child_relid); + /* + * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and + * the relid is invalidated; after hibernation, when the user-space app + * destroys the channel, the relid is INVALID_RELID, and in this case + * it's unnecessary and unsafe to release the old relid, since the same + * relid can refer to a completely different channel now. + */ + if (channel->offermsg.child_relid != INVALID_RELID) + vmbus_release_relid(channel->offermsg.child_relid); free_channel(channel); } @@ -851,6 +859,36 @@ void vmbus_initiate_unload(bool crash) vmbus_wait_for_unload(); } +static void check_ready_for_resume_event(void) +{ + /* + * If all the old primary channels have been fixed up, then it's safe + * to resume. + */ + if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume)) + complete(&vmbus_connection.ready_for_resume_event); +} + +static void vmbus_setup_channel_state(struct vmbus_channel *channel, + struct vmbus_channel_offer_channel *offer) +{ + /* + * Setup state for signalling the host. + */ + channel->sig_event = VMBUS_EVENT_CONNECTION_ID; + + if (vmbus_proto_version != VERSION_WS2008) { + channel->is_dedicated_interrupt = + (offer->is_dedicated_interrupt != 0); + channel->sig_event = offer->connection_id; + } + + memcpy(&channel->offermsg, offer, + sizeof(struct vmbus_channel_offer_channel)); + channel->monitor_grp = (u8)offer->monitorid / 32; + channel->monitor_bit = (u8)offer->monitorid % 32; +} + /* * find_primary_channel_by_offer - Get the channel object given the new offer. * This is only used in the resume path of hibernation. @@ -902,14 +940,29 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) atomic_dec(&vmbus_connection.offer_in_progress); /* - * We're resuming from hibernation: we expect the host to send - * exactly the same offers that we had before the hibernation. + * We're resuming from hibernation: all the sub-channel and + * hv_sock channels we had before the hibernation should have + * been cleaned up, and now we must be seeing a re-offered + * primary channel that we had before the hibernation. */ - offer_sz = sizeof(*offer); - if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) - return; - pr_debug("Mismatched offer from the host (relid=%d)\n", + WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID); + /* Fix up the relid. */ + oldchannel->offermsg.child_relid = offer->child_relid; + + offer_sz = sizeof(*offer); + if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) { + check_ready_for_resume_event(); + return; + } + + /* + * This is not an error, since the host can also change the + * other field(s) of the offer, e.g. on WS RS5 (Build 17763), + * the offer->connection_id of the Mellanox VF vmbus device + * can change when the host reoffers the device upon resume. + */ + pr_debug("vmbus offer changed: relid=%d\n", offer->child_relid); print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET, @@ -917,6 +970,12 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) false); print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET, 16, 4, offer, offer_sz, false); + + /* Fix up the old channel. */ + vmbus_setup_channel_state(oldchannel, offer); + + check_ready_for_resume_event(); + return; } @@ -929,21 +988,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) return; } - /* - * Setup state for signalling the host. - */ - newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID; - - if (vmbus_proto_version != VERSION_WS2008) { - newchannel->is_dedicated_interrupt = - (offer->is_dedicated_interrupt != 0); - newchannel->sig_event = offer->connection_id; - } - - memcpy(&newchannel->offermsg, offer, - sizeof(struct vmbus_channel_offer_channel)); - newchannel->monitor_grp = (u8)offer->monitorid / 32; - newchannel->monitor_bit = (u8)offer->monitorid % 32; + vmbus_setup_channel_state(newchannel, offer); vmbus_process_offer(newchannel); } diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 99851ea682eb..6e4c015783ff 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -29,6 +29,8 @@ struct vmbus_connection vmbus_connection = { .ready_for_suspend_event= COMPLETION_INITIALIZER( vmbus_connection.ready_for_suspend_event), + .ready_for_resume_event = COMPLETION_INITIALIZER( + vmbus_connection.ready_for_resume_event), }; EXPORT_SYMBOL_GPL(vmbus_connection); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 974b747ca1fc..f7a5f5615f34 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -272,6 +272,20 @@ struct vmbus_connection { * drop to zero. */ struct completion ready_for_suspend_event; + + /* + * The number of primary channels that should be "fixed up" + * upon resume: these channels are re-offered upon resume, and some + * fields of the channel offers (i.e. child_relid and connection_id) + * can change, so the old offermsg must be fixed up, before the resume + * callbacks of the VSC drivers start to further touch the channels. + */ + atomic_t nr_chan_fixup_on_resume; + /* + * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to + * drop to zero. + */ + struct completion ready_for_resume_event; }; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 32ec951d334f..391f0b225c9a 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2164,9 +2164,17 @@ static int vmbus_bus_suspend(struct device *dev) if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) wait_for_completion(&vmbus_connection.ready_for_suspend_event); + WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0); + mutex_lock(&vmbus_connection.channel_mutex); list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + /* + * Invalidate the field. Upon resume, vmbus_onoffer() will fix + * up the field, and the other fields (if necessary). + */ + channel->offermsg.child_relid = INVALID_RELID; + if (is_hvsock_channel(channel)) { if (!channel->rescind) { pr_err("hv_sock channel not rescinded!\n"); @@ -2181,6 +2189,8 @@ static int vmbus_bus_suspend(struct device *dev) WARN_ON_ONCE(1); } spin_unlock_irqrestore(&channel->lock, flags); + + atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume); } mutex_unlock(&vmbus_connection.channel_mutex); @@ -2189,6 +2199,9 @@ static int vmbus_bus_suspend(struct device *dev) vmbus_connection.conn_state = DISCONNECTED; + /* Reset the event for the next resume. */ + reinit_completion(&vmbus_connection.ready_for_resume_event); + return 0; } @@ -2223,8 +2236,12 @@ static int vmbus_bus_resume(struct device *dev) if (ret != 0) return ret; + WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); + vmbus_request_offers(); + wait_for_completion(&vmbus_connection.ready_for_resume_event); + /* Reset the event for the next suspend. */ reinit_completion(&vmbus_connection.ready_for_suspend_event); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 8a60e7766037..a3aa9e9ef6f2 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -426,6 +426,9 @@ enum vmbus_channel_message_type { CHANNELMSG_COUNT }; +/* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */ +#define INVALID_RELID U32_MAX + struct vmbus_channel_message_header { enum vmbus_channel_message_type msgtype; u32 padding;