From 473153291b8a131c86787fd9b076a529573e2c19 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 2 Feb 2017 19:14:32 -0800
Subject: [PATCH 1/5] virtio_net: wrap rtnl_lock in test for calling with lock
 already held

For the XDP use case, and to allow ethtool reset tests, it is useful to
be able to use the reset paths from contexts where the rtnl lock is
already held.

This requires updating virtnet_set_queues and free_receive_bufs, the
two places where rtnl_lock is taken in virtio_net. To do this we use
the following pattern:

	_foo(...) { do stuff }
	foo(...) { rtnl_lock(); _foo(...); rtnl_unlock(); }

This allows us to use the freeze()/restore() flow from both contexts.

Signed-off-by: John Fastabend
Acked-by: Jason Wang
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)
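
The pattern in miniature, with a hypothetical foo() standing in for
virtnet_set_queues()/free_receive_bufs() (illustrative sketch only, not
code from this patch; the ASSERT_RTNL() check is idiomatic but is not
added by this series):

	/* _foo() does the work and expects, rather than takes, the lock;
	 * callers that already hold rtnl (e.g. an XDP setup path) call it
	 * directly.
	 */
	static int _foo(struct virtnet_info *vi, int arg)
	{
		ASSERT_RTNL();
		/* do stuff */
		return 0;
	}

	/* foo() is the entry point for callers that do not hold rtnl. */
	static int foo(struct virtnet_info *vi, int arg)
	{
		int err;

		rtnl_lock();
		err = _foo(vi, arg);
		rtnl_unlock();
		return err;
	}
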
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 29982c7f6080..bc814c079506 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1305,7 +1305,7 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi)
 	rtnl_unlock();
 }
 
-static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
+static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 {
 	struct scatterlist sg;
 	struct net_device *dev = vi->dev;
@@ -1331,6 +1331,16 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 	return 0;
 }
 
+static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
+{
+	int err;
+
+	rtnl_lock();
+	err = _virtnet_set_queues(vi, queue_pairs);
+	rtnl_unlock();
+	return err;
+}
+
 static int virtnet_close(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
@@ -1583,7 +1593,7 @@ static int virtnet_set_channels(struct net_device *dev,
 		return -EINVAL;
 
 	get_online_cpus();
-	err = virtnet_set_queues(vi, queue_pairs);
+	err = _virtnet_set_queues(vi, queue_pairs);
 	if (!err) {
 		netif_set_real_num_tx_queues(dev, queue_pairs);
 		netif_set_real_num_rx_queues(dev, queue_pairs);
@@ -1715,7 +1725,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 			return -ENOMEM;
 	}
 
-	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
+	err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
 	if (err) {
 		dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
 		return err;
@@ -1724,7 +1734,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 	if (prog) {
 		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
 		if (IS_ERR(prog)) {
-			virtnet_set_queues(vi, curr_qp);
+			_virtnet_set_queues(vi, curr_qp);
 			return PTR_ERR(prog);
 		}
 	}
@@ -1840,12 +1850,11 @@ static void virtnet_free_queues(struct virtnet_info *vi)
 	kfree(vi->sq);
 }
 
-static void free_receive_bufs(struct virtnet_info *vi)
+static void _free_receive_bufs(struct virtnet_info *vi)
 {
 	struct bpf_prog *old_prog;
 	int i;
 
-	rtnl_lock();
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		while (vi->rq[i].pages)
 			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
@@ -1855,6 +1864,12 @@ static void free_receive_bufs(struct virtnet_info *vi)
 		if (old_prog)
 			bpf_prog_put(old_prog);
 	}
+}
+
+static void free_receive_bufs(struct virtnet_info *vi)
+{
+	rtnl_lock();
+	_free_receive_bufs(vi);
 	rtnl_unlock();
 }
 
@@ -2293,9 +2308,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 		goto free_unregister_netdev;
 	}
 
-	rtnl_lock();
 	virtnet_set_queues(vi, vi->curr_queue_pairs);
-	rtnl_unlock();
 
 	/* Assume link up if device can't report link status,
 	   otherwise get link status from config. */
@@ -2404,9 +2417,7 @@ static int virtnet_restore(struct virtio_device *vdev)
 
 	netif_device_attach(vi->dev);
 
-	rtnl_lock();
 	virtnet_set_queues(vi, vi->curr_queue_pairs);
-	rtnl_unlock();
 
 	err = virtnet_cpu_notif_add(vi);
 	if (err)

From 0354e4d19cd5de51d6096a07ba44eda0ee61a6e9 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 2 Feb 2017 19:15:01 -0800
Subject: [PATCH 2/5] virtio_net: factor out xdp handler for readability

At this point do_xdp_prog is mostly if/else branches handling the
different modes of virtio_net. So remove it and handle running the
program in the per-mode handlers.

Signed-off-by: John Fastabend
Acked-by: Jason Wang
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 86 ++++++++++++++++------------------
 1 file changed, 35 insertions(+), 51 deletions(-)
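
With do_xdp_prog() gone, each receive handler runs the program and
handles the verdict inline; the common shape, condensed from the hunks
below (a sketch, not the exact driver code):

	act = bpf_prog_run_xdp(xdp_prog, &xdp);
	switch (act) {
	case XDP_PASS:
		break;			/* hand the packet to the stack */
	case XDP_TX:
		/* reflect out the per-CPU XDP send queue */
		goto xdp_xmit;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(vi->dev, xdp_prog, act);
		/* fall through */
	case XDP_DROP:
		goto err_xdp;
	}
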
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index bc814c079506..68221655740e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -398,52 +398,6 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi,
 	return true;
 }
 
-static u32 do_xdp_prog(struct virtnet_info *vi,
-		       struct receive_queue *rq,
-		       struct bpf_prog *xdp_prog,
-		       void *data, int len)
-{
-	int hdr_padded_len;
-	struct xdp_buff xdp;
-	void *buf;
-	unsigned int qp;
-	u32 act;
-
-	if (vi->mergeable_rx_bufs) {
-		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-		xdp.data = data + hdr_padded_len;
-		xdp.data_end = xdp.data + (len - vi->hdr_len);
-		buf = data;
-	} else { /* small buffers */
-		struct sk_buff *skb = data;
-
-		xdp.data = skb->data;
-		xdp.data_end = xdp.data + len;
-		buf = skb->data;
-	}
-
-	act = bpf_prog_run_xdp(xdp_prog, &xdp);
-	switch (act) {
-	case XDP_PASS:
-		return XDP_PASS;
-	case XDP_TX:
-		qp = vi->curr_queue_pairs -
-			vi->xdp_queue_pairs +
-			smp_processor_id();
-		xdp.data = buf;
-		if (unlikely(!virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp,
-					       data)))
-			trace_xdp_exception(vi->dev, xdp_prog, act);
-		return XDP_TX;
-	default:
-		bpf_warn_invalid_xdp_action(act);
-	case XDP_ABORTED:
-		trace_xdp_exception(vi->dev, xdp_prog, act);
-	case XDP_DROP:
-		return XDP_DROP;
-	}
-}
-
 static struct sk_buff *receive_small(struct net_device *dev,
 				     struct virtnet_info *vi,
 				     struct receive_queue *rq,
@@ -459,19 +413,34 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
 		struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+		struct xdp_buff xdp;
+		unsigned int qp;
 		u32 act;
 
 		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
 			goto err_xdp;
-		act = do_xdp_prog(vi, rq, xdp_prog, skb, len);
+
+		xdp.data = skb->data;
+		xdp.data_end = xdp.data + len;
+		act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
 		switch (act) {
 		case XDP_PASS:
 			break;
 		case XDP_TX:
+			qp = vi->curr_queue_pairs -
+				vi->xdp_queue_pairs +
+				smp_processor_id();
+			if (unlikely(!virtnet_xdp_xmit(vi, rq, &vi->sq[qp],
+						       &xdp, skb)))
+				trace_xdp_exception(vi->dev, xdp_prog, act);
 			rcu_read_unlock();
 			goto xdp_xmit;
-		case XDP_DROP:
 		default:
+			bpf_warn_invalid_xdp_action(act);
+		case XDP_ABORTED:
+			trace_xdp_exception(vi->dev, xdp_prog, act);
+		case XDP_DROP:
 			goto err_xdp;
 		}
 	}
@@ -589,6 +558,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
 		struct page *xdp_page;
+		struct xdp_buff xdp;
+		unsigned int qp;
+		void *data;
 		u32 act;
 
 		/* This happens when rx buffer size is underestimated */
@@ -611,8 +583,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		if (unlikely(hdr->hdr.gso_type))
 			goto err_xdp;
 
-		act = do_xdp_prog(vi, rq, xdp_prog,
-				  page_address(xdp_page) + offset, len);
+		data = page_address(xdp_page) + offset;
+		xdp.data = data + vi->hdr_len;
+		xdp.data_end = xdp.data + (len - vi->hdr_len);
+		act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
 		switch (act) {
 		case XDP_PASS:
 			/* We can only create skb based on xdp_page. */
@@ -626,13 +601,22 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			}
 			break;
 		case XDP_TX:
+			qp = vi->curr_queue_pairs -
+				vi->xdp_queue_pairs +
+				smp_processor_id();
+			if (unlikely(!virtnet_xdp_xmit(vi, rq, &vi->sq[qp],
+						       &xdp, data)))
+				trace_xdp_exception(vi->dev, xdp_prog, act);
 			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
 			if (unlikely(xdp_page != page))
 				goto err_xdp;
 			rcu_read_unlock();
 			goto xdp_xmit;
-		case XDP_DROP:
 		default:
+			bpf_warn_invalid_xdp_action(act);
+		case XDP_ABORTED:
+			trace_xdp_exception(vi->dev, xdp_prog, act);
+		case XDP_DROP:
 			if (unlikely(xdp_page != page))
 				__free_pages(xdp_page, 0);
 			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);

From 722d82830a04ccff6c7390535fcdfb1e4d69d126 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 2 Feb 2017 19:15:32 -0800
Subject: [PATCH 3/5] virtio_net: remove duplicate queue pair binding in XDP

Factor out qp assignment.

Signed-off-by: John Fastabend
Acked-by: Jason Wang
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 20 +++++++------------
 1 file changed, 7 insertions(+), 13 deletions(-)
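
The queue pair index now computed once in virtnet_xdp_xmit() maps each
CPU onto one of the XDP transmit queues that sit at the tail of the
send queue array:

	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
	sq = &vi->sq[qp];

For example (illustrative numbers), with 4 CPUs, curr_queue_pairs = 8
and xdp_queue_pairs = 4, CPU 0 transmits on sq[4] and CPU 3 on sq[7],
so XDP_TX traffic never contends with the regular queues sq[0]..sq[3].
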
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 68221655740e..3b3c5c571b6d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -340,15 +340,19 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 
 static bool virtnet_xdp_xmit(struct virtnet_info *vi,
 			     struct receive_queue *rq,
-			     struct send_queue *sq,
 			     struct xdp_buff *xdp,
 			     void *data)
 {
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	unsigned int num_sg, len;
+	struct send_queue *sq;
+	unsigned int qp;
 	void *xdp_sent;
 	int err;
 
+	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+	sq = &vi->sq[qp];
+
 	/* Free up any pending old buffers before queueing new ones. */
 	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
 		if (vi->mergeable_rx_bufs) {
@@ -414,7 +418,6 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	if (xdp_prog) {
 		struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
 		struct xdp_buff xdp;
-		unsigned int qp;
 		u32 act;
 
 		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
@@ -428,11 +431,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		case XDP_PASS:
 			break;
 		case XDP_TX:
-			qp = vi->curr_queue_pairs -
-				vi->xdp_queue_pairs +
-				smp_processor_id();
-			if (unlikely(!virtnet_xdp_xmit(vi, rq, &vi->sq[qp],
-						       &xdp, skb)))
+			if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp, skb)))
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 			rcu_read_unlock();
 			goto xdp_xmit;
@@ -559,7 +558,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	if (xdp_prog) {
 		struct page *xdp_page;
 		struct xdp_buff xdp;
-		unsigned int qp;
 		void *data;
 		u32 act;
 
@@ -601,11 +599,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			}
 			break;
 		case XDP_TX:
-			qp = vi->curr_queue_pairs -
-				vi->xdp_queue_pairs +
-				smp_processor_id();
-			if (unlikely(!virtnet_xdp_xmit(vi, rq, &vi->sq[qp],
-						       &xdp, data)))
+			if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp, data)))
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
 			if (unlikely(xdp_page != page))

From 9fe7bfce8b3e112e8e08c40deb72ee7e24c6f072 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 2 Feb 2017 19:16:01 -0800
Subject: [PATCH 4/5] virtio_net: refactor freeze/restore logic into virtnet
 reset logic

For XDP we will need to reset the queues to allow the buffer headroom
to be configured. In order to do this we need to essentially run the
freeze()/restore() code path. Unfortunately, the locking requirements
between the freeze/restore and reset paths are different, so we cannot
simply reuse the code.

This patch refactors the code path and adds a reset helper routine.

Signed-off-by: John Fastabend
Acked-by: Jason Wang
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 75 ++++++++++++++++++++++++----------------
 drivers/virtio/virtio.c  | 42 ++++++++++++----------
 include/linux/virtio.h   |  4 +++
 3 files changed, 73 insertions(+), 48 deletions(-)
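
The exported helpers are intended to compose into a driver-initiated
reset that mirrors virtio_device_freeze()/virtio_device_restore();
roughly (a sketch of the flow the next patch implements as
virtnet_reset()):

	virtio_config_disable(vdev);
	virtnet_freeze_down(vdev);	/* quiesce NAPI, detach netdev */
	/* ...tear down virtqueues... */

	vdev->config->reset(vdev);
	virtio_add_status(vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
	virtio_add_status(vdev, VIRTIO_CONFIG_S_DRIVER);
	if (virtio_finalize_features(vdev))
		goto err;

	if (virtnet_restore_up(vdev))	/* re-init vqs, reattach netdev */
		goto err;
	virtio_add_status(vdev, VIRTIO_CONFIG_S_DRIVER_OK);
	virtio_config_enable(vdev);
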
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3b3c5c571b6d..4cc3da1e4070 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1661,6 +1661,49 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
 	.set_settings = virtnet_set_settings,
 };
 
+static void virtnet_freeze_down(struct virtio_device *vdev)
+{
+	struct virtnet_info *vi = vdev->priv;
+	int i;
+
+	/* Make sure no work handler is accessing the device */
+	flush_work(&vi->config_work);
+
+	netif_device_detach(vi->dev);
+	cancel_delayed_work_sync(&vi->refill);
+
+	if (netif_running(vi->dev)) {
+		for (i = 0; i < vi->max_queue_pairs; i++)
+			napi_disable(&vi->rq[i].napi);
+	}
+}
+
+static int init_vqs(struct virtnet_info *vi);
+
+static int virtnet_restore_up(struct virtio_device *vdev)
+{
+	struct virtnet_info *vi = vdev->priv;
+	int err, i;
+
+	err = init_vqs(vi);
+	if (err)
+		return err;
+
+	virtio_device_ready(vdev);
+
+	if (netif_running(vi->dev)) {
+		for (i = 0; i < vi->curr_queue_pairs; i++)
+			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
+				schedule_delayed_work(&vi->refill, 0);
+
+		for (i = 0; i < vi->max_queue_pairs; i++)
+			virtnet_napi_enable(&vi->rq[i]);
+	}
+
+	netif_device_attach(vi->dev);
+	return err;
+}
+
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 {
 	unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
@@ -2353,21 +2396,9 @@ static void virtnet_remove(struct virtio_device *vdev)
 static int virtnet_freeze(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
-	int i;
 
 	virtnet_cpu_notif_remove(vi);
-
-	/* Make sure no work handler is accessing the device */
-	flush_work(&vi->config_work);
-
-	netif_device_detach(vi->dev);
-	cancel_delayed_work_sync(&vi->refill);
-
-	if (netif_running(vi->dev)) {
-		for (i = 0; i < vi->max_queue_pairs; i++)
-			napi_disable(&vi->rq[i].napi);
-	}
-
+	virtnet_freeze_down(vdev);
 	remove_vq_common(vi);
 
 	return 0;
@@ -2376,25 +2407,11 @@ static int virtnet_freeze(struct virtio_device *vdev)
 static int virtnet_restore(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
-	int err, i;
+	int err;
 
-	err = init_vqs(vi);
+	err = virtnet_restore_up(vdev);
 	if (err)
 		return err;
-
-	virtio_device_ready(vdev);
-
-	if (netif_running(vi->dev)) {
-		for (i = 0; i < vi->curr_queue_pairs; i++)
-			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
-				schedule_delayed_work(&vi->refill, 0);
-
-		for (i = 0; i < vi->max_queue_pairs; i++)
-			virtnet_napi_enable(&vi->rq[i]);
-	}
-
-	netif_device_attach(vi->dev);
-
 	virtnet_set_queues(vi, vi->curr_queue_pairs);
 
 	err = virtnet_cpu_notif_add(vi);
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 7062bb0975a5..400d70b69379 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -100,11 +100,6 @@ static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
 			      dev->id.device, dev->id.vendor);
 }
 
-static void add_status(struct virtio_device *dev, unsigned status)
-{
-	dev->config->set_status(dev, dev->config->get_status(dev) | status);
-}
-
 void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
 					 unsigned int fbit)
 {
@@ -145,14 +140,15 @@ void virtio_config_changed(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(virtio_config_changed);
 
-static void virtio_config_disable(struct virtio_device *dev)
+void virtio_config_disable(struct virtio_device *dev)
 {
 	spin_lock_irq(&dev->config_lock);
 	dev->config_enabled = false;
 	spin_unlock_irq(&dev->config_lock);
 }
+EXPORT_SYMBOL_GPL(virtio_config_disable);
 
-static void virtio_config_enable(struct virtio_device *dev)
+void virtio_config_enable(struct virtio_device *dev)
 {
 	spin_lock_irq(&dev->config_lock);
 	dev->config_enabled = true;
@@ -161,8 +157,15 @@ static void virtio_config_enable(struct virtio_device *dev)
 	dev->config_change_pending = false;
 	spin_unlock_irq(&dev->config_lock);
 }
+EXPORT_SYMBOL_GPL(virtio_config_enable);
 
-static int virtio_finalize_features(struct virtio_device *dev)
+void virtio_add_status(struct virtio_device *dev, unsigned int status)
+{
+	dev->config->set_status(dev, dev->config->get_status(dev) | status);
+}
+EXPORT_SYMBOL_GPL(virtio_add_status);
+
+int virtio_finalize_features(struct virtio_device *dev)
 {
 	int ret = dev->config->finalize_features(dev);
 	unsigned status;
@@ -173,7 +176,7 @@ static int virtio_finalize_features(struct virtio_device *dev)
 	if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1))
 		return 0;
 
-	add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
 	status = dev->config->get_status(dev);
 	if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
 		dev_err(&dev->dev, "virtio: device refuses features: %x\n",
@@ -182,6 +185,7 @@ static int virtio_finalize_features(struct virtio_device *dev)
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(virtio_finalize_features);
 
 static int virtio_dev_probe(struct device *_d)
 {
@@ -193,7 +197,7 @@ static int virtio_dev_probe(struct device *_d)
 	u64 driver_features_legacy;
 
 	/* We have a driver! */
-	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
 	/* Figure out what features the device supports. */
 	device_features = dev->config->get_features(dev);
@@ -247,7 +251,7 @@ static int virtio_dev_probe(struct device *_d)
 
 	return 0;
 err:
-	add_status(dev, VIRTIO_CONFIG_S_FAILED);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	return err;
 }
 
@@ -265,7 +269,7 @@ static int virtio_dev_remove(struct device *_d)
 	WARN_ON_ONCE(dev->config->get_status(dev));
 
 	/* Acknowledge the device's existence again. */
-	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 	return 0;
 }
 
@@ -316,7 +320,7 @@ int register_virtio_device(struct virtio_device *dev)
 	dev->config->reset(dev);
 
 	/* Acknowledge that we've seen the device. */
-	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 
 	INIT_LIST_HEAD(&dev->vqs);
 
@@ -325,7 +329,7 @@ int register_virtio_device(struct virtio_device *dev)
 	err = device_register(&dev->dev);
 out:
 	if (err)
-		add_status(dev, VIRTIO_CONFIG_S_FAILED);
+		virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	return err;
 }
 EXPORT_SYMBOL_GPL(register_virtio_device);
@@ -365,18 +369,18 @@ int virtio_device_restore(struct virtio_device *dev)
 	dev->config->reset(dev);
 
 	/* Acknowledge that we've seen the device. */
-	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 
 	/* Maybe driver failed before freeze.
 	 * Restore the failed status, for debugging. */
 	if (dev->failed)
-		add_status(dev, VIRTIO_CONFIG_S_FAILED);
+		virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
 
 	if (!drv)
 		return 0;
 
 	/* We have a driver! */
-	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
 	ret = virtio_finalize_features(dev);
 	if (ret)
@@ -389,14 +393,14 @@ int virtio_device_restore(struct virtio_device *dev)
 	}
 
 	/* Finally, tell the device we're all set */
-	add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 
 	virtio_config_enable(dev);
 
 	return 0;
 
 err:
-	add_status(dev, VIRTIO_CONFIG_S_FAILED);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(virtio_device_restore);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d5eb5479a425..04b0d3f95043 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -132,12 +132,16 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev)
 	return container_of(_dev, struct virtio_device, dev);
 }
 
+void virtio_add_status(struct virtio_device *dev, unsigned int status);
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
 void virtio_break_device(struct virtio_device *dev);
 
 void virtio_config_changed(struct virtio_device *dev);
+void virtio_config_disable(struct virtio_device *dev);
+void virtio_config_enable(struct virtio_device *dev);
+int virtio_finalize_features(struct virtio_device *dev);
 #ifdef CONFIG_PM_SLEEP
 int virtio_device_freeze(struct virtio_device *dev);
 int virtio_device_restore(struct virtio_device *dev);

From 2de2f7f40ef92313d76c3df7f545be5d0899b1aa Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 2 Feb 2017 19:16:29 -0800
Subject: [PATCH 5/5] virtio_net: XDP support for adjust_head

Add support for XDP adjust_head by allocating a 256B header region
that XDP programs can grow into. This is only enabled when an XDP
program is loaded.

In order to ensure that we do not have to unwind queue headroom, push
queue setup below bpf_prog_add(). It reads better to do a prog ref
unwind versus another queue setup call.

At the moment this code must do a full reset to ensure old buffers
without headroom on program add, or with headroom on program removal,
are not used incorrectly in the datapath. Ideally we would only have to
disable/enable the RX queues being updated, but there is no API to do
this at the moment in virtio, so use the big hammer. In practice it is
likely not that big of a problem, as this will only happen when XDP is
enabled/disabled; changing programs does not require the reset.

There is some risk that the driver may either hit an allocation failure
or for some reason fail to correctly negotiate with the underlying
backend; in this case the driver will be left uninitialized. I have
never seen this happen on my test systems, and for what it's worth this
same failure case can occur from probe and other contexts in the virtio
framework.

Signed-off-by: John Fastabend
Acked-by: Jason Wang
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 154 +++++++++++++++++++++++++++++++--------
 1 file changed, 125 insertions(+), 29 deletions(-)
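
To illustrate what the reserved headroom enables, a minimal XDP program
(not part of this patch; ENCAP_LEN is a made-up size, and the helper
declaration follows the samples/bpf style of this era) that calls
bpf_xdp_adjust_head() to prepend a 16-byte header into the headroom:

	#include <linux/bpf.h>

	#define ENCAP_LEN 16

	static int (*bpf_xdp_adjust_head)(struct xdp_md *ctx, int delta) =
		(void *) BPF_FUNC_xdp_adjust_head;

	__attribute__((section("xdp"), used))
	int xdp_push_hdr(struct xdp_md *ctx)
	{
		/* A negative delta grows the packet at the head; this fails
		 * unless the driver reserved enough headroom (virtio_net now
		 * reserves VIRTIO_XDP_HEADROOM = 256 bytes).
		 */
		if (bpf_xdp_adjust_head(ctx, -ENCAP_LEN))
			return XDP_DROP;

		void *data = (void *)(long)ctx->data;
		void *data_end = (void *)(long)ctx->data_end;

		/* Bounds check for the verifier, then fill in the new header */
		if (data + ENCAP_LEN > data_end)
			return XDP_DROP;
		__builtin_memset(data, 0, ENCAP_LEN);
		return XDP_PASS;
	}
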
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4cc3da1e4070..11e28530c83c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -41,6 +41,9 @@ module_param(gso, bool, 0444);
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
 #define GOOD_COPY_LEN	128
 
+/* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
+#define VIRTIO_XDP_HEADROOM 256
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -367,6 +370,7 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi,
 	}
 
 	if (vi->mergeable_rx_bufs) {
+		xdp->data -= sizeof(struct virtio_net_hdr_mrg_rxbuf);
 		/* Zero header and leave csum up to XDP layers */
 		hdr = xdp->data;
 		memset(hdr, 0, vi->hdr_len);
@@ -383,7 +387,9 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi,
 		num_sg = 2;
 		sg_init_table(sq->sg, 2);
 		sg_set_buf(sq->sg, hdr, vi->hdr_len);
-		skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
+		skb_to_sgvec(skb, sq->sg + 1,
+			     xdp->data - xdp->data_hard_start,
+			     xdp->data_end - xdp->data);
 	}
 	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
 				   data, GFP_ATOMIC);
@@ -411,7 +417,6 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	struct bpf_prog *xdp_prog;
 
 	len -= vi->hdr_len;
-	skb_trim(skb, len);
 
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -423,12 +428,16 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
 			goto err_xdp;
 
-		xdp.data = skb->data;
+		xdp.data_hard_start = skb->data;
+		xdp.data = skb->data + VIRTIO_XDP_HEADROOM;
 		xdp.data_end = xdp.data + len;
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
 		switch (act) {
 		case XDP_PASS:
+			/* Recalculate length in case bpf program changed it */
+			__skb_pull(skb, xdp.data - xdp.data_hard_start);
+			len = xdp.data_end - xdp.data;
 			break;
 		case XDP_TX:
 			if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp, skb)))
@@ -445,6 +454,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	}
 	rcu_read_unlock();
 
+	skb_trim(skb, len);
 	return skb;
 
 err_xdp:
@@ -493,7 +503,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 				       unsigned int *len)
 {
 	struct page *page = alloc_page(GFP_ATOMIC);
-	unsigned int page_off = 0;
+	unsigned int page_off = VIRTIO_XDP_HEADROOM;
 
 	if (!page)
 		return NULL;
@@ -529,7 +539,8 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 		put_page(p);
 	}
 
-	*len = page_off;
+	/* Headroom does not contribute to packet length */
+	*len = page_off - VIRTIO_XDP_HEADROOM;
 	return page;
 err_buf:
 	__free_pages(page, 0);
@@ -568,7 +579,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 						      page, offset, &len);
 			if (!xdp_page)
 				goto err_xdp;
-			offset = 0;
+			offset = VIRTIO_XDP_HEADROOM;
 		} else {
 			xdp_page = page;
 		}
@@ -581,19 +592,30 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		if (unlikely(hdr->hdr.gso_type))
 			goto err_xdp;
 
+		/* Allow consuming headroom but reserve enough space to push
+		 * the descriptor on if we get an XDP_TX return code.
+		 */
 		data = page_address(xdp_page) + offset;
+		xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
 		xdp.data = data + vi->hdr_len;
 		xdp.data_end = xdp.data + (len - vi->hdr_len);
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
 		switch (act) {
 		case XDP_PASS:
+			/* recalculate offset to account for any header
+			 * adjustments. Note other cases do not build an
+			 * skb and avoid using offset
+			 */
+			offset = xdp.data -
+					page_address(xdp_page) - vi->hdr_len;
+
 			/* We can only create skb based on xdp_page. */
 			if (unlikely(xdp_page != page)) {
 				rcu_read_unlock();
 				put_page(page);
 				head_skb = page_to_skb(vi, rq, xdp_page,
-						       0, len, PAGE_SIZE);
+						       offset, len, PAGE_SIZE);
 				ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
 				return head_skb;
 			}
@@ -760,23 +782,30 @@ frame_err:
 	dev_kfree_skb(skb);
 }
 
+static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
+{
+	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+}
+
 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
 			     gfp_t gfp)
 {
+	int headroom = GOOD_PACKET_LEN + virtnet_get_headroom(vi);
+	unsigned int xdp_headroom = virtnet_get_headroom(vi);
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	int err;
 
-	skb = __netdev_alloc_skb_ip_align(vi->dev, GOOD_PACKET_LEN, gfp);
+	skb = __netdev_alloc_skb_ip_align(vi->dev, headroom, gfp);
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	skb_put(skb, GOOD_PACKET_LEN);
+	skb_put(skb, headroom);
 
 	hdr = skb_vnet_hdr(skb);
 	sg_init_table(rq->sg, 2);
 	sg_set_buf(rq->sg, hdr, vi->hdr_len);
-	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
+	skb_to_sgvec(skb, rq->sg + 1, xdp_headroom, skb->len - xdp_headroom);
 
 	err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
 	if (err < 0)
@@ -844,24 +873,27 @@ static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
 	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
 }
 
-static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
+static int add_recvbuf_mergeable(struct virtnet_info *vi,
+				 struct receive_queue *rq, gfp_t gfp)
 {
 	struct page_frag *alloc_frag = &rq->alloc_frag;
+	unsigned int headroom = virtnet_get_headroom(vi);
 	char *buf;
 	unsigned long ctx;
 	int err;
 	unsigned int len, hole;
 
 	len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
-	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
+	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
 		return -ENOMEM;
 
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+	buf += headroom; /* advance address leaving hole at front of pkt */
 	ctx = mergeable_buf_to_ctx(buf, len);
 	get_page(alloc_frag->page);
-	alloc_frag->offset += len;
+	alloc_frag->offset += len + headroom;
 	hole = alloc_frag->size - alloc_frag->offset;
-	if (hole < len) {
+	if (hole < len + headroom) {
 		/* To avoid internal fragmentation, if there is very likely not
 		 * enough space for another buffer, add the remaining space to
		 * the current buffer. This extra space is not included in
 		 * the truesize stats.
 		 */
@@ -895,7 +927,7 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
 	gfp |= __GFP_COLD;
 	do {
 		if (vi->mergeable_rx_bufs)
-			err = add_recvbuf_mergeable(rq, gfp);
+			err = add_recvbuf_mergeable(vi, rq, gfp);
 		else if (vi->big_packets)
 			err = add_recvbuf_big(vi, rq, gfp);
 		else
@@ -1679,6 +1711,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 }
 
 static int init_vqs(struct virtnet_info *vi);
+static void _remove_vq_common(struct virtnet_info *vi);
 
 static int virtnet_restore_up(struct virtio_device *vdev)
 {
@@ -1704,19 +1737,47 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 	return err;
 }
 
+static int virtnet_reset(struct virtnet_info *vi)
+{
+	struct virtio_device *dev = vi->vdev;
+	int ret;
+
+	virtio_config_disable(dev);
+	dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
+	virtnet_freeze_down(dev);
+	_remove_vq_common(vi);
+
+	dev->config->reset(dev);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+
+	ret = virtio_finalize_features(dev);
+	if (ret)
+		goto err;
+
+	ret = virtnet_restore_up(dev);
+	if (ret)
+		goto err;
+	ret = _virtnet_set_queues(vi, vi->curr_queue_pairs);
+	if (ret)
+		goto err;
+
+	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+	virtio_config_enable(dev);
+	return 0;
+err:
+	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
+	return ret;
+}
+
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 {
 	unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct bpf_prog *old_prog;
-	u16 xdp_qp = 0, curr_qp;
+	u16 oxdp_qp, xdp_qp = 0, curr_qp;
 	int i, err;
 
-	if (prog && prog->xdp_adjust_head) {
-		netdev_warn(dev, "Does not support bpf_xdp_adjust_head()\n");
-		return -EOPNOTSUPP;
-	}
-
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
 	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
 	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
@@ -1746,21 +1807,32 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 		return -ENOMEM;
 	}
 
+	if (prog) {
+		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
+	}
+
 	err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
 	if (err) {
 		dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
-		return err;
+		goto virtio_queue_err;
 	}
 
-	if (prog) {
-		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
-		if (IS_ERR(prog)) {
-			_virtnet_set_queues(vi, curr_qp);
-			return PTR_ERR(prog);
-		}
+	oxdp_qp = vi->xdp_queue_pairs;
+
+	/* Changing the headroom in buffers is a disruptive operation because
+	 * existing buffers must be flushed and reallocated. This will happen
+	 * when a xdp program is initially added or xdp is disabled by removing
+	 * the xdp program resulting in number of XDP queues changing.
+	 */
+	if (vi->xdp_queue_pairs != xdp_qp) {
+		vi->xdp_queue_pairs = xdp_qp;
+		err = virtnet_reset(vi);
+		if (err)
+			goto virtio_reset_err;
 	}
 
-	vi->xdp_queue_pairs = xdp_qp;
 	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -1771,6 +1843,21 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 	}
 
 	return 0;
+
+virtio_reset_err:
+	/* On reset error do our best to unwind XDP changes inflight and return
+	 * error up to user space for resolution. The underlying reset hung on
+	 * us so not much we can do here.
+	 */
+	dev_warn(&dev->dev, "XDP reset failure and queues unstable\n");
+	vi->xdp_queue_pairs = oxdp_qp;
+virtio_queue_err:
+	/* On queue set error we can unwind bpf ref count and user space can
+	 * retry this is most likely an allocation failure.
+	 */
+	if (prog)
+		bpf_prog_sub(prog, vi->max_queue_pairs - 1);
+	return err;
 }
 
 static bool virtnet_xdp_query(struct net_device *dev)
@@ -2361,6 +2448,15 @@ free:
 	return err;
 }
 
+static void _remove_vq_common(struct virtnet_info *vi)
+{
+	vi->vdev->config->reset(vi->vdev);
+	free_unused_bufs(vi);
+	_free_receive_bufs(vi);
+	free_receive_page_frags(vi);
+	virtnet_del_vqs(vi);
+}
+
 static void remove_vq_common(struct virtnet_info *vi)
 {
 	vi->vdev->config->reset(vi->vdev);