From 515866f8185b92fb18a782408c53839f003c7669 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Jul 2015 16:35:50 +0300 Subject: [PATCH 1/5] ipvlan: remove counters of ipv4 and ipv6 addresses They are unused after commit f631c44bbe15 ("ipvlan: Always set broadcast bit in multicast filter"). Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan.h | 2 -- drivers/net/ipvlan/ipvlan_main.c | 33 ++++++++++++-------------------- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 953a97492fab..68e2549c28c6 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -67,8 +67,6 @@ struct ipvl_dev { struct ipvl_port *port; struct net_device *phy_dev; struct list_head addrs; - int ipv4cnt; - int ipv6cnt; struct ipvl_pcpu_stats __percpu *pcpu_stats; DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); netdev_features_t sfeatures; diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 1acc283160d9..048ecf0c76fb 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -153,10 +153,9 @@ static int ipvlan_open(struct net_device *dev) else dev->flags &= ~IFF_NOARP; - if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { - list_for_each_entry(addr, &ipvlan->addrs, anode) - ipvlan_ht_addr_add(ipvlan, addr); - } + list_for_each_entry(addr, &ipvlan->addrs, anode) + ipvlan_ht_addr_add(ipvlan, addr); + return dev_uc_add(phy_dev, phy_dev->dev_addr); } @@ -171,10 +170,9 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_del(phy_dev, phy_dev->dev_addr); - if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { - list_for_each_entry(addr, &ipvlan->addrs, anode) - ipvlan_ht_addr_del(addr, !dev->dismantle); - } + list_for_each_entry(addr, &ipvlan->addrs, anode) + ipvlan_ht_addr_del(addr, !dev->dismantle); + return 0; } @@ -471,8 +469,6 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan->port = port; ipvlan->sfeatures = IPVLAN_FEATURES; INIT_LIST_HEAD(&ipvlan->addrs); - ipvlan->ipv4cnt = 0; - ipvlan->ipv6cnt = 0; /* TODO Probably put random address here to be presented to the * world but keep using the physical-dev address for the outgoing @@ -508,12 +504,11 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_addr *addr, *next; - if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { - list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { - ipvlan_ht_addr_del(addr, !dev->dismantle); - list_del(&addr->anode); - } + list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { + ipvlan_ht_addr_del(addr, !dev->dismantle); + list_del(&addr->anode); } + list_del_rcu(&ipvlan->pnode); unregister_netdevice_queue(dev, head); netdev_upper_dev_unlink(ipvlan->phy_dev, dev); @@ -627,7 +622,7 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr)); addr->atype = IPVL_IPV6; list_add_tail(&addr->anode, &ipvlan->addrs); - ipvlan->ipv6cnt++; + /* If the interface is not up, the address will be added to the hash * list by ipvlan_open. */ @@ -647,8 +642,6 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) ipvlan_ht_addr_del(addr, true); list_del(&addr->anode); - ipvlan->ipv6cnt--; - WARN_ON(ipvlan->ipv6cnt < 0); kfree_rcu(addr, rcu); return; @@ -699,7 +692,7 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr)); addr->atype = IPVL_IPV4; list_add_tail(&addr->anode, &ipvlan->addrs); - ipvlan->ipv4cnt++; + /* If the interface is not up, the address will be added to the hash * list by ipvlan_open. */ @@ -719,8 +712,6 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) ipvlan_ht_addr_del(addr, true); list_del(&addr->anode); - ipvlan->ipv4cnt--; - WARN_ON(ipvlan->ipv4cnt < 0); kfree_rcu(addr, rcu); return; From 6a725497318545aae246232ed05a8df9cffb0a02 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Jul 2015 16:35:51 +0300 Subject: [PATCH 2/5] ipvlan: plug memory leak in ipvlan_link_delete Add missing kfree_rcu(addr, rcu); Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 048ecf0c76fb..7d81e37c3f76 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -507,6 +507,7 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { ipvlan_ht_addr_del(addr, !dev->dismantle); list_del(&addr->anode); + kfree_rcu(addr, rcu); } list_del_rcu(&ipvlan->pnode); From 6640e673c6f3dbaace085ca2686a8a343dc23a71 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Jul 2015 16:35:53 +0300 Subject: [PATCH 3/5] ipvlan: unhash addresses without synchronize_rcu All structures used in traffic forwarding are rcu-protected: ipvl_addr, ipvl_dev and ipvl_port. Thus we can unhash addresses without synchronization. We'll anyway hash it back into the same bucket: in worst case lockless lookup will scan hash once again. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan.h | 2 +- drivers/net/ipvlan/ipvlan_core.c | 4 +--- drivers/net/ipvlan/ipvlan_main.c | 8 ++++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 68e2549c28c6..40f9d7e4a0ea 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -122,5 +122,5 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, const void *iaddr, bool is_v6); -void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync); +void ipvlan_ht_addr_del(struct ipvl_addr *addr); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 8afbedad620d..8f8628a0adba 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -85,11 +85,9 @@ void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); } -void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync) +void ipvlan_ht_addr_del(struct ipvl_addr *addr) { hlist_del_init_rcu(&addr->hlnode); - if (sync) - synchronize_rcu(); } struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 7d81e37c3f76..e995bc501ee6 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -171,7 +171,7 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_del(phy_dev, phy_dev->dev_addr); list_for_each_entry(addr, &ipvlan->addrs, anode) - ipvlan_ht_addr_del(addr, !dev->dismantle); + ipvlan_ht_addr_del(addr); return 0; } @@ -505,7 +505,7 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) struct ipvl_addr *addr, *next; list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { - ipvlan_ht_addr_del(addr, !dev->dismantle); + ipvlan_ht_addr_del(addr); list_del(&addr->anode); kfree_rcu(addr, rcu); } @@ -641,7 +641,7 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) if (!addr) return; - ipvlan_ht_addr_del(addr, true); + ipvlan_ht_addr_del(addr); list_del(&addr->anode); kfree_rcu(addr, rcu); @@ -711,7 +711,7 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) if (!addr) return; - ipvlan_ht_addr_del(addr, true); + ipvlan_ht_addr_del(addr); list_del(&addr->anode); kfree_rcu(addr, rcu); From 0fba37a3af03a7e74bf9e75473729adb98da49c3 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 14 Jul 2015 16:35:54 +0300 Subject: [PATCH 4/5] ipvlan: use rcu_deference_bh() in ipvlan_queue_xmit() In tx path rcu_read_lock_bh() is held, so we need rcu_deference_bh(). This fixes the following warning: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc1+ #1007 Not tainted ------------------------------- drivers/net/ipvlan/ipvlan.h:106 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 1 lock held by dhclient/1076: #0: (rcu_read_lock_bh){......}, at: [] rcu_lock_acquire+0x0/0x26 stack backtrace: CPU: 2 PID: 1076 Comm: dhclient Not tainted 4.1.0-rc1+ #1007 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0000000000000001 ffff8800d381bac8 ffffffff81a4154f 000000003c1a3c19 ffff8800d4d0a690 ffff8800d381baf8 ffffffff810b849f ffff880117d41148 ffff880117d40000 ffff880117d40068 0000000000000156 ffff8800d381bb18 Call Trace: [] dump_stack+0x4c/0x65 [] lockdep_rcu_suspicious+0x107/0x110 [] ipvlan_port_get_rcu+0x47/0x4e [] ipvlan_queue_xmit+0x35/0x450 [] ? rcu_read_unlock+0x3e/0x5f [] ? local_clock+0x19/0x22 [] ? __lock_is_held+0x39/0x52 [] ipvlan_start_xmit+0x1b/0x44 [] dev_hard_start_xmit+0x2ae/0x467 [] __dev_queue_xmit+0x50a/0x60c [] dev_queue_xmit_sk+0x13/0x15 [] dev_queue_xmit+0x10/0x12 [] packet_sendmsg+0xb6b/0xbdf [] ? mark_lock+0x2e/0x226 [] ? sched_clock_cpu+0x9e/0xb7 [] sock_sendmsg_nosec+0x12/0x1d [] sock_sendmsg+0x29/0x2e [] sock_write_iter+0x70/0x91 [] __vfs_write+0x7e/0xa7 [] vfs_write+0x92/0xe8 [] SyS_write+0x47/0x7e [] system_call_fastpath+0x12/0x6f Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") Cc: Mahesh Bandewar Signed-off-by: Cong Wang Acked-by: Mahesh Bandewar Acked-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan.h | 5 +++++ drivers/net/ipvlan/ipvlan_core.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 40f9d7e4a0ea..9542b7bac61a 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -104,6 +104,11 @@ static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) return rcu_dereference(d->rx_handler_data); } +static inline struct ipvl_port *ipvlan_port_get_rcu_bh(const struct net_device *d) +{ + return rcu_dereference_bh(d->rx_handler_data); +} + static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d) { return rtnl_dereference(d->rx_handler_data); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 8f8628a0adba..207f62e8de9a 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -529,7 +529,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); - struct ipvl_port *port = ipvlan_port_get_rcu(ipvlan->phy_dev); + struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev); if (!port) goto out; From 23a5a49c83dd8a7201a42e96d24238bde3547c11 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Jul 2015 16:35:55 +0300 Subject: [PATCH 5/5] ipvlan: ignore addresses from ipv6 autoconfiguration Inet6addr notifier is atomic and runs in bh context without RTNL when ipv6 receives router advertisement packet and performs autoconfiguration. Proper fix still in discussion. Let's at least plug the bug. v1: http://lkml.kernel.org/r/20150514135618.14062.1969.stgit@buzz v2: http://lkml.kernel.org/r/20150703125840.24121.91556.stgit@buzz Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index e995bc501ee6..20b58bdecf75 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -655,6 +655,10 @@ static int ipvlan_addr6_event(struct notifier_block *unused, struct net_device *dev = (struct net_device *)if6->idev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); + /* FIXME IPv6 autoconf calls us from bh without RTNL */ + if (in_softirq()) + return NOTIFY_DONE; + if (!netif_is_ipvlan(dev)) return NOTIFY_DONE;