/*
 * Patch summary (text before the first "diff --git" header is not part of
 * any hunk):
 *
 * - net/ipv6/route.c: the former body of ip6_route_output_flags() becomes
 *   ip6_route_output_flags_noref(), which always ORs RT6_LOOKUP_F_DST_NOREF
 *   into the lookup flags. A new ip6_route_output_flags() wrapper calls it
 *   under rcu_read_lock(), takes a reference with dst_hold_safe() when the
 *   rt6i_uncached list entry of the dst is empty, and substitutes a held
 *   ip6_null_entry when dst_hold_safe() fails.
 * - include/net/ip6_route.h: declares ip6_route_output_flags_noref().
 * - drivers/net/vrf.c: vrf_link_scope_lookup() adds RT6_LOOKUP_F_DST_NOREF
 *   to its flags and no longer holds ip6_null_entry before returning it.
 * - net/l3mdev/l3mdev.c: l3mdev_link_scope_lookup() drops its own
 *   rcu_read_lock()/rcu_read_unlock() pair and warns once if the caller
 *   does not hold the RCU read lock.
 */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 11b9525dff27..69ef9cce5858 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1072,12 +1072,14 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
 #if IS_ENABLED(CONFIG_IPV6)
 /* send to link-local or multicast address via interface enslaved to
  * VRF device. Force lookup to VRF table without changing flow struct
+ * Note: the caller must hold rcu_read_lock(). No refcnt is taken on the
+ * returned dst, including when ip6_null_entry is returned.
  */
 static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
 					       struct flowi6 *fl6)
 {
 	struct net *net = dev_net(dev);
-	int flags = RT6_LOOKUP_F_IFACE;
+	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF;
 	struct dst_entry *dst = NULL;
 	struct rt6_info *rt;
 
@@ -1087,7 +1089,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
 	 */
 	if (fl6->flowi6_oif == dev->ifindex) {
 		dst = &net->ipv6.ip6_null_entry->dst;
-		dst_hold(dst);
 		return dst;
 	}
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 0709835c01ad..89ad7917b98d 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -84,6 +84,10 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
 					 struct flowi6 *fl6,
 					 const struct sk_buff *skb, int flags);
 
+struct dst_entry *ip6_route_output_flags_noref(struct net *net,
+					       const struct sock *sk,
+					       struct flowi6 *fl6, int flags);
+
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 					 struct flowi6 *fl6, int flags);
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 66fc69ef5909..3975ae8e2440 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2415,8 +2415,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net,
 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
 }
 
-struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
-					 struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_output_flags_noref(struct net *net,
+					       const struct sock *sk,
+					       struct flowi6 *fl6, int flags)
 {
 	bool any_src;
 
@@ -2424,6 +2425,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
 		struct dst_entry *dst;
 
+		/* l3mdev_link_scope_lookup() takes no refcnt on the dst */
 		dst = l3mdev_link_scope_lookup(net, fl6);
 		if (dst)
 			return dst;
@@ -2431,6 +2433,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 
 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
+	flags |= RT6_LOOKUP_F_DST_NOREF;
 	any_src = ipv6_addr_any(&fl6->saddr);
 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
 	    (fl6->flowi6_oif && any_src))
@@ -2443,6 +2446,28 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 
 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
 }
+EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
+
+struct dst_entry *ip6_route_output_flags(struct net *net,
+					 const struct sock *sk,
+					 struct flowi6 *fl6,
+					 int flags)
+{
+	struct dst_entry *dst;
+	struct rt6_info *rt6;
+
+	rcu_read_lock();
+	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
+	rt6 = (struct rt6_info *)dst;
+	/* dsts on the uncached list already have a refcnt; hold the others. */
+	if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
+		dst = &net->ipv6.ip6_null_entry->dst;
+		dst_hold(dst);
+	}
+	rcu_read_unlock();
+
+	return dst;
+}
 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index cfc9fcb97465..f35899d45a9a 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -118,6 +118,8 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
  *			     local and multicast addresses
  *	@net: network namespace for device index lookup
  *	@fl6: IPv6 flow struct for lookup
+ *	This function does not hold refcnt on the returned dst.
+ *	Caller must hold rcu_read_lock().
  */
 
 struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
@@ -126,9 +128,8 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
 	struct dst_entry *dst = NULL;
 	struct net_device *dev;
 
+	WARN_ON_ONCE(!rcu_read_lock_held());
 	if (fl6->flowi6_oif) {
-		rcu_read_lock();
-
 		dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
 		if (dev && netif_is_l3_slave(dev))
 			dev = netdev_master_upper_dev_get_rcu(dev);
@@ -136,8 +137,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
 		if (dev && netif_is_l3_master(dev) &&
 		    dev->l3mdev_ops->l3mdev_link_scope_lookup)
 			dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
-
-		rcu_read_unlock();
 	}
 
 	return dst;