alistair23-linux/net/dsa/tag_sja1105.c
Vladimir Oltean 3e8db7e560 net: dsa: sja1105: Fix sleeping while atomic in .port_hwtstamp_set
Currently this stack trace can be seen with CONFIG_DEBUG_ATOMIC_SLEEP=y:

[   41.568348] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:909
[   41.576757] in_atomic(): 1, irqs_disabled(): 0, pid: 208, name: ptp4l
[   41.583212] INFO: lockdep is turned off.
[   41.587123] CPU: 1 PID: 208 Comm: ptp4l Not tainted 5.3.0-rc6-01445-ge950f2d4bc7f-dirty #1827
[   41.599873] [<c0313d7c>] (unwind_backtrace) from [<c030e13c>] (show_stack+0x10/0x14)
[   41.607584] [<c030e13c>] (show_stack) from [<c1212d50>] (dump_stack+0xd4/0x100)
[   41.614863] [<c1212d50>] (dump_stack) from [<c037dfc8>] (___might_sleep+0x1c8/0x2b4)
[   41.622574] [<c037dfc8>] (___might_sleep) from [<c122ea90>] (__mutex_lock+0x48/0xab8)
[   41.630368] [<c122ea90>] (__mutex_lock) from [<c122f51c>] (mutex_lock_nested+0x1c/0x24)
[   41.638340] [<c122f51c>] (mutex_lock_nested) from [<c0c6fe08>] (sja1105_static_config_reload+0x30/0x27c)
[   41.647779] [<c0c6fe08>] (sja1105_static_config_reload) from [<c0c7015c>] (sja1105_hwtstamp_set+0x108/0x1cc)
[   41.657562] [<c0c7015c>] (sja1105_hwtstamp_set) from [<c0feb650>] (dev_ifsioc+0x18c/0x330)
[   41.665788] [<c0feb650>] (dev_ifsioc) from [<c0febbd8>] (dev_ioctl+0x320/0x6e8)
[   41.673064] [<c0febbd8>] (dev_ioctl) from [<c0f8b1f4>] (sock_ioctl+0x334/0x5e8)
[   41.680340] [<c0f8b1f4>] (sock_ioctl) from [<c05404a8>] (do_vfs_ioctl+0xb0/0xa10)
[   41.687789] [<c05404a8>] (do_vfs_ioctl) from [<c0540e3c>] (ksys_ioctl+0x34/0x58)
[   41.695151] [<c0540e3c>] (ksys_ioctl) from [<c0301000>] (ret_fast_syscall+0x0/0x28)
[   41.702768] Exception stack(0xe8495fa8 to 0xe8495ff0)
[   41.707796] 5fa0:                   beff4a8c 00000001 00000011 000089b0 beff4a8c beff4a80
[   41.715933] 5fc0: beff4a8c 00000001 0000000c 00000036 b6fa98c8 004e19c1 00000001 00000000
[   41.724069] 5fe0: 004dcedc beff4a6c 004c0738 b6e7af4c
[   41.729860] BUG: scheduling while atomic: ptp4l/208/0x00000002
[   41.735682] INFO: lockdep is turned off.

Enabling RX timestamping will logically disturb the fastpath (processing
of meta frames). Replace bool hwts_rx_en with a bit that is checked
atomically from the fastpath and temporarily cleared from the sleepable
context while the RX timestamping configuration is being changed (a
destructive operation anyway, since it requires a switch reset).
If the bit is found unset, the fastpath (net/dsa/tag_sja1105.c) will just
drop any received meta frame and not take the meta_lock at all.

Fixes: a602afd200 ("net: dsa: sja1105: Expose PTP timestamping ioctls to userspace")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-02 12:19:53 -04:00

309 lines
8.9 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*/
#include <linux/if_vlan.h>
#include <linux/dsa/sja1105.h>
#include <linux/dsa/8021q.h>
#include <linux/packing.h>
#include "dsa_priv.h"
/* Like is_link_local_ether_addr(hdr->h_dest), except that PTP destination
 * addresses are matched as well. Frames carrying the SJA1105 meta EtherType
 * are never reported as link-local.
 */
static inline bool sja1105_is_link_local(const struct sk_buff *skb)
{
	const struct ethhdr *eth = eth_hdr(skb);
	u64 dest;

	/* Meta follow-up frames are classified separately */
	if (ntohs(eth->h_proto) == ETH_P_SJA1105_META)
		return false;

	dest = ether_addr_to_u64(eth->h_dest);

	/* A hit on either of the two DMAC filters is enough */
	return (dest & SJA1105_LINKLOCAL_FILTER_A_MASK) ==
		       SJA1105_LINKLOCAL_FILTER_A ||
	       (dest & SJA1105_LINKLOCAL_FILTER_B_MASK) ==
		       SJA1105_LINKLOCAL_FILTER_B;
}
/* Fields decoded from a meta follow-up frame by sja1105_meta_unpack().
 * Each field is passed by address to packing() there, hence (presumably)
 * the u64 type even for the single-byte values.
 */
struct sja1105_meta {
/* Partial RX timestamp, unpacked from payload bytes 0-3 */
u64 tstamp;
/* Unpacked from payload byte 4; written back to h_dest[4] of the
 * timestamped frame by sja1105_transfer_meta()
 */
u64 dmac_byte_4;
/* Unpacked from payload byte 5; written back to h_dest[3] of the
 * timestamped frame by sja1105_transfer_meta()
 */
u64 dmac_byte_3;
/* Unpacked from payload byte 6 */
u64 source_port;
/* Unpacked from payload byte 7 */
u64 switch_id;
};
/* Decode the 8 bytes that follow the Ethernet header of a meta frame
 * into @meta.
 */
static void sja1105_meta_unpack(const struct sk_buff *skb,
				struct sja1105_meta *meta)
{
	u8 *payload = skb_mac_header(skb) + ETH_HLEN;

	/* Layout per UM10944.pdf section 4.2.17 AVB Parameters
	 * (structure of the meta-data follow-up frame). The data is in
	 * network byte order, so no packing quirks are required.
	 *
	 * SJA1105 E/T fills only bits 23:0 of the timestamp while P/Q/R/S
	 * fills all 32 bits. The layout is otherwise identical and E/T
	 * leaves the high-order byte zeroed, so one unpacking sequence
	 * serves both device series.
	 */
	packing(&payload[0], &meta->tstamp,      31, 0, 4, UNPACK, 0);
	packing(&payload[4], &meta->dmac_byte_4,  7, 0, 1, UNPACK, 0);
	packing(&payload[5], &meta->dmac_byte_3,  7, 0, 1, UNPACK, 0);
	packing(&payload[6], &meta->source_port,  7, 0, 1, UNPACK, 0);
	packing(&payload[7], &meta->switch_id,    7, 0, 1, UNPACK, 0);
}
/* A meta frame is recognised by the exact combination of the well-known
 * source MAC, destination MAC and EtherType.
 */
static inline bool sja1105_is_meta_frame(const struct sk_buff *skb)
{
	const struct ethhdr *eth = eth_hdr(skb);

	return ether_addr_to_u64(eth->h_source) == SJA1105_META_SMAC &&
	       ether_addr_to_u64(eth->h_dest) == SJA1105_META_DMAC &&
	       ntohs(eth->h_proto) == ETH_P_SJA1105_META;
}
/* First look the tagger gets at a frame on RX: decide whether this
 * tagger is able to decode it.
 *
 * Everything is accepted while the port is not VLAN filtering. Once it
 * is, only link-local and meta frames are accepted.
 */
static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev)
{
	return !dsa_port_is_vlan_filtering(dev->dsa_ptr) ||
	       sja1105_is_link_local(skb) ||
	       sja1105_is_meta_frame(skb);
}
/* TX hook of the tagger: link-local frames are deferred, frames on a
 * VLAN-filtering port are passed through untouched, everything else
 * gets a dsa_8021q tag.
 */
static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
				    struct net_device *netdev)
{
	struct dsa_port *dp = dsa_slave_to_port(netdev);
	u16 tx_vid;
	u8 pcp;

	/* Management traffic is not steered by switch tagging but by
	 * SPI-installed management routes. The second half of that job
	 * is done by the .port_deferred_xmit driver callback.
	 */
	if (unlikely(sja1105_is_link_local(skb)))
		return dsa_defer_xmit(skb, netdev);

	/* Under a vlan_filtering bridge, terminating IP on switch ports
	 * by means of 802.1Q tags is too brittle to be usable, so fall
	 * back to what dsa_slave_notag_xmit would do: send it as is.
	 */
	if (dsa_port_is_vlan_filtering(dp))
		return skb;

	tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
	pcp = netdev_txq_to_tc(netdev, skb_get_queue_mapping(skb));

	return dsa_8021q_xmit(skb, netdev, ETH_P_SJA1105,
			      ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
}
/* Copy what was learned from the meta frame onto the timestampable skb:
 * the partial timestamp goes into the skb control block and the two
 * saved DMAC bytes go back into the destination address.
 */
static void sja1105_transfer_meta(struct sk_buff *skb,
				  const struct sja1105_meta *meta)
{
	struct ethhdr *eth = eth_hdr(skb);

	SJA1105_SKB_CB(skb)->meta_tstamp = meta->tstamp;

	eth->h_dest[4] = meta->dmac_byte_4;
	eth->h_dest[3] = meta->dmac_byte_3;
}
/* This is a simple state machine which follows the hardware mechanism of
 * generating RX timestamps:
 *
 * After each timestampable skb (all traffic for which send_meta1 and
 * send_meta0 is true, aka all MAC-filtered link-local traffic) a meta frame
 * containing a partial timestamp is immediately generated by the switch and
 * sent as a follow-up to the link-local frame on the CPU port.
 *
 * The meta frames have no unique identifier (such as sequence number) by which
 * one may pair them to the correct timestampable frame.
 * Instead, the switch has internal logic that ensures no frames are sent on
 * the CPU port between a link-local timestampable frame and its corresponding
 * meta follow-up. It also ensures strict ordering between ports (lower ports
 * have higher priority towards the CPU port). For this reason, a per-port
 * data structure is not needed/desirable.
 *
 * This function pairs the link-local frame with its partial timestamp from the
 * meta follow-up frame. The full timestamp will be reconstructed later in a
 * work queue.
 *
 * @skb:           frame just received; skb->dev must already point to the
 *                 slave net device
 * @meta:          decoded meta information, only used when @is_meta is true
 * @is_link_local: @skb is a timestampable link-local frame
 * @is_meta:       @skb is a meta follow-up frame
 *
 * Returns the skb that should continue up the stack, or NULL when the frame
 * was buffered or must be dropped.
 */
static struct sk_buff
*sja1105_rcv_meta_state_machine(struct sk_buff *skb,
				struct sja1105_meta *meta,
				bool is_link_local,
				bool is_meta)
{
	struct sja1105_port *sp;
	struct dsa_port *dp;

	dp = dsa_slave_to_port(skb->dev);
	sp = dp->priv;

	/* Step 1: A timestampable frame was received.
	 * Buffer it until we get its meta frame.
	 */
	if (is_link_local) {
		if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state))
			/* Do normal processing. */
			return skb;

		spin_lock(&sp->data->meta_lock);
		/* Was this a link-local frame instead of the meta
		 * that we were expecting?
		 */
		if (sp->data->stampable_skb) {
			dev_err_ratelimited(dp->ds->dev,
					    "Expected meta frame, is %12llx "
					    "in the DSA master multicast filter?\n",
					    SJA1105_META_DMAC);
			/* The previously buffered frame will never get its
			 * timestamp; drop it before buffering the new one.
			 */
			kfree_skb(sp->data->stampable_skb);
		}

		/* Hold a reference to avoid dsa_switch_rcv
		 * from freeing the skb.
		 */
		sp->data->stampable_skb = skb_get(skb);
		spin_unlock(&sp->data->meta_lock);

		/* Tell DSA we got nothing */
		return NULL;

	/* Step 2: The meta frame arrived.
	 * Time to take the stampable skb out of the closet, annotate it
	 * with the partial timestamp, and pretend that we received it
	 * just now (basically masquerade the buffered frame as the meta
	 * frame, which serves no further purpose).
	 */
	} else if (is_meta) {
		struct sk_buff *stampable_skb;

		/* Drop the meta frame if we're not in the right state
		 * to process it.
		 */
		if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state))
			return NULL;

		spin_lock(&sp->data->meta_lock);

		/* Detach the buffered frame; from here on this function
		 * owns the reference taken by skb_get() in step 1.
		 */
		stampable_skb = sp->data->stampable_skb;
		sp->data->stampable_skb = NULL;

		/* Was this a meta frame instead of the link-local
		 * that we were expecting?
		 */
		if (!stampable_skb) {
			dev_err_ratelimited(dp->ds->dev,
					    "Unexpected meta frame\n");
			spin_unlock(&sp->data->meta_lock);
			return NULL;
		}

		if (stampable_skb->dev != skb->dev) {
			dev_err_ratelimited(dp->ds->dev,
					    "Meta frame on wrong port\n");
			spin_unlock(&sp->data->meta_lock);
			/* The buffered frame was already detached above, so
			 * it has to be released here or it would be leaked.
			 */
			kfree_skb(stampable_skb);
			return NULL;
		}

		/* Free the meta frame and give DSA the buffered stampable_skb
		 * for further processing up the network stack.
		 */
		kfree_skb(skb);
		skb = stampable_skb;
		sja1105_transfer_meta(skb, meta);

		spin_unlock(&sp->data->meta_lock);
	}

	return skb;
}
/* RX hook of the tagger: work out which switch and port the frame came
 * from, point skb->dev at the matching slave device, strip the dsa_8021q
 * tag if there is one, then let the meta state machine decide which skb
 * (if any) goes up the stack.
 *
 * Returns NULL when the frame cannot be decoded, when no slave device
 * matches, or when the state machine buffers or drops the frame.
 */
static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
				   struct net_device *netdev,
				   struct packet_type *pt)
{
	struct vlan_ethhdr *vhdr = vlan_eth_hdr(skb);
	bool is_tagged = (ntohs(vhdr->h_vlan_proto) == ETH_P_SJA1105);
	bool is_link_local = sja1105_is_link_local(skb);
	bool is_meta = sja1105_is_meta_frame(skb);
	struct sja1105_meta meta = {0};
	int source_port, switch_id;

	skb->offload_fwd_mark = 1;

	if (is_tagged) {
		/* Regular traffic: switch and port are encoded in the VID
		 * of the dsa_8021q tag, the priority in its PCP bits.
		 */
		u16 tci = ntohs(vhdr->h_vlan_TCI);
		u16 vid = tci & VLAN_VID_MASK;

		source_port = dsa_8021q_rx_source_port(vid);
		switch_id = dsa_8021q_rx_switch_id(vid);
		skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
	} else if (is_link_local) {
		/* Management traffic: thanks to the incl_srcpt options the
		 * switch has placed the port ID and the switch ID into two
		 * bytes of the destination MAC.
		 */
		source_port = vhdr->h_dest[3];
		switch_id = vhdr->h_dest[4];

		/* Undo the mangling done by the switch */
		vhdr->h_dest[3] = 0;
		vhdr->h_dest[4] = 0;
	} else if (is_meta) {
		/* Meta follow-up: switch and port are part of the payload */
		sja1105_meta_unpack(skb, &meta);
		source_port = meta.source_port;
		switch_id = meta.switch_id;
	} else {
		/* Nothing we know how to decode */
		return NULL;
	}

	skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
	if (!skb->dev) {
		netdev_warn(netdev, "Couldn't decode source port\n");
		return NULL;
	}

	/* DSA does not expect to find the fake VLAN header, so remove or
	 * overwrite it; see dsa_switch_rcv: skb_push(skb, ETH_HLEN).
	 */
	if (is_tagged)
		skb = dsa_8021q_remove_header(skb);

	return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
					      is_meta);
}
/* Tagger operations for the SJA1105, registered as a DSA tag driver at
 * the bottom of this file.
 */
static struct dsa_device_ops sja1105_netdev_ops = {
	/* Identification */
	.name		= "sja1105",
	.proto		= DSA_TAG_PROTO_SJA1105,
	/* Per-frame tag overhead: one VLAN header */
	.overhead	= VLAN_HLEN,
	/* Datapath hooks */
	.filter		= sja1105_filter,
	.rcv		= sja1105_rcv,
	.xmit		= sja1105_xmit,
};

MODULE_LICENSE("GPL v2");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1105);

module_dsa_tag_driver(sja1105_netdev_ops);