alistair23-linux/include/uapi/linux/neighbour.h
Roopa Prabhu 3ad7a4b141 vxlan: support fdb and learning in COLLECT_METADATA mode
Vxlan COLLECT_METADATA mode today solves the per-vni netdev
scalability problem in l3 networks. It expects all forwarding
information to be present in dst_metadata. This patch series
enhances collect metadata mode to include the case where only
vni is present in dst_metadata, and the vxlan driver can then use
the rest of the forwarding information database to make forwarding
decisions. There is no change to default COLLECT_METADATA
behaviour. These changes only apply to COLLECT_METADATA when
used with the bridging use-case with a special dst_metadata
tunnel info flag (eg: where vxlan device is part of a bridge).
For all this to work, the vxlan driver will need to now support a
single fdb table hashed by mac + vni. This series essentially makes
this happen.

use-case and workflow:
vxlan collect metadata device participates in bridging vlan
to vn-segments. Bridge driver above the vxlan device,
sends the vni corresponding to the vlan in the dst_metadata.
vxlan driver will lookup forwarding database with (mac + vni)
for the required remote destination information to forward the
packet.

Changes introduced by this patch:
    - allow learning and forwarding database state in vxlan netdev in
      COLLECT_METADATA mode. Current behaviour is not changed
      by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used
      to support the new bridge friendly mode.
    - A single fdb table hashed by (mac, vni) to allow fdb entries with
      multiple vnis in the same fdb table
    - rx path already has the vni
    - tx path expects a vni in the packet with dst_metadata
    - prior to this series, fdb remote_dsts carried remote vni and
      the vxlan device carrying the fdb table represented the
      source vni. With the vxlan device now representing multiple vnis,
      this patch adds a src vni attribute to the fdb entry. The remote
      vni already uses NDA_VNI attribute. This patch introduces
      NDA_SRC_VNI netlink attribute to represent the src vni in a multi
      vni fdb table.

iproute2 example (patched and pruned iproute2 output to just show
relevant fdb entries):
The example shows the same host MAC learnt on two VNIs.

before (netdev per vni):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan1000 dst 12.0.0.8 self

after this patch with collect metadata in bridged mode (single netdev):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan0 src_vni 1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan0 src_vni 1000 dst 12.0.0.8 self

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-02-03 15:21:21 -05:00

171 lines
4.1 KiB
C

#ifndef __LINUX_NEIGHBOUR_H
#define __LINUX_NEIGHBOUR_H
#include <linux/types.h>
#include <linux/netlink.h>
/*
 * Fixed-layout neighbour message header.
 *
 * NOTE(review): presumably carried at the start of neighbour netlink
 * messages and followed by NDA_* attributes -- confirm against users.
 */
struct ndmsg {
	__u8	ndm_family;
	__u8	ndm_pad1;	/* padding */
	__u16	ndm_pad2;	/* padding */
	__s32	ndm_ifindex;
	__u16	ndm_state;	/* presumably NUD_* bits -- TODO confirm */
	__u8	ndm_flags;	/* presumably NTF_* bits -- TODO confirm */
	__u8	ndm_type;
};
/*
 * NDA_* attribute identifiers.
 *
 * Values are assigned implicitly from zero, so the order of the
 * enumerators defines their numeric values: new entries must only be
 * added immediately before __NDA_MAX.
 */
enum {
	NDA_UNSPEC,
	NDA_DST,
	NDA_LLADDR,
	NDA_CACHEINFO,
	NDA_PROBES,
	NDA_VLAN,
	NDA_PORT,
	NDA_VNI,		/* remote vni of a vxlan fdb entry */
	NDA_IFINDEX,
	NDA_MASTER,
	NDA_LINK_NETNSID,
	NDA_SRC_VNI,		/* source vni in a multi-vni vxlan fdb table */
	__NDA_MAX		/* sentinel, not an attribute */
};

/* Highest valid NDA_* attribute identifier. */
#define NDA_MAX		(__NDA_MAX - 1)
/*
 * Neighbour cache entry flags.
 *
 * Each flag occupies a single bit; bits 5 and 6 are not assigned here.
 */
#define NTF_USE		(1 << 0)
#define NTF_SELF	(1 << 1)
#define NTF_MASTER	(1 << 2)
#define NTF_PROXY	(1 << 3)	/* == ATF_PUBL */
#define NTF_EXT_LEARNED	(1 << 4)
#define NTF_ROUTER	(1 << 7)
/*
 * Neighbour cache entry states.
 */
#define NUD_INCOMPLETE	0x01
#define NUD_REACHABLE	0x02
#define NUD_STALE	0x04
#define NUD_DELAY	0x08
#define NUD_PROBE	0x10
#define NUD_FAILED	0x20

/*
 * Dummy states.
 *
 * NUD_NOARP and NUD_PERMANENT are pseudostates: they never change and
 * involve no address resolution or NUD. A NUD_PERMANENT entry also
 * cannot be deleted by garbage collectors.
 */
#define NUD_NOARP	0x40
#define NUD_PERMANENT	0x80
#define NUD_NONE	0x00
/*
 * Cache bookkeeping record for a neighbour entry: four 32-bit values.
 *
 * NOTE(review): presumably the payload of the NDA_CACHEINFO attribute;
 * the units of the first three fields are not stated here -- confirm
 * against the code that fills them in.
 */
struct nda_cacheinfo {
	__u32	ndm_confirmed;
	__u32	ndm_used;
	__u32	ndm_updated;
	__u32	ndm_refcnt;
};
/*****************************************************************
* Neighbour tables specific messages.
*
* To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
* NLM_F_DUMP flag set. Every neighbour table configuration is
* spread over multiple messages to avoid running into message
* size limits on systems with many interfaces. The first message
* in the sequence transports all non-device-specific data such as
* statistics, configuration, and the default parameter set.
* This message is followed by 0..n messages carrying device
* specific parameter sets.
* Although the ordering should be sufficient, NDTA_NAME can be
* used to identify sequences. The initial message can be identified
* by checking for NDTA_CONFIG. The device specific messages do
* not contain this TLV but have NDTPA_IFINDEX set to the
* corresponding interface index.
*
* To change neighbour table attributes, send RTM_SETNEIGHTBL
* with NDTA_NAME set. Changeable attributes include NDTA_THRESH[1-3],
* NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
* otherwise. Device specific parameter sets can be changed by
* setting NDTPA_IFINDEX to the interface index of the corresponding
* device.
****/
/*
 * Neighbour table statistics: eleven 64-bit counters.
 *
 * This structure is the payload of the NDTA_STATS attribute, which is
 * marked read-only where that attribute is declared.
 */
struct ndt_stats {
	__u64	ndts_allocs;
	__u64	ndts_destroys;
	__u64	ndts_hash_grows;
	__u64	ndts_res_failed;
	__u64	ndts_lookups;
	__u64	ndts_hits;
	__u64	ndts_rcv_probes_mcast;
	__u64	ndts_rcv_probes_ucast;
	__u64	ndts_periodic_gc_runs;
	__u64	ndts_forced_gc_runs;
	__u64	ndts_table_fulls;
};
/*
 * NDTPA_* identifiers: the TLVs nested inside NDTA_PARMS.
 *
 * The trailing comment on each entry gives its payload type and, where
 * applicable, whether it can be changed. Values are assigned implicitly
 * from zero, so new entries must only be added immediately before
 * __NDTPA_MAX.
 */
enum {
	NDTPA_UNSPEC,
	NDTPA_IFINDEX,			/* u32, unchangeable */
	NDTPA_REFCNT,			/* u32, read-only */
	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
	NDTPA_RETRANS_TIME,		/* u64, msecs */
	NDTPA_GC_STALETIME,		/* u64, msecs */
	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
	NDTPA_QUEUE_LEN,		/* u32 */
	NDTPA_APP_PROBES,		/* u32 */
	NDTPA_UCAST_PROBES,		/* u32 */
	NDTPA_MCAST_PROBES,		/* u32 */
	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
	NDTPA_PROXY_DELAY,		/* u64, msecs */
	NDTPA_PROXY_QLEN,		/* u32 */
	NDTPA_LOCKTIME,			/* u64, msecs */
	NDTPA_QUEUE_LENBYTES,		/* u32 */
	NDTPA_MCAST_REPROBES,		/* u32 */
	NDTPA_PAD,			/* presumably alignment padding -- TODO confirm */
	__NDTPA_MAX			/* sentinel, not an attribute */
};

/* Highest valid NDTPA_* identifier. */
#define NDTPA_MAX	(__NDTPA_MAX - 1)
/*
 * Fixed-layout neighbour table message header: a family byte followed
 * by three bytes of padding.
 *
 * NOTE(review): presumably the header of RTM_GETNEIGHTBL /
 * RTM_SETNEIGHTBL messages, followed by NDTA_* attributes -- confirm.
 */
struct ndtmsg {
	__u8	ndtm_family;
	__u8	ndtm_pad1;	/* padding */
	__u16	ndtm_pad2;	/* padding */
};
/*
 * Neighbour table configuration record.
 *
 * This structure is the payload of the NDTA_CONFIG attribute, which is
 * marked read-only where that attribute is declared.
 */
struct ndt_config {
	__u16	ndtc_key_len;
	__u16	ndtc_entry_size;
	__u32	ndtc_entries;
	__u32	ndtc_last_flush;	/* delta to now in msecs */
	__u32	ndtc_last_rand;		/* delta to now in msecs */
	__u32	ndtc_hash_rnd;
	__u32	ndtc_hash_mask;
	__u32	ndtc_hash_chain_gc;
	__u32	ndtc_proxy_qlen;
};
/*
 * NDTA_* identifiers: top-level attributes of a neighbour table message.
 *
 * The trailing comment on each entry gives its payload type and, where
 * applicable, whether it can be changed. Values are assigned implicitly
 * from zero, so new entries must only be added immediately before
 * __NDTA_MAX.
 */
enum {
	NDTA_UNSPEC,
	NDTA_NAME,		/* char *, unchangeable */
	NDTA_THRESH1,		/* u32 */
	NDTA_THRESH2,		/* u32 */
	NDTA_THRESH3,		/* u32 */
	NDTA_CONFIG,		/* struct ndt_config, read-only */
	NDTA_PARMS,		/* nested TLV NDTPA_* */
	NDTA_STATS,		/* struct ndt_stats, read-only */
	NDTA_GC_INTERVAL,	/* u64, msecs */
	NDTA_PAD,		/* presumably alignment padding -- TODO confirm */
	__NDTA_MAX		/* sentinel, not an attribute */
};

/* Highest valid NDTA_* identifier. */
#define NDTA_MAX	(__NDTA_MAX - 1)
#endif