
Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
  IB/ipath: kbuild infrastructure
  IB/ipath: infiniband verbs support
  IB/ipath: misc infiniband code, part 2
  IB/ipath: misc infiniband code, part 1
  IB/ipath: infiniband RC protocol support
  IB/ipath: infiniband UC and UD protocol support
  IB/ipath: infiniband header files
  IB/ipath: layering interfaces used by higher-level driver code
  IB/ipath: support for userspace apps using core driver
  IB/ipath: sysfs and ipathfs support for core driver
  IB/ipath: misc driver support code
  IB/ipath: chip initialisation code, and diag support
  IB/ipath: support for PCI Express devices
  IB/ipath: support for HyperTransport devices
  IB/ipath: core driver header files
  IB/ipath: core device driver
Linus Torvalds 2006-04-02 12:51:22 -07:00
commit cd5e25d93e
40 changed files with 25108 additions and 0 deletions


@@ -1451,6 +1451,12 @@ P: Juanjo Ciarlante
M: jjciarla@raiz.uncu.edu.ar
S: Maintained
IPATH DRIVER:
P: Bryan O'Sullivan
M: support@pathscale.com
L: openib-general@openib.org
S: Supported
IPX NETWORK LAYER
P: Arnaldo Carvalho de Melo
M: acme@conectiva.com.br


@@ -69,6 +69,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_MMC) += mmc/
obj-$(CONFIG_NEW_LEDS) += leds/
obj-$(CONFIG_INFINIBAND) += infiniband/
obj-$(CONFIG_IPATH_CORE) += infiniband/
obj-$(CONFIG_SGI_SN) += sn/
obj-y += firmware/
obj-$(CONFIG_CRYPTO) += crypto/


@@ -30,6 +30,7 @@ config INFINIBAND_USER_ACCESS
<http://www.openib.org>.
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ipath/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"


@@ -1,4 +1,5 @@
obj-$(CONFIG_INFINIBAND) += core/
obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
obj-$(CONFIG_IPATH_CORE) += hw/ipath/
obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/


@@ -0,0 +1,16 @@
config IPATH_CORE
tristate "PathScale InfiniPath Driver"
depends on 64BIT && PCI_MSI && NET
---help---
This is a low-level driver for PathScale InfiniPath host channel
adapters (HCAs) based on the HT-400 and PE-800 chips.
config INFINIBAND_IPATH
tristate "PathScale InfiniPath Verbs Driver"
depends on IPATH_CORE && INFINIBAND
---help---
This is a driver that provides InfiniBand verbs support for
PathScale InfiniPath host channel adapters (HCAs). This
allows these devices to be used with both kernel upper level
protocols such as IP-over-InfiniBand as well as with userspace
applications (in conjunction with InfiniBand userspace access).


@@ -0,0 +1,36 @@
EXTRA_CFLAGS += -DIPATH_IDSTR='"PathScale kernel.org driver"' \
-DIPATH_KERN_TYPE=0
obj-$(CONFIG_IPATH_CORE) += ipath_core.o
obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
ipath_core-y := \
ipath_diag.o \
ipath_driver.o \
ipath_eeprom.o \
ipath_file_ops.o \
ipath_fs.o \
ipath_ht400.o \
ipath_init_chip.o \
ipath_intr.o \
ipath_layer.o \
ipath_pe800.o \
ipath_stats.o \
ipath_sysfs.o \
ipath_user_pages.o
ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
ib_ipath-y := \
ipath_cq.o \
ipath_keys.o \
ipath_mad.o \
ipath_mr.o \
ipath_qp.o \
ipath_rc.o \
ipath_ruc.o \
ipath_srq.o \
ipath_uc.o \
ipath_ud.o \
ipath_verbs.o \
ipath_verbs_mcast.o


@@ -0,0 +1,616 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _IPATH_COMMON_H
#define _IPATH_COMMON_H
/*
* This file contains defines, structures, etc. that are used
* to communicate between kernel and user code.
*/
/* This is the IEEE-assigned OUI for PathScale, Inc. */
#define IPATH_SRC_OUI_1 0x00
#define IPATH_SRC_OUI_2 0x11
#define IPATH_SRC_OUI_3 0x75
/* version of protocol header (known to chip also). In the long run,
* we should be able to generate and accept a range of version numbers;
* for now we only accept one, and it's compiled in.
*/
#define IPS_PROTO_VERSION 2
/*
* These are compile time constants that you may want to enable or disable
* if you are trying to debug problems with code or performance.
* IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
* fastpath code
* IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
* traced in fastpath code
* _IPATH_TRACING define as 0 if you want to remove all tracing in a
* compilation unit
* _IPATH_DEBUGGING define as 0 if you want to remove debug prints
*/
/*
* The value in the BTH QP field that InfiniPath uses to differentiate
* an infinipath protocol IB packet vs standard IB transport
*/
#define IPATH_KD_QP 0x656b79
/*
* valid states passed to ipath_set_linkstate() user call
*/
#define IPATH_IB_LINKDOWN 0
#define IPATH_IB_LINKARM 1
#define IPATH_IB_LINKACTIVE 2
#define IPATH_IB_LINKINIT 3
#define IPATH_IB_LINKDOWN_SLEEP 4
#define IPATH_IB_LINKDOWN_DISABLE 5
/*
* stats maintained by the driver. For now, at least, this is global
* to all minor devices.
*/
struct infinipath_stats {
/* number of interrupts taken */
__u64 sps_ints;
/* number of interrupts for errors */
__u64 sps_errints;
/* number of errors from chip (not incl. packet errors or CRC) */
__u64 sps_errs;
/* number of packet errors from chip other than CRC */
__u64 sps_pkterrs;
/* number of packets with CRC errors (ICRC and VCRC) */
__u64 sps_crcerrs;
/* number of hardware errors reported (parity, etc.) */
__u64 sps_hwerrs;
/* number of times IB link changed state unexpectedly */
__u64 sps_iblink;
/* no longer used; left for compatibility */
__u64 sps_unused3;
/* number of kernel (port0) packets received */
__u64 sps_port0pkts;
/* number of "ethernet" packets sent by driver */
__u64 sps_ether_spkts;
/* number of "ethernet" packets received by driver */
__u64 sps_ether_rpkts;
/* number of SMA packets sent by driver */
__u64 sps_sma_spkts;
/* number of SMA packets received by driver */
__u64 sps_sma_rpkts;
/* number of times all ports rcvhdrq was full and packet dropped */
__u64 sps_hdrqfull;
/* number of times all ports egrtid was full and packet dropped */
__u64 sps_etidfull;
/*
* number of times we tried to send from driver, but no pio buffers
* avail
*/
__u64 sps_nopiobufs;
/* number of ports currently open */
__u64 sps_ports;
/* list of pkeys (other than default) accepted (0 means not set) */
__u16 sps_pkeys[4];
/* lids for up to 4 infinipaths, indexed by infinipath # */
__u16 sps_lid[4];
/* number of user ports per chip (not IB ports) */
__u32 sps_nports;
/* not our interrupt, or already handled */
__u32 sps_nullintr;
/* max number of packets handled per receive call */
__u32 sps_maxpkts_call;
/* avg number of packets handled per receive call */
__u32 sps_avgpkts_call;
/* total number of pages locked */
__u64 sps_pagelocks;
/* total number of pages unlocked */
__u64 sps_pageunlocks;
/*
* Number of packets dropped in kernel other than errors (ether
* packets if ipath not configured, sma/mad, etc.)
*/
__u64 sps_krdrops;
/* mlids for up to 4 infinipaths, indexed by infinipath # */
__u16 sps_mlid[4];
/* pad for future growth */
__u64 __sps_pad[45];
};
/*
* These are the status bits readable (in ascii form, 64bit value)
* from the "status" sysfs file.
*/
#define IPATH_STATUS_INITTED 0x1 /* basic initialization done */
#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */
/* Device has been disabled via admin request */
#define IPATH_STATUS_ADMIN_DISABLED 0x4
#define IPATH_STATUS_OIB_SMA 0x8 /* ipath_mad kernel SMA running */
#define IPATH_STATUS_SMA 0x10 /* user SMA running */
/* Chip has been found and initted */
#define IPATH_STATUS_CHIP_PRESENT 0x20
/* IB link is at ACTIVE, usable for data traffic */
#define IPATH_STATUS_IB_READY 0x40
/* link is configured, LID, MTU, etc. have been set */
#define IPATH_STATUS_IB_CONF 0x80
/* no link established, probably no cable */
#define IPATH_STATUS_IB_NOCABLE 0x100
/* A Fatal hardware error has occurred. */
#define IPATH_STATUS_HWERROR 0x200
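/*
 * Illustrative sketch (not part of this header): testing one of the status
 * bits above after reading the "status" sysfs file.  Per the comment, the
 * file holds the 64-bit value in ascii form; the parsing call (strtoull,
 * needing <stdlib.h>) and the numeric base are assumptions for the example.
 */
static inline int ipath_status_ib_ready(const char *ascii_status)
{
	unsigned long long s = strtoull(ascii_status, NULL, 0);

	return (s & IPATH_STATUS_IB_READY) != 0;
}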
/*
* The list of usermode accessible registers. Also see Reg_* later in file.
*/
typedef enum _ipath_ureg {
/* (RO) DMA RcvHdr to be used next. */
ur_rcvhdrtail = 0,
/* (RW) RcvHdr entry to be processed next by host. */
ur_rcvhdrhead = 1,
/* (RO) Index of next Eager index to use. */
ur_rcvegrindextail = 2,
/* (RW) Eager TID to be processed next */
ur_rcvegrindexhead = 3,
/* For internal use only; max register number. */
_IPATH_UregMax
} ipath_ureg;
/* bit values for spi_runtime_flags */
#define IPATH_RUNTIME_HT 0x1
#define IPATH_RUNTIME_PCIE 0x2
#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
#define IPATH_RUNTIME_RCVHDR_COPY 0x8
/*
* This structure is returned by ipath_userinit() immediately after
* open to get implementation-specific info, and info specific to this
* instance.
*
* This struct must have explicit pad fields where type sizes
* may result in different alignments between 32 and 64 bit
* programs, since the 64 bit kernel requires the user code
* to have matching offsets
*/
struct ipath_base_info {
/* version of hardware, for feature checking. */
__u32 spi_hw_version;
/* version of software, for feature checking. */
__u32 spi_sw_version;
/* InfiniPath port assigned, goes into sent packets */
__u32 spi_port;
/*
* IB MTU, packets IB data must be less than this.
* The MTU is in bytes, and will be a multiple of 4 bytes.
*/
__u32 spi_mtu;
/*
* Size of a PIO buffer. Any given packet's total size must be less
* than this (in words). Included is the starting control word, so
* if 513 is returned, then total pkt size is 512 words or less.
*/
__u32 spi_piosize;
/* size of the TID cache in infinipath, in entries */
__u32 spi_tidcnt;
/* size of the TID Eager list in infinipath, in entries */
__u32 spi_tidegrcnt;
/* size of a single receive header queue entry. */
__u32 spi_rcvhdrent_size;
/*
* Count of receive header queue entries allocated.
* This may be less than the spu_rcvhdrcnt passed in!
*/
__u32 spi_rcvhdr_cnt;
/* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
__u32 spi_runtime_flags;
/* address where receive buffer queue is mapped into */
__u64 spi_rcvhdr_base;
/* user program. */
/* base address of eager TID receive buffers. */
__u64 spi_rcv_egrbufs;
/* Allocated by initialization code, not by protocol. */
/*
* Size of each TID buffer in host memory, starting at
* spi_rcv_egrbufs. The buffers are virtually contiguous.
*/
__u32 spi_rcv_egrbufsize;
/*
* The special QP (queue pair) value that identifies an infinipath
* protocol packet from standard IB packets. More, probably much
* more, to be added.
*/
__u32 spi_qpair;
/*
* User register base for init code, not to be used directly by
* protocol or applications.
*/
__u64 __spi_uregbase;
/*
* Maximum buffer size in bytes that can be used in a single TID
* entry (assuming the buffer is aligned to this boundary). This is
* the minimum of what the hardware and software support. Guaranteed
* to be a power of 2.
*/
__u32 spi_tid_maxsize;
/*
* alignment of each pio send buffer (byte count
* to add to spi_piobufbase to get to second buffer)
*/
__u32 spi_pioalign;
/*
* The index of the first pio buffer available to this process;
* needed to do lookup in spi_pioavailaddr; not added to
* spi_piobufbase.
*/
__u32 spi_pioindex;
/* number of buffers mapped for this process */
__u32 spi_piocnt;
/*
* Base address of writeonly pio buffers for this process.
* Each buffer has spi_piosize words, and is aligned on spi_pioalign
* boundaries. spi_piocnt buffers are mapped from this address
*/
__u64 spi_piobufbase;
/*
* Base address of readonly memory copy of the pioavail registers.
* There are 2 bits for each buffer.
*/
__u64 spi_pioavailaddr;
/*
* Address where driver updates a copy of the interface and driver
* status (IPATH_STATUS_*) as a 64 bit value. It's followed by a
* string indicating hardware error, if there was one.
*/
__u64 spi_status;
/* number of chip ports available to user processes */
__u32 spi_nports;
/* unit number of chip we are using */
__u32 spi_unit;
/* num bufs in each contiguous set */
__u32 spi_rcv_egrperchunk;
/* size in bytes of each contiguous set */
__u32 spi_rcv_egrchunksize;
/* total size of mmap to cover full rcvegrbuffers */
__u32 spi_rcv_egrbuftotlen;
} __attribute__ ((aligned(8)));
/*
* This version number is given to the driver by the user code during
* initialization in the spu_userversion field of ipath_user_info, so
* the driver can check for compatibility with user code.
*
* The major version changes when data structures
* change in an incompatible way. The driver must be the same or higher
* for initialization to succeed. In some cases, a higher version
* driver will not interoperate with older software, and initialization
* will return an error.
*/
#define IPATH_USER_SWMAJOR 1
/*
* Minor version differences are always compatible
* within a major version; however, if the user software is newer
* than the driver software, some new features and/or structure fields
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference
*/
#define IPATH_USER_SWMINOR 2
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
#define IPATH_KERN_TYPE 0
/*
* Similarly, this is the kernel version going back to the user. It's
* slightly different, in that we want to tell if the driver was built as
* part of a PathScale release, or from the driver from OpenIB, kernel.org,
* or a standard distribution, for support reasons. The high bit is 0 for
* non-PathScale, and 1 for PathScale-built/supplied.
*
* It's returned by the driver to the user code during initialization in the
* spi_sw_version field of ipath_base_info, so the user code can in turn
* check for compatibility with the kernel.
*/
#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
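/*
 * Illustrative sketch (not part of this header): one way user code might
 * check the spi_sw_version returned in ipath_base_info against its own
 * IPATH_USER_SWVERSION.  The helper name and the exact policy shown
 * (major must match, kernel minor at least the user minor) are assumptions
 * for the example, not requirements stated by the driver.
 */
static inline int ipath_user_swversion_ok(__u32 spi_sw_version)
{
	/* strip the PathScale-built bit before comparing */
	__u32 vers = spi_sw_version & ~(1U << 31);

	return (vers >> 16) == IPATH_USER_SWMAJOR &&
		(vers & 0xffff) >= IPATH_USER_SWMINOR;
}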
/*
* This structure is passed to ipath_userinit() to tell the driver where
* user code buffers are, sizes, etc. The offsets and sizes of the
* fields must remain unchanged, for binary compatibility. It can
* be extended, if userversion is changed so user code can tell, if needed
*/
struct ipath_user_info {
/*
* version of user software, to detect compatibility issues.
* Should be set to IPATH_USER_SWVERSION.
*/
__u32 spu_userversion;
/* desired number of receive header queue entries */
__u32 spu_rcvhdrcnt;
/* size of struct base_info to write to */
__u32 spu_base_info_size;
/*
* number of words in KD protocol header
* This tells InfiniPath how many words to copy to rcvhdrq. If 0,
* kernel uses a default. Once set, attempts to set any other value
* are an error (EAGAIN) until driver is reloaded.
*/
__u32 spu_rcvhdrsize;
/*
* cache line aligned (64 byte) user address to
* which the rcvhdrtail register will be written by infinipath
* whenever it changes, so that no chip registers are read in
* the performance path.
*/
__u64 spu_rcvhdraddr;
/*
* address of struct base_info to write to
*/
__u64 spu_base_info;
} __attribute__ ((aligned(8)));
/* User commands. */
#define IPATH_CMD_MIN 16
#define IPATH_CMD_USER_INIT 16 /* set up userspace */
#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
#define IPATH_CMD_MAX 21
struct ipath_port_info {
__u32 num_active; /* number of active units */
__u32 unit; /* unit (chip) assigned to caller */
__u32 port; /* port on unit assigned to caller */
};
struct ipath_tid_info {
__u32 tidcnt;
/* make structure same size in 32 and 64 bit */
__u32 tid__unused;
/* virtual address of first page in transfer */
__u64 tidvaddr;
/* pointer (same size 32/64 bit) to __u16 tid array */
__u64 tidlist;
/*
* pointer (same size 32/64 bit) to bitmap of TIDs used
* for this call; checked for being large enough at open
*/
__u64 tidmap;
};
struct ipath_cmd {
__u32 type; /* command type */
union {
struct ipath_tid_info tid_info;
struct ipath_user_info user_info;
/* address in userspace of struct ipath_port_info to
write result to */
__u64 port_info;
/* enable/disable receipt of packets */
__u32 recv_ctrl;
/* partition key to set */
__u16 part_key;
} cmd;
};
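/*
 * Illustrative sketch (not part of this header): filling in an ipath_cmd
 * for IPATH_CMD_USER_INIT.  The assumption here is that the completed
 * struct is handed to the driver by write()ing it to the opened device
 * file descriptor; the fd, the receive header queue count and address are
 * example values only (the address must be cache-line aligned per the
 * field comment), and <string.h>/<unistd.h> are assumed for a userspace
 * build.
 */
static inline int ipath_example_user_init(int fd, __u64 rcvhdr_uaddr,
					  struct ipath_base_info *base)
{
	struct ipath_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.type = IPATH_CMD_USER_INIT;
	cmd.cmd.user_info.spu_userversion = IPATH_USER_SWVERSION;
	cmd.cmd.user_info.spu_rcvhdrcnt = 64;	/* example count only */
	cmd.cmd.user_info.spu_rcvhdrsize = 0;	/* 0: driver default */
	cmd.cmd.user_info.spu_rcvhdraddr = rcvhdr_uaddr;
	cmd.cmd.user_info.spu_base_info_size = sizeof(*base);
	cmd.cmd.user_info.spu_base_info = (__u64) (unsigned long) base;

	return write(fd, &cmd, sizeof(cmd)) == (ssize_t) sizeof(cmd) ? 0 : -1;
}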
struct ipath_iovec {
/* Pointer to data, but same size 32 and 64 bit */
__u64 iov_base;
/*
* Length of data; don't need 64 bits, but want
* ipath_sendpkt to remain same size as before 32 bit changes, so...
*/
__u64 iov_len;
};
/*
* Describes a single packet for send. Each packet can have one or more
* buffers, but the total length (exclusive of IB headers) must be less
* than the MTU, and if using the PIO method, entire packet length,
* including IB headers, must be less than the ipath_piosize value (words).
* Use of this necessitates including sys/uio.h
*/
struct __ipath_sendpkt {
__u32 sps_flags; /* flags for packet (TBD) */
__u32 sps_cnt; /* number of entries to use in sps_iov */
/* array of iov's describing packet. TEMPORARY */
struct ipath_iovec sps_iov[4];
};
/* Passed into SMA special file's ->read and ->write methods. */
struct ipath_sma_pkt
{
__u32 unit; /* unit on which to send packet */
__u64 data; /* address of payload in userspace */
__u32 len; /* length of payload */
};
/*
* Data layout in I2C flash (for GUID, etc.)
* All fields are little-endian binary unless otherwise stated
*/
#define IPATH_FLASH_VERSION 1
struct ipath_flash {
/* flash layout version (IPATH_FLASH_VERSION) */
__u8 if_fversion;
/* checksum protecting if_length bytes */
__u8 if_csum;
/*
* valid length (in use, protected by if_csum), including
* if_fversion and if_csum themselves
*/
__u8 if_length;
/* the GUID, in network order */
__u8 if_guid[8];
/* number of GUIDs to use, starting from if_guid */
__u8 if_numguid;
/* the board serial number, in ASCII */
char if_serial[12];
/* board mfg date (YYYYMMDD ASCII) */
char if_mfgdate[8];
/* last board rework/test date (YYYYMMDD ASCII) */
char if_testdate[8];
/* logging of error counts, TBD */
__u8 if_errcntp[4];
/* powered on hours, updated at driver unload */
__u8 if_powerhour[2];
/* ASCII free-form comment field */
char if_comment[32];
/* 78 bytes used, min flash size is 128 bytes */
__u8 if_future[50];
};
/*
* These are the counters implemented in the chip, and are listed in order.
* The InterCaps naming is taken straight from the chip spec.
*/
struct infinipath_counters {
__u64 LBIntCnt;
__u64 LBFlowStallCnt;
__u64 Reserved1;
__u64 TxUnsupVLErrCnt;
__u64 TxDataPktCnt;
__u64 TxFlowPktCnt;
__u64 TxDwordCnt;
__u64 TxLenErrCnt;
__u64 TxMaxMinLenErrCnt;
__u64 TxUnderrunCnt;
__u64 TxFlowStallCnt;
__u64 TxDroppedPktCnt;
__u64 RxDroppedPktCnt;
__u64 RxDataPktCnt;
__u64 RxFlowPktCnt;
__u64 RxDwordCnt;
__u64 RxLenErrCnt;
__u64 RxMaxMinLenErrCnt;
__u64 RxICRCErrCnt;
__u64 RxVCRCErrCnt;
__u64 RxFlowCtrlErrCnt;
__u64 RxBadFormatCnt;
__u64 RxLinkProblemCnt;
__u64 RxEBPCnt;
__u64 RxLPCRCErrCnt;
__u64 RxBufOvflCnt;
__u64 RxTIDFullErrCnt;
__u64 RxTIDValidErrCnt;
__u64 RxPKeyMismatchCnt;
__u64 RxP0HdrEgrOvflCnt;
__u64 RxP1HdrEgrOvflCnt;
__u64 RxP2HdrEgrOvflCnt;
__u64 RxP3HdrEgrOvflCnt;
__u64 RxP4HdrEgrOvflCnt;
__u64 RxP5HdrEgrOvflCnt;
__u64 RxP6HdrEgrOvflCnt;
__u64 RxP7HdrEgrOvflCnt;
__u64 RxP8HdrEgrOvflCnt;
__u64 Reserved6;
__u64 Reserved7;
__u64 IBStatusChangeCnt;
__u64 IBLinkErrRecoveryCnt;
__u64 IBLinkDownedCnt;
__u64 IBSymbolErrCnt;
};
/*
* The next set of defines are for packet headers, and chip register
* and memory bits that are visible to and/or used by user-mode software
* The other bits that are used only by the driver or diags are in
* ipath_registers.h
*/
/* RcvHdrFlags bits */
#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
#define INFINIPATH_RHF_LENGTH_SHIFT 0
#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
#define INFINIPATH_RHF_EGRINDEX_MASK 0x7FF
#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
#define INFINIPATH_RHF_H_ICRCERR 0x80000000
#define INFINIPATH_RHF_H_VCRCERR 0x40000000
#define INFINIPATH_RHF_H_PARITYERR 0x20000000
#define INFINIPATH_RHF_H_LENERR 0x10000000
#define INFINIPATH_RHF_H_MTUERR 0x08000000
#define INFINIPATH_RHF_H_IHDRERR 0x04000000
#define INFINIPATH_RHF_H_TIDERR 0x02000000
#define INFINIPATH_RHF_H_MKERR 0x01000000
#define INFINIPATH_RHF_H_IBERR 0x00800000
#define INFINIPATH_RHF_L_SWA 0x00008000
#define INFINIPATH_RHF_L_SWB 0x00004000
/* infinipath header fields */
#define INFINIPATH_I_VERS_MASK 0xF
#define INFINIPATH_I_VERS_SHIFT 28
#define INFINIPATH_I_PORT_MASK 0xF
#define INFINIPATH_I_PORT_SHIFT 24
#define INFINIPATH_I_TID_MASK 0x7FF
#define INFINIPATH_I_TID_SHIFT 13
#define INFINIPATH_I_OFFSET_MASK 0x1FFF
#define INFINIPATH_I_OFFSET_SHIFT 0
/* K_PktFlags bits */
#define INFINIPATH_KPF_INTR 0x1
/* SendPIO per-buffer control */
#define INFINIPATH_SP_LENGTHP1_MASK 0x3FF
#define INFINIPATH_SP_LENGTHP1_SHIFT 0
#define INFINIPATH_SP_INTR 0x80000000
#define INFINIPATH_SP_TEST 0x40000000
#define INFINIPATH_SP_TESTEBP 0x20000000
/* SendPIOAvail bits */
#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
#endif /* _IPATH_COMMON_H */
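As a reading aid for the RcvHdrFlags masks and shifts above, the following is a minimal sketch of pulling the packet length, receive type and eager index fields out of a receive-header-flags word; the helper names are illustrative and not part of the driver.

static inline __u32 ipath_rhf_field(__u64 rhf, __u32 mask, int shift)
{
	return (__u32) ((rhf >> shift) & mask);
}

/* decode a flags word already read from the receive header queue */
static inline void ipath_rhf_decode(__u64 rhf, __u32 *len, __u32 *type,
				    __u32 *egridx)
{
	*len    = ipath_rhf_field(rhf, INFINIPATH_RHF_LENGTH_MASK,
				  INFINIPATH_RHF_LENGTH_SHIFT);
	*type   = ipath_rhf_field(rhf, INFINIPATH_RHF_RCVTYPE_MASK,
				  INFINIPATH_RHF_RCVTYPE_SHIFT);
	*egridx = ipath_rhf_field(rhf, INFINIPATH_RHF_EGRINDEX_MASK,
				  INFINIPATH_RHF_EGRINDEX_SHIFT);
}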


@@ -0,0 +1,295 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/err.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
/**
* ipath_cq_enter - add a new entry to the completion queue
* @cq: completion queue
* @entry: work completion entry to add
* @solicited: true if @entry is a solicited entry
*
* This may be called with either the qp->s_lock or the qp->r_rq.lock held.
*/
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
unsigned long flags;
u32 next;
spin_lock_irqsave(&cq->lock, flags);
if (cq->head == cq->ibcq.cqe)
next = 0;
else
next = cq->head + 1;
if (unlikely(next == cq->tail)) {
spin_unlock_irqrestore(&cq->lock, flags);
if (cq->ibcq.event_handler) {
struct ib_event ev;
ev.device = cq->ibcq.device;
ev.element.cq = &cq->ibcq;
ev.event = IB_EVENT_CQ_ERR;
cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
}
return;
}
cq->queue[cq->head] = *entry;
cq->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
cq->notify = IB_CQ_NONE;
cq->triggered++;
/*
* This will cause send_complete() to be called in
* another thread.
*/
tasklet_hi_schedule(&cq->comptask);
}
spin_unlock_irqrestore(&cq->lock, flags);
if (entry->status != IB_WC_SUCCESS)
to_idev(cq->ibcq.device)->n_wqe_errs++;
}
/**
* ipath_poll_cq - poll for work completion entries
* @ibcq: the completion queue to poll
* @num_entries: the maximum number of entries to return
* @entry: pointer to array where work completions are placed
*
* Returns the number of completion entries polled.
*
* This may be called from interrupt context. Also called by ib_poll_cq()
* in the generic verbs code.
*/
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct ipath_cq *cq = to_icq(ibcq);
unsigned long flags;
int npolled;
spin_lock_irqsave(&cq->lock, flags);
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
if (cq->tail == cq->head)
break;
*entry = cq->queue[cq->tail];
if (cq->tail == cq->ibcq.cqe)
cq->tail = 0;
else
cq->tail++;
}
spin_unlock_irqrestore(&cq->lock, flags);
return npolled;
}
static void send_complete(unsigned long data)
{
struct ipath_cq *cq = (struct ipath_cq *)data;
/*
* The completion handler will most likely rearm the notification
* and poll for all pending entries. If a new completion entry
* is added while we are in this routine, tasklet_hi_schedule()
* won't call us again until we return so we check triggered to
* see if we need to call the handler again.
*/
for (;;) {
u8 triggered = cq->triggered;
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
if (cq->triggered == triggered)
return;
}
}
/**
* ipath_create_cq - create a completion queue
* @ibdev: the device this completion queue is attached to
* @entries: the minimum size of the completion queue
* @context: unused by the InfiniPath driver
* @udata: unused by the InfiniPath driver
*
* Returns a pointer to the completion queue or negative errno values
* for failure.
*
* Called by ib_create_cq() in the generic verbs code.
*/
struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct ipath_cq *cq;
struct ib_wc *wc;
struct ib_cq *ret;
/* Allocate the completion queue structure. */
cq = kmalloc(sizeof(*cq), GFP_KERNEL);
if (!cq) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
wc = vmalloc(sizeof(*wc) * (entries + 1));
if (!wc) {
kfree(cq);
ret = ERR_PTR(-ENOMEM);
goto bail;
}
/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
* The number of entries should be >= the number requested or return
* an error.
*/
cq->ibcq.cqe = entries;
cq->notify = IB_CQ_NONE;
cq->triggered = 0;
spin_lock_init(&cq->lock);
tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
cq->head = 0;
cq->tail = 0;
cq->queue = wc;
ret = &cq->ibcq;
bail:
return ret;
}
/**
* ipath_destroy_cq - destroy a completion queue
* @ibcq: the completion queue to destroy.
*
* Returns 0 for success.
*
* Called by ib_destroy_cq() in the generic verbs code.
*/
int ipath_destroy_cq(struct ib_cq *ibcq)
{
struct ipath_cq *cq = to_icq(ibcq);
tasklet_kill(&cq->comptask);
vfree(cq->queue);
kfree(cq);
return 0;
}
/**
* ipath_req_notify_cq - change the notification type for a completion queue
* @ibcq: the completion queue
* @notify: the type of notification to request
*
* Returns 0 for success.
*
* This may be called from interrupt context. Also called by
* ib_req_notify_cq() in the generic verbs code.
*/
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
{
struct ipath_cq *cq = to_icq(ibcq);
unsigned long flags;
spin_lock_irqsave(&cq->lock, flags);
/*
* Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
* any other transitions.
*/
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = notify;
spin_unlock_irqrestore(&cq->lock, flags);
return 0;
}
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct ipath_cq *cq = to_icq(ibcq);
struct ib_wc *wc, *old_wc;
u32 n;
int ret;
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
wc = vmalloc(sizeof(*wc) * (cqe + 1));
if (!wc) {
ret = -ENOMEM;
goto bail;
}
spin_lock_irq(&cq->lock);
if (cq->head < cq->tail)
n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
else
n = cq->head - cq->tail;
if (unlikely((u32)cqe < n)) {
spin_unlock_irq(&cq->lock);
vfree(wc);
ret = -EOVERFLOW;
goto bail;
}
for (n = 0; cq->tail != cq->head; n++) {
wc[n] = cq->queue[cq->tail];
if (cq->tail == cq->ibcq.cqe)
cq->tail = 0;
else
cq->tail++;
}
cq->ibcq.cqe = cqe;
cq->head = n;
cq->tail = 0;
old_wc = cq->queue;
cq->queue = wc;
spin_unlock_irq(&cq->lock);
vfree(old_wc);
ret = 0;
bail:
return ret;
}
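A note on the head/tail arithmetic in ipath_resize_cq() above: the queue is a ring of ibcq.cqe + 1 slots, so the count of completions currently queued must account for wraparound. A minimal sketch of that count, as an illustrative helper rather than driver code:

/* completions currently queued in a ring of (cqe + 1) slots */
static u32 cq_entries_used(u32 head, u32 tail, u32 cqe)
{
	return head >= tail ? head - tail : cqe + 1 + head - tail;
}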


@@ -0,0 +1,96 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _IPATH_DEBUG_H
#define _IPATH_DEBUG_H
#ifndef _IPATH_DEBUGGING /* debugging enabled or not */
#define _IPATH_DEBUGGING 1
#endif
#if _IPATH_DEBUGGING
/*
* Mask values for debugging. The scheme allows us to compile out any
* of the debug tracing stuff, and if compiled in, to enable or disable
* dynamically. This can be set at modprobe time also:
* modprobe infinipath.ko infinipath_debug=7
*/
#define __IPATH_INFO 0x1 /* generic low verbosity stuff */
#define __IPATH_DBG 0x2 /* generic debug */
#define __IPATH_TRSAMPLE 0x8 /* generate trace buffer sample entries */
/* leave some low verbosity spots open */
#define __IPATH_VERBDBG 0x40 /* very verbose debug */
#define __IPATH_PKTDBG 0x80 /* print packet data */
/* print process startup (init)/exit messages */
#define __IPATH_PROCDBG 0x100
/* print mmap/nopage stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x200
#define __IPATH_USER_SEND 0x1000 /* use user mode send */
#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
#define __IPATH_SMADBG 0x8000 /* sma packet debug */
#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) general debug on */
#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings on */
#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors on */
#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump on */
#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump on */
#else /* _IPATH_DEBUGGING */
/*
* define all of these even with debugging off, for the few places that do
* if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the
* compiler eliminate the code
*/
#define __IPATH_INFO 0x0 /* generic low verbosity stuff */
#define __IPATH_DBG 0x0 /* generic debug */
#define __IPATH_TRSAMPLE 0x0 /* generate trace buffer sample entries */
#define __IPATH_VERBDBG 0x0 /* very verbose debug */
#define __IPATH_PKTDBG 0x0 /* print packet data */
#define __IPATH_PROCDBG 0x0 /* print process startup (init)/exit messages */
/* print mmap/nopage stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x0
#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */
#define __IPATH_SMADBG 0x0 /* sma packet debug */
#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) general debug on */
#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */
#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */
#define __IPATH_IPATHPD 0x0 /* Ethernet (IPATH) packet dump on */
#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) table dump on */
#endif /* _IPATH_DEBUGGING */
#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
#endif /* _IPATH_DEBUG_H */
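As the comments above indicate, these masks are tested against the module-wide debug value (infinipath_debug in the modprobe example), and with _IPATH_DEBUGGING off every mask becomes 0, so the compiler can eliminate the guarded code. A hypothetical use site, for illustration only (the real driver's debug macros live elsewhere):

static inline void example_sma_trace(unsigned int infinipath_debug)
{
	if (infinipath_debug & __IPATH_SMADBG) {
		/* ...emit the sma packet trace here... */
	}
}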


@@ -0,0 +1,379 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*
* This file contains support for diagnostic functions. It is accessed by
* opening the ipath_diag device, normally minor number 129. Diagnostic use
* of the InfiniPath chip may render the chip or board unusable until the
* driver is unloaded, or in some cases, until the system is rebooted.
*
* Accesses to the chip through this interface are not similar to going
* through the /sys/bus/pci resource mmap interface.
*/
#include <linux/pci.h>
#include <asm/uaccess.h>
#include "ipath_common.h"
#include "ipath_kernel.h"
#include "ips_common.h"
#include "ipath_layer.h"
int ipath_diag_inuse;
static int diag_set_link;
static int ipath_diag_open(struct inode *in, struct file *fp);
static int ipath_diag_release(struct inode *in, struct file *fp);
static ssize_t ipath_diag_read(struct file *fp, char __user *data,
size_t count, loff_t *off);
static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
size_t count, loff_t *off);
static struct file_operations diag_file_ops = {
.owner = THIS_MODULE,
.write = ipath_diag_write,
.read = ipath_diag_read,
.open = ipath_diag_open,
.release = ipath_diag_release
};
static struct cdev *diag_cdev;
static struct class_device *diag_class_dev;
int ipath_diag_init(void)
{
return ipath_cdev_init(IPATH_DIAG_MINOR, "ipath_diag",
&diag_file_ops, &diag_cdev, &diag_class_dev);
}
void ipath_diag_cleanup(void)
{
ipath_cdev_cleanup(&diag_cdev, &diag_class_dev);
}
/**
* ipath_read_umem64 - read a 64-bit quantity from the chip into user space
* @dd: the infinipath device
* @uaddr: the location to store the data in user memory
* @caddr: the source chip address (full pointer, not offset)
* @count: number of bytes to copy (multiple of 32 bits)
*
* This function also localizes all chip memory accesses.
* The copy should be written such that we read full cacheline packets
* from the chip. This is usually used for a single qword
*
* NOTE: This assumes the chip address is 64-bit aligned.
*/
static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
const void __iomem *caddr, size_t count)
{
const u64 __iomem *reg_addr = caddr;
const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
int ret;
/* not very efficient, but it works for now */
if (reg_addr < dd->ipath_kregbase ||
reg_end > dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
while (reg_addr < reg_end) {
u64 data = readq(reg_addr);
if (copy_to_user(uaddr, &data, sizeof(u64))) {
ret = -EFAULT;
goto bail;
}
reg_addr++;
uaddr++;
}
ret = 0;
bail:
return ret;
}
/**
* ipath_write_umem64 - write a 64-bit quantity to the chip from user space
* @dd: the infinipath device
* @caddr: the destination chip address (full pointer, not offset)
* @uaddr: the source of the data in user memory
* @count: the number of bytes to copy (multiple of 32 bits)
*
* This is usually used for a single qword
* NOTE: This assumes the chip address is 64-bit aligned.
*/
static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
const void __user *uaddr, size_t count)
{
u64 __iomem *reg_addr = caddr;
const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
int ret;
/* not very efficient, but it works for now */
if (reg_addr < dd->ipath_kregbase ||
reg_end > dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
while (reg_addr < reg_end) {
u64 data;
if (copy_from_user(&data, uaddr, sizeof(data))) {
ret = -EFAULT;
goto bail;
}
writeq(data, reg_addr);
reg_addr++;
uaddr++;
}
ret = 0;
bail:
return ret;
}
/**
* ipath_read_umem32 - read a 32-bit quantity from the chip into user space
* @dd: the infinipath device
* @uaddr: the location to store the data in user memory
* @caddr: the source chip address (full pointer, not offset)
* @count: number of bytes to copy
*
* read 32 bit values, not 64 bit; for memories that only
* support 32 bit reads; usually a single dword.
*/
static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
const void __iomem *caddr, size_t count)
{
const u32 __iomem *reg_addr = caddr;
const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
int ret;
if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
reg_end > (u32 __iomem *) dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
/* not very efficient, but it works for now */
while (reg_addr < reg_end) {
u32 data = readl(reg_addr);
if (copy_to_user(uaddr, &data, sizeof(data))) {
ret = -EFAULT;
goto bail;
}
reg_addr++;
uaddr++;
}
ret = 0;
bail:
return ret;
}
/**
* ipath_write_umem32 - write a 32-bit quantity to the chip from user space
* @dd: the infinipath device
* @caddr: the destination chip address (full pointer, not offset)
* @uaddr: the source of the data in user memory
* @count: number of bytes to copy
*
* write 32 bit values, not 64 bit; for memories that only
* support 32 bit write; usually a single dword.
*/
static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
const void __user *uaddr, size_t count)
{
u32 __iomem *reg_addr = caddr;
const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
int ret;
if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
reg_end > (u32 __iomem *) dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
while (reg_addr < reg_end) {
u32 data;
if (copy_from_user(&data, uaddr, sizeof(data))) {
ret = -EFAULT;
goto bail;
}
writel(data, reg_addr);
reg_addr++;
uaddr++;
}
ret = 0;
bail:
return ret;
}
static int ipath_diag_open(struct inode *in, struct file *fp)
{
struct ipath_devdata *dd;
int unit = 0; /* XXX this is bogus */
unsigned long flags;
int ret;
dd = ipath_lookup(unit);
mutex_lock(&ipath_mutex);
spin_lock_irqsave(&ipath_devs_lock, flags);
if (ipath_diag_inuse) {
ret = -EBUSY;
goto bail;
}
list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
/*
* we need at least one infinipath device to be present
* (don't use INITTED, because we want to be able to open
* even if device is in freeze mode, which cleared INITTED).
* There is a small amount of risk to this, which is why we
* also verify kregbase is set.
*/
if (!(dd->ipath_flags & IPATH_PRESENT) ||
!dd->ipath_kregbase)
continue;
ipath_diag_inuse = 1;
diag_set_link = 0;
ret = 0;
goto bail;
}
ret = -ENODEV;
bail:
spin_unlock_irqrestore(&ipath_devs_lock, flags);
mutex_unlock(&ipath_mutex);
/* Only expose a way to reset the device if we
make it into diag mode. */
if (ret == 0)
ipath_expose_reset(&dd->pcidev->dev);
return ret;
}
static int ipath_diag_release(struct inode *i, struct file *f)
{
mutex_lock(&ipath_mutex);
ipath_diag_inuse = 0;
mutex_unlock(&ipath_mutex);
return 0;
}
static ssize_t ipath_diag_read(struct file *fp, char __user *data,
size_t count, loff_t *off)
{
int unit = 0; /* XXX provide for reads on other units some day */
struct ipath_devdata *dd;
void __iomem *kreg_base;
ssize_t ret;
dd = ipath_lookup(unit);
if (!dd) {
ret = -ENODEV;
goto bail;
}
kreg_base = dd->ipath_kregbase;
if (count == 0)
ret = 0;
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit reads */
ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
else
ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
if (ret >= 0) {
*off += count;
ret = count;
}
bail:
return ret;
}
static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
int unit = 0; /* XXX this is bogus */
struct ipath_devdata *dd;
void __iomem *kreg_base;
ssize_t ret;
dd = ipath_lookup(unit);
if (!dd) {
ret = -ENODEV;
goto bail;
}
kreg_base = dd->ipath_kregbase;
if (count == 0)
ret = 0;
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit writes */
ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
else
ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
if (ret >= 0) {
*off += count;
ret = count;
}
bail:
return ret;
}
void ipath_diag_bringup_link(struct ipath_devdata *dd)
{
if (diag_set_link || (dd->ipath_flags & IPATH_LINKACTIVE))
return;
diag_set_link = 1;
ipath_cdbg(VERBOSE, "Trying to set to set link active for "
"diag pkt\n");
ipath_layer_set_linkstate(dd, IPATH_IB_LINKARM);
ipath_layer_set_linkstate(dd, IPATH_IB_LINKACTIVE);
}

File diff suppressed because it is too large


@@ -0,0 +1,613 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include "ipath_kernel.h"
/*
* InfiniPath I2C driver for a serial eeprom. This is not a generic
* I2C interface. For a start, the device we're using (Atmel AT24C11)
* doesn't work like a regular I2C device. It looks like one
* electrically, but not logically. Normal I2C devices have a single
* 7-bit or 10-bit I2C address that they respond to. Valid 7-bit
* addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78
* to 0x7F are special reserved addresses (e.g. 0x00 is the "general
* call" address.) The Atmel device, on the other hand, responds to ALL
* 7-bit addresses. It's designed to be the only device on a given I2C
* bus. A 7-bit address corresponds to the memory address within the
* Atmel device itself.
*
* Also, the timing requirements mean more than simple software
* bitbanging, with readbacks from chip to ensure timing (simple udelay
* is not enough).
*
* This all means that accessing the device is specialized enough
* that using the standard kernel I2C bitbanging interface would be
* impossible. For example, the core I2C eeprom driver expects to find
* a device at one or more of a limited set of addresses only. It doesn't
* allow writing to an eeprom. It also doesn't provide any means of
* accessing eeprom contents from within the kernel, only via sysfs.
*/
enum i2c_type {
i2c_line_scl = 0,
i2c_line_sda
};
enum i2c_state {
i2c_line_low = 0,
i2c_line_high
};
#define READ_CMD 1
#define WRITE_CMD 0
static int eeprom_init;
/*
* The gpioval manipulation really should be protected by spinlocks
* or be converted to use atomic operations.
*/
/**
* i2c_gpio_set - set a GPIO line
* @dd: the infinipath device
* @line: the line to set
* @new_line_state: the state to set
*
* Returns 0 if the line was set to the new state successfully, non-zero
* on error.
*/
static int i2c_gpio_set(struct ipath_devdata *dd,
enum i2c_type line,
enum i2c_state new_line_state)
{
u64 read_val, write_val, mask, *gpioval;
gpioval = &dd->ipath_gpio_out;
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
if (line == i2c_line_scl)
mask = ipath_gpio_scl;
else
mask = ipath_gpio_sda;
if (new_line_state == i2c_line_high)
/* tri-state the output rather than force high */
write_val = read_val & ~mask;
else
/* config line to be an output */
write_val = read_val | mask;
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
/* set high and verify */
if (new_line_state == i2c_line_high)
write_val = 0x1UL;
else
write_val = 0x0UL;
if (line == i2c_line_scl) {
write_val <<= ipath_gpio_scl_num;
*gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
*gpioval |= write_val;
} else {
write_val <<= ipath_gpio_sda_num;
*gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
*gpioval |= write_val;
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
return 0;
}
/**
* i2c_gpio_get - get a GPIO line state
* @dd: the infinipath device
* @line: the line to get
* @curr_statep: where to put the line state
*
* Returns 0 if the line was set to the new state successfully, non-zero
* on error. curr_state is not set on error.
*/
static int i2c_gpio_get(struct ipath_devdata *dd,
enum i2c_type line,
enum i2c_state *curr_statep)
{
u64 read_val, write_val, mask;
int ret;
/* check args */
if (curr_statep == NULL) {
ret = 1;
goto bail;
}
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
/* config line to be an input */
if (line == i2c_line_scl)
mask = ipath_gpio_scl;
else
mask = ipath_gpio_sda;
write_val = read_val & ~mask;
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
if (read_val & mask)
*curr_statep = i2c_line_high;
else
*curr_statep = i2c_line_low;
ret = 0;
bail:
return ret;
}
/**
* i2c_wait_for_writes - wait for a write
* @dd: the infinipath device
*
* We use this instead of udelay directly, so we can make sure
* that previous register writes have been flushed all the way
* to the chip. Since we are delaying anyway, the cost doesn't
* hurt, and makes the bit twiddling more regular
*/
static void i2c_wait_for_writes(struct ipath_devdata *dd)
{
(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
}
static void scl_out(struct ipath_devdata *dd, u8 bit)
{
i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
i2c_wait_for_writes(dd);
}
static void sda_out(struct ipath_devdata *dd, u8 bit)
{
i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
i2c_wait_for_writes(dd);
}
static u8 sda_in(struct ipath_devdata *dd, int wait)
{
enum i2c_state bit;
if (i2c_gpio_get(dd, i2c_line_sda, &bit))
ipath_dbg("get bit failed!\n");
if (wait)
i2c_wait_for_writes(dd);
return bit == i2c_line_high ? 1U : 0;
}
/**
* i2c_ackrcv - see if ack following write is true
* @dd: the infinipath device
*/
static int i2c_ackrcv(struct ipath_devdata *dd)
{
u8 ack_received;
/* AT ENTRY SCL = LOW */
/* change direction, ignore data */
ack_received = sda_in(dd, 1);
scl_out(dd, i2c_line_high);
ack_received = sda_in(dd, 1) == 0;
scl_out(dd, i2c_line_low);
return ack_received;
}
/**
* wr_byte - write a byte, one bit at a time
* @dd: the infinipath device
* @data: the byte to write
*
* Returns 0 if we got the following ack, otherwise 1
*/
static int wr_byte(struct ipath_devdata *dd, u8 data)
{
int bit_cntr;
u8 bit;
for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
bit = (data >> bit_cntr) & 1;
sda_out(dd, bit);
scl_out(dd, i2c_line_high);
scl_out(dd, i2c_line_low);
}
return (!i2c_ackrcv(dd)) ? 1 : 0;
}
static void send_ack(struct ipath_devdata *dd)
{
sda_out(dd, i2c_line_low);
scl_out(dd, i2c_line_high);
scl_out(dd, i2c_line_low);
sda_out(dd, i2c_line_high);
}
/**
* i2c_startcmd - transmit the start condition, followed by address/cmd
* @dd: the infinipath device
* @offset_dir: direction byte
*
* (both clock/data high, clock high, data low while clock is high)
*/
static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
{
int res;
/* issue start sequence */
sda_out(dd, i2c_line_high);
scl_out(dd, i2c_line_high);
sda_out(dd, i2c_line_low);
scl_out(dd, i2c_line_low);
/* issue length and direction byte */
res = wr_byte(dd, offset_dir);
if (res)
ipath_cdbg(VERBOSE, "No ack to complete start\n");
return res;
}
/**
* stop_cmd - transmit the stop condition
* @dd: the infinipath device
*
* (both clock/data low, clock high, data high while clock is high)
*/
static void stop_cmd(struct ipath_devdata *dd)
{
scl_out(dd, i2c_line_low);
sda_out(dd, i2c_line_low);
scl_out(dd, i2c_line_high);
sda_out(dd, i2c_line_high);
udelay(2);
}
/**
* eeprom_reset - reset I2C communication
* @dd: the infinipath device
*/
static int eeprom_reset(struct ipath_devdata *dd)
{
int clock_cycles_left = 9;
u64 *gpioval = &dd->ipath_gpio_out;
int ret;
eeprom_init = 1;
*gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
"is %llx\n", (unsigned long long) *gpioval);
/*
* This is to get the i2c into a known state, by first going low,
* then tristate sda (and then tristate scl as first thing
* in loop)
*/
scl_out(dd, i2c_line_low);
sda_out(dd, i2c_line_high);
while (clock_cycles_left--) {
scl_out(dd, i2c_line_high);
if (sda_in(dd, 0)) {
sda_out(dd, i2c_line_low);
scl_out(dd, i2c_line_low);
ret = 0;
goto bail;
}
scl_out(dd, i2c_line_low);
}
ret = 1;
bail:
return ret;
}
/**
* ipath_eeprom_read - receives bytes from the eeprom via I2C
* @dd: the infinipath device
* @eeprom_offset: address to read from
* @buffer: where to store result
* @len: number of bytes to receive
*/
int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
void *buffer, int len)
{
/* compiler complains unless initialized */
u8 single_byte = 0;
int bit_cntr;
int ret;
if (!eeprom_init)
eeprom_reset(dd);
eeprom_offset = (eeprom_offset << 1) | READ_CMD;
if (i2c_startcmd(dd, eeprom_offset)) {
ipath_dbg("Failed startcmd\n");
stop_cmd(dd);
ret = 1;
goto bail;
}
/*
* eeprom keeps clocking data out as long as we ack, automatically
* incrementing the address.
*/
while (len-- > 0) {
/* get data */
single_byte = 0;
for (bit_cntr = 8; bit_cntr; bit_cntr--) {
u8 bit;
scl_out(dd, i2c_line_high);
bit = sda_in(dd, 0);
single_byte |= bit << (bit_cntr - 1);
scl_out(dd, i2c_line_low);
}
/* send ack if not the last byte */
if (len)
send_ack(dd);
*((u8 *) buffer) = single_byte;
buffer++;
}
stop_cmd(dd);
ret = 0;
bail:
return ret;
}
/**
* ipath_eeprom_write - writes data to the eeprom via I2C
* @dd: the infinipath device
* @eeprom_offset: where to place data
* @buffer: data to write
* @len: number of bytes to write
*/
int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
const void *buffer, int len)
{
u8 single_byte;
int sub_len;
const u8 *bp = buffer;
int max_wait_time, i;
int ret;
if (!eeprom_init)
eeprom_reset(dd);
while (len > 0) {
if (i2c_startcmd(dd, (eeprom_offset << 1) | WRITE_CMD)) {
ipath_dbg("Failed to start cmd offset %u\n",
eeprom_offset);
goto failed_write;
}
sub_len = min(len, 4);
eeprom_offset += sub_len;
len -= sub_len;
for (i = 0; i < sub_len; i++) {
if (wr_byte(dd, *bp++)) {
ipath_dbg("no ack after byte %u/%u (%u "
"total remain)\n", i, sub_len,
len + sub_len - i);
goto failed_write;
}
}
stop_cmd(dd);
/*
* wait for write complete by waiting for a successful
* read (the chip replies with a zero after the write
* cmd completes, and before it writes to the eeprom).
* The startcmd for the read will fail the ack until
* the writes have completed. We do this inline to avoid
* the debug prints that are in the real read routine
* if the startcmd fails.
*/
max_wait_time = 100;
while (i2c_startcmd(dd, READ_CMD)) {
stop_cmd(dd);
if (!--max_wait_time) {
ipath_dbg("Did not get successful read to "
"complete write\n");
goto failed_write;
}
}
/* now read the zero byte */
for (i = single_byte = 0; i < 8; i++) {
u8 bit;
scl_out(dd, i2c_line_high);
bit = sda_in(dd, 0);
scl_out(dd, i2c_line_low);
single_byte <<= 1;
single_byte |= bit;
}
stop_cmd(dd);
}
ret = 0;
goto bail;
failed_write:
stop_cmd(dd);
ret = 1;
bail:
return ret;
}
static u8 flash_csum(struct ipath_flash *ifp, int adjust)
{
u8 *ip = (u8 *) ifp;
u8 csum = 0, len;
for (len = 0; len < ifp->if_length; len++)
csum += *ip++;
csum -= ifp->if_csum;
csum = ~csum;
if (adjust)
ifp->if_csum = csum;
return csum;
}
/**
* ipath_get_guid - get the GUID from the i2c device
* @dd: the infinipath device
*
* When we add the multi-chip support, we will probably have to add
* the ability to use the number of guids field, and get the guid from
* the first chip's flash, to use for all of them.
*/
void ipath_get_guid(struct ipath_devdata *dd)
{
void *buf;
struct ipath_flash *ifp;
__be64 guid;
int len;
u8 csum, *bguid;
int t = dd->ipath_unit;
struct ipath_devdata *dd0 = ipath_lookup(0);
if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
u8 *bguid, oguid;
dd->ipath_guid = dd0->ipath_guid;
bguid = (u8 *) & dd->ipath_guid;
oguid = bguid[7];
bguid[7] += t;
if (oguid > bguid[7]) {
if (bguid[6] == 0xff) {
if (bguid[5] == 0xff) {
ipath_dev_err(
dd,
"Can't set %s GUID from "
"base, wraps to OUI!\n",
ipath_get_unit_name(t));
dd->ipath_guid = 0;
goto bail;
}
bguid[5]++;
}
bguid[6]++;
}
dd->ipath_nguid = 1;
ipath_dbg("nguid %u, so adding %u to device 0 guid, "
"for %llx\n",
dd0->ipath_nguid, t,
(unsigned long long) be64_to_cpu(dd->ipath_guid));
goto bail;
}
len = offsetof(struct ipath_flash, if_future);
buf = vmalloc(len);
if (!buf) {
ipath_dev_err(dd, "Couldn't allocate memory to read %u "
"bytes from eeprom for GUID\n", len);
goto bail;
}
if (ipath_eeprom_read(dd, 0, buf, len)) {
ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
goto done;
}
ifp = (struct ipath_flash *)buf;
csum = flash_csum(ifp, 0);
if (csum != ifp->if_csum) {
dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
"0x%x, not 0x%x\n", csum, ifp->if_csum);
goto done;
}
if (*(__be64 *) ifp->if_guid == 0ULL ||
*(__be64 *) ifp->if_guid == __constant_cpu_to_be64(-1LL)) {
ipath_dev_err(dd, "Invalid GUID %llx from flash; "
"ignoring\n",
*(unsigned long long *) ifp->if_guid);
/* don't allow GUID if all 0 or all 1's */
goto done;
}
/* complain, but allow it */
if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
"default, probably not correct!\n",
*(unsigned long long *) ifp->if_guid);
bguid = ifp->if_guid;
if (!bguid[0] && !bguid[1] && !bguid[2]) {
/* original incorrect GUID format in flash; fix in
* core copy, by shifting up 2 octets; don't need to
* change top octet, since both it and shifted are
* 0.. */
bguid[1] = bguid[3];
bguid[2] = bguid[4];
bguid[3] = bguid[4] = 0;
guid = *(__be64 *) ifp->if_guid;
ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
"shifting 2 octets\n");
} else
guid = *(__be64 *) ifp->if_guid;
dd->ipath_guid = guid;
dd->ipath_nguid = ifp->if_numguid;
memcpy(dd->ipath_serial, ifp->if_serial,
sizeof(ifp->if_serial));
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
(unsigned long long) be64_to_cpu(dd->ipath_guid));
done:
vfree(buf);
bail:;
}
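The multi-chip branch of ipath_get_guid() above derives each extra unit's GUID from the first chip's GUID by adding the unit number to the low octet and carrying into the next two octets, refusing to wrap into the OUI. A minimal standalone sketch of that derivation follows; the helper name is hypothetical and not part of the driver.

static int example_derive_unit_guid(unsigned char guid[8], unsigned unit)
{
	unsigned char old = guid[7];

	guid[7] += (unsigned char) unit;
	if (old > guid[7]) {		/* low octet wrapped, carry upward */
		if (guid[6] == 0xff) {
			if (guid[5] == 0xff)
				return -1;	/* would spill into the OUI */
			guid[5]++;
		}
		guid[6]++;
	}
	return 0;
}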

File diff suppressed because it is too large

View File

@ -0,0 +1,605 @@
/*
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/version.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/namei.h>
#include <linux/pci.h>
#include "ipath_kernel.h"
#define IPATHFS_MAGIC 0x726a77
static struct super_block *ipath_super;
static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
int mode, struct file_operations *fops,
void *data)
{
int error;
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
error = -EPERM;
goto bail;
}
inode->i_mode = mode;
inode->i_uid = 0;
inode->i_gid = 0;
inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->u.generic_ip = data;
if ((mode & S_IFMT) == S_IFDIR) {
inode->i_op = &simple_dir_inode_operations;
inode->i_nlink++;
dir->i_nlink++;
}
inode->i_fop = fops;
d_instantiate(dentry, inode);
error = 0;
bail:
return error;
}
static int create_file(const char *name, mode_t mode,
struct dentry *parent, struct dentry **dentry,
struct file_operations *fops, void *data)
{
int error;
*dentry = NULL;
mutex_lock(&parent->d_inode->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
	if (!IS_ERR(*dentry))
		error = ipathfs_mknod(parent->d_inode, *dentry,
				      mode, fops, data);
	else
		error = PTR_ERR(*dentry);
mutex_unlock(&parent->d_inode->i_mutex);
return error;
}
static ssize_t atomic_stats_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
sizeof ipath_stats);
}
static struct file_operations atomic_stats_ops = {
.read = atomic_stats_read,
};
#define NUM_COUNTERS (sizeof(struct infinipath_counters) / sizeof(u64))
static ssize_t atomic_counters_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
u64 counters[NUM_COUNTERS];
u16 i;
struct ipath_devdata *dd;
dd = file->f_dentry->d_inode->u.generic_ip;
for (i = 0; i < NUM_COUNTERS; i++)
counters[i] = ipath_snap_cntr(dd, i);
return simple_read_from_buffer(buf, count, ppos, counters,
sizeof counters);
}
static struct file_operations atomic_counters_ops = {
.read = atomic_counters_read,
};
static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
u32 nodeinfo[10];
struct ipath_devdata *dd;
u64 guid;
dd = file->f_dentry->d_inode->u.generic_ip;
guid = be64_to_cpu(dd->ipath_guid);
nodeinfo[0] = /* BaseVersion is SMA */
/* ClassVersion is SMA */
(1 << 8) /* NodeType */
| (1 << 0); /* NumPorts */
nodeinfo[1] = (u32) (guid >> 32);
nodeinfo[2] = (u32) (guid & 0xffffffff);
/* PortGUID == SystemImageGUID for us */
nodeinfo[3] = nodeinfo[1];
/* PortGUID == SystemImageGUID for us */
nodeinfo[4] = nodeinfo[2];
/* PortGUID == NodeGUID for us */
nodeinfo[5] = nodeinfo[3];
/* PortGUID == NodeGUID for us */
nodeinfo[6] = nodeinfo[4];
nodeinfo[7] = (4 << 16) /* we support 4 pkeys */
| (dd->ipath_deviceid << 0);
/* our chip version as 16 bits major, 16 bits minor */
nodeinfo[8] = dd->ipath_minrev | (dd->ipath_majrev << 16);
nodeinfo[9] = (dd->ipath_unit << 24) | (dd->ipath_vendorid << 0);
return simple_read_from_buffer(buf, count, ppos, nodeinfo,
sizeof nodeinfo);
}
static struct file_operations atomic_node_info_ops = {
.read = atomic_node_info_read,
};
static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
u32 portinfo[13];
u32 tmp, tmp2;
struct ipath_devdata *dd;
dd = file->f_dentry->d_inode->u.generic_ip;
/* so we only initialize non-zero fields. */
memset(portinfo, 0, sizeof portinfo);
/*
* Notimpl yet M_Key (64)
* Notimpl yet GID (64)
*/
portinfo[4] = (dd->ipath_lid << 16);
/*
* Notimpl yet SMLID (should we store this in the driver, in case
* SMA dies?) CapabilityMask is 0, we don't support any of these
* DiagCode is 0; we don't store any diag info for now Notimpl yet
* M_KeyLeasePeriod (we don't support M_Key)
*/
/* LocalPortNum is whichever port number they ask for */
portinfo[7] = (dd->ipath_unit << 24)
/* LinkWidthEnabled */
| (2 << 16)
/* LinkWidthSupported (really 2, but not IB valid) */
| (3 << 8)
/* LinkWidthActive */
| (2 << 0);
tmp = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
tmp2 = 5;
if (tmp == IPATH_IBSTATE_INIT)
tmp = 2;
else if (tmp == IPATH_IBSTATE_ARM)
tmp = 3;
else if (tmp == IPATH_IBSTATE_ACTIVE)
tmp = 4;
else {
tmp = 0; /* down */
tmp2 = tmp & 0xf;
}
portinfo[8] = (1 << 28) /* LinkSpeedSupported */
| (tmp << 24) /* PortState */
| (tmp2 << 20) /* PortPhysicalState */
| (2 << 16)
/* LinkDownDefaultState */
/* M_KeyProtectBits == 0 */
/* NotImpl yet LMC == 0 (we can support all values) */
| (1 << 4) /* LinkSpeedActive */
| (1 << 0); /* LinkSpeedEnabled */
switch (dd->ipath_ibmtu) {
case 4096:
tmp = 5;
break;
case 2048:
tmp = 4;
break;
case 1024:
tmp = 3;
break;
case 512:
tmp = 2;
break;
case 256:
tmp = 1;
break;
default: /* oops, something is wrong */
ipath_dbg("Problem, ipath_ibmtu 0x%x not a valid IB MTU, "
"treat as 2048\n", dd->ipath_ibmtu);
tmp = 4;
break;
}
portinfo[9] = (tmp << 28)
/* NeighborMTU */
/* Notimpl MasterSMSL */
| (1 << 20)
/* VLCap */
/* Notimpl InitType (actually, an SMA decision) */
/* VLHighLimit is 0 (only one VL) */
; /* VLArbitrationHighCap is 0 (only one VL) */
portinfo[10] = /* VLArbitrationLowCap is 0 (only one VL) */
/* InitTypeReply is SMA decision */
(5 << 16) /* MTUCap 4096 */
| (7 << 13) /* VLStallCount */
| (0x1f << 8) /* HOQLife */
| (1 << 4)
/* OperationalVLs 0 */
/* PartitionEnforcementInbound */
/* PartitionEnforcementOutbound not enforced */
/* FilterRawinbound not enforced */
; /* FilterRawOutbound not enforced */
/* M_KeyViolations are not counted by hardware, SMA can count */
tmp = ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
/* P_KeyViolations are counted by hardware. */
portinfo[11] = ((tmp & 0xffff) << 0);
portinfo[12] =
/* Q_KeyViolations are not counted by hardware */
(1 << 8)
/* GUIDCap */
/* SubnetTimeOut handled by SMA */
/* RespTimeValue handled by SMA */
;
/* LocalPhyErrors are programmed to max */
portinfo[12] |= (0xf << 20)
| (0xf << 16) /* OverRunErrors are programmed to max */
;
return simple_read_from_buffer(buf, count, ppos, portinfo,
sizeof portinfo);
}
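/*
 * Illustrative sketch (not part of the driver): recover two of the fields
 * packed by atomic_port_info_read() above, using the same bit positions it
 * writes: PortState sits in bits 27:24 of portinfo[8], and the NeighborMTU
 * code in bits 31:28 of portinfo[9].  The helper names are hypothetical.
 */
static unsigned example_port_state(const unsigned portinfo[13])
{
	/* as packed above: 0 = Down, 2 = Init, 3 = Armed, 4 = Active */
	return (portinfo[8] >> 24) & 0xf;
}

static unsigned example_neighbor_mtu_bytes(const unsigned portinfo[13])
{
	static const unsigned mtu_bytes[] = { 0, 256, 512, 1024, 2048, 4096 };
	unsigned code = (portinfo[9] >> 28) & 0xf;	/* IB MTU code */

	return code <= 5 ? mtu_bytes[code] : 0;
}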
static struct file_operations atomic_port_info_ops = {
.read = atomic_port_info_read,
};
static ssize_t flash_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct ipath_devdata *dd;
ssize_t ret;
loff_t pos;
char *tmp;
pos = *ppos;
	if (pos < 0) {
ret = -EINVAL;
goto bail;
}
if (pos >= sizeof(struct ipath_flash)) {
ret = 0;
goto bail;
}
if (count > sizeof(struct ipath_flash) - pos)
count = sizeof(struct ipath_flash) - pos;
tmp = kmalloc(count, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
goto bail;
}
dd = file->f_dentry->d_inode->u.generic_ip;
if (ipath_eeprom_read(dd, pos, tmp, count)) {
ipath_dev_err(dd, "failed to read from flash\n");
ret = -ENXIO;
goto bail_tmp;
}
if (copy_to_user(buf, tmp, count)) {
ret = -EFAULT;
goto bail_tmp;
}
*ppos = pos + count;
ret = count;
bail_tmp:
kfree(tmp);
bail:
return ret;
}
static ssize_t flash_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct ipath_devdata *dd;
ssize_t ret;
loff_t pos;
char *tmp;
pos = *ppos;
	if (pos < 0) {
ret = -EINVAL;
goto bail;
}
if (pos >= sizeof(struct ipath_flash)) {
ret = 0;
goto bail;
}
if (count > sizeof(struct ipath_flash) - pos)
count = sizeof(struct ipath_flash) - pos;
tmp = kmalloc(count, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
goto bail;
}
if (copy_from_user(tmp, buf, count)) {
ret = -EFAULT;
goto bail_tmp;
}
dd = file->f_dentry->d_inode->u.generic_ip;
if (ipath_eeprom_write(dd, pos, tmp, count)) {
ret = -ENXIO;
ipath_dev_err(dd, "failed to write to flash\n");
goto bail_tmp;
}
*ppos = pos + count;
ret = count;
bail_tmp:
kfree(tmp);
bail:
return ret;
}
static struct file_operations flash_ops = {
.read = flash_read,
.write = flash_write,
};
static int create_device_files(struct super_block *sb,
struct ipath_devdata *dd)
{
struct dentry *dir, *tmp;
char unit[10];
int ret;
snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
(struct file_operations *) &simple_dir_operations,
dd);
if (ret) {
printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
goto bail;
}
ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
&atomic_counters_ops, dd);
if (ret) {
printk(KERN_ERR "create_file(%s/atomic_counters) "
"failed: %d\n", unit, ret);
goto bail;
}
ret = create_file("node_info", S_IFREG|S_IRUGO, dir, &tmp,
&atomic_node_info_ops, dd);
if (ret) {
printk(KERN_ERR "create_file(%s/node_info) "
"failed: %d\n", unit, ret);
goto bail;
}
ret = create_file("port_info", S_IFREG|S_IRUGO, dir, &tmp,
&atomic_port_info_ops, dd);
if (ret) {
printk(KERN_ERR "create_file(%s/port_info) "
"failed: %d\n", unit, ret);
goto bail;
}
ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
&flash_ops, dd);
if (ret) {
printk(KERN_ERR "create_file(%s/flash) "
"failed: %d\n", unit, ret);
goto bail;
}
bail:
return ret;
}
static void remove_file(struct dentry *parent, char *name)
{
struct dentry *tmp;
tmp = lookup_one_len(name, parent, strlen(name));
spin_lock(&dcache_lock);
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
dget_locked(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
spin_unlock(&dcache_lock);
simple_unlink(parent->d_inode, tmp);
} else {
spin_unlock(&tmp->d_lock);
spin_unlock(&dcache_lock);
}
}
static int remove_device_files(struct super_block *sb,
struct ipath_devdata *dd)
{
struct dentry *dir, *root;
char unit[10];
int ret;
root = dget(sb->s_root);
mutex_lock(&root->d_inode->i_mutex);
snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
dir = lookup_one_len(unit, root, strlen(unit));
if (IS_ERR(dir)) {
ret = PTR_ERR(dir);
printk(KERN_ERR "Lookup of %s failed\n", unit);
goto bail;
}
remove_file(dir, "flash");
remove_file(dir, "port_info");
remove_file(dir, "node_info");
remove_file(dir, "atomic_counters");
d_delete(dir);
ret = simple_rmdir(root->d_inode, dir);
bail:
mutex_unlock(&root->d_inode->i_mutex);
dput(root);
return ret;
}
static int ipathfs_fill_super(struct super_block *sb, void *data,
int silent)
{
struct ipath_devdata *dd, *tmp;
unsigned long flags;
int ret;
static struct tree_descr files[] = {
[1] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
{""},
};
ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
if (ret) {
printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
goto bail;
}
spin_lock_irqsave(&ipath_devs_lock, flags);
list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
spin_unlock_irqrestore(&ipath_devs_lock, flags);
ret = create_device_files(sb, dd);
if (ret) {
deactivate_super(sb);
goto bail;
}
spin_lock_irqsave(&ipath_devs_lock, flags);
}
spin_unlock_irqrestore(&ipath_devs_lock, flags);
bail:
return ret;
}
static struct super_block *ipathfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data)
{
ipath_super = get_sb_single(fs_type, flags, data,
ipathfs_fill_super);
return ipath_super;
}
static void ipathfs_kill_super(struct super_block *s)
{
kill_litter_super(s);
ipath_super = NULL;
}
int ipathfs_add_device(struct ipath_devdata *dd)
{
int ret;
if (ipath_super == NULL) {
ret = 0;
goto bail;
}
ret = create_device_files(ipath_super, dd);
bail:
return ret;
}
int ipathfs_remove_device(struct ipath_devdata *dd)
{
int ret;
if (ipath_super == NULL) {
ret = 0;
goto bail;
}
ret = remove_device_files(ipath_super, dd);
bail:
return ret;
}
static struct file_system_type ipathfs_fs_type = {
.owner = THIS_MODULE,
.name = "ipathfs",
.get_sb = ipathfs_get_sb,
.kill_sb = ipathfs_kill_super,
};
int __init ipath_init_ipathfs(void)
{
return register_filesystem(&ipathfs_fs_type);
}
void __exit ipath_exit_ipathfs(void)
{
unregister_filesystem(&ipathfs_fs_type);
}
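For context, a minimal userspace sketch of consuming this filesystem: mount it (for instance "mount -t ipathfs none /ipathfs"; the mount point and unit number are only assumptions for the example) and read a unit's atomic_counters file, which is a raw array of 64-bit counters as produced by atomic_counters_read() above.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* per-unit directory name is the two-digit unit number, e.g. "00" */
	FILE *f = fopen("/ipathfs/00/atomic_counters", "rb");
	uint64_t val;
	unsigned i = 0;

	if (!f) {
		perror("atomic_counters");
		return 1;
	}
	while (fread(&val, sizeof(val), 1, f) == 1)
		printf("counter %u: %llu\n", i++, (unsigned long long) val);
	fclose(f);
	return 0;
}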

File diff suppressed because it is too large

View File

@ -0,0 +1,951 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include "ipath_kernel.h"
#include "ips_common.h"
/*
 * min buffers we want to have per port, after the driver's own reservation
*/
#define IPATH_MIN_USER_PORT_BUFCNT 8
/*
* Number of ports we are configured to use (to allow for more pio
* buffers per port, etc.) Zero means use chip value.
*/
static ushort ipath_cfgports;
module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
/*
* Number of buffers reserved for driver (layered drivers and SMA
* send). Reserved at end of buffer list.
*/
static ushort ipath_kpiobufs = 32;
static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_uint,
&ipath_kpiobufs, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
/**
* create_port0_egr - allocate the eager TID buffers
* @dd: the infinipath device
*
* This code is now quite different for user and kernel, because
* the kernel uses skb's, for the accelerated network performance.
* This is the kernel (port0) version.
*
* Allocate the eager TID buffers and program them into infinipath.
* We use the network layer alloc_skb() allocator to allocate the
* memory, and either use the buffers as is for things like SMA
* packets, or pass the buffers up to the ipath layered driver and
* thence the network layer, replacing them as we do so (see
* ipath_rcv_layer()).
*/
static int create_port0_egr(struct ipath_devdata *dd)
{
unsigned e, egrcnt;
struct sk_buff **skbs;
int ret;
egrcnt = dd->ipath_rcvegrcnt;
skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
if (skbs == NULL) {
ipath_dev_err(dd, "allocation error for eager TID "
"skb array\n");
ret = -ENOMEM;
goto bail;
}
for (e = 0; e < egrcnt; e++) {
/*
* This is a bit tricky in that we allocate extra
* space for 2 bytes of the 14 byte ethernet header.
* These two bytes are passed in the ipath header so
* the rest of the data is word aligned. We allocate
* 4 bytes so that the data buffer stays word aligned.
* See ipath_kreceive() for more details.
*/
skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL);
if (!skbs[e]) {
ipath_dev_err(dd, "SKB allocation error for "
"eager TID %u\n", e);
while (e != 0)
dev_kfree_skb(skbs[--e]);
ret = -ENOMEM;
goto bail;
}
}
/*
* After loop above, so we can test non-NULL to see if ready
* to use at receive, etc.
*/
dd->ipath_port0_skbs = skbs;
for (e = 0; e < egrcnt; e++) {
unsigned long phys =
virt_to_phys(dd->ipath_port0_skbs[e]->data);
dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
((char __iomem *) dd->ipath_kregbase +
dd->ipath_rcvegrbase), 0, phys);
}
ret = 0;
bail:
return ret;
}
static int bringup_link(struct ipath_devdata *dd)
{
u64 val, ibc;
int ret = 0;
/* hold IBC in reset */
dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
dd->ipath_control);
/*
* Note that prior to try 14 or 15 of IB, the credit scaling
* wasn't working, because it was swapped for writes with the
* 1 bit default linkstate field
*/
/* ignore pbc and align word */
val = dd->ipath_piosize2k - 2 * sizeof(u32);
/*
* for ICRC, which we only send in diag test pkt mode, and we
* don't need to worry about that for mtu
*/
val += 1;
/*
* Set the IBC maxpktlength to the size of our pio buffers the
* maxpktlength is in words. This is *not* the IB data MTU.
*/
ibc = (val / sizeof(u32)) << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
/* in KB */
ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
/*
* How often flowctrl sent. More or less in usecs; balance against
* watermark value, so that in theory senders always get a flow
* control update in time to not let the IB link go idle.
*/
ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT;
/* max error tolerance */
ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
/* use "real" buffer space for */
ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT;
/* IB credit flow control. */
ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
/* initially come up waiting for TS1, without sending anything. */
dd->ipath_ibcctrl = ibc;
/*
* Want to start out with both LINKCMD and LINKINITCMD in NOP
* (0 and 0). Don't put linkinitcmd in ipath_ibcctrl, want that
* to stay a NOP
*/
ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
INFINIPATH_IBCC_LINKINITCMD_SHIFT;
ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
(unsigned long long) ibc);
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
	/* be sure chip saw it */
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
ret = dd->ipath_f_bringup_serdes(dd);
if (ret)
dev_info(&dd->pcidev->dev, "Could not initialize SerDes, "
"not usable\n");
else {
/* enable IBC */
dd->ipath_control |= INFINIPATH_C_LINKENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
dd->ipath_control);
}
return ret;
}
static int init_chip_first(struct ipath_devdata *dd,
struct ipath_portdata **pdp)
{
struct ipath_portdata *pd = NULL;
int ret = 0;
u64 val;
/*
* skip cfgports stuff because we are not allocating memory,
* and we don't want problems if the portcnt changed due to
* cfgports. We do still check and report a difference, if
* not same (should be impossible).
*/
dd->ipath_portcnt =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
if (!ipath_cfgports)
dd->ipath_cfgports = dd->ipath_portcnt;
else if (ipath_cfgports <= dd->ipath_portcnt) {
dd->ipath_cfgports = ipath_cfgports;
ipath_dbg("Configured to use %u ports out of %u in chip\n",
dd->ipath_cfgports, dd->ipath_portcnt);
} else {
dd->ipath_cfgports = dd->ipath_portcnt;
ipath_dbg("Tried to configured to use %u ports; chip "
"only supports %u\n", ipath_cfgports,
dd->ipath_portcnt);
}
dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
GFP_KERNEL);
if (!dd->ipath_pd) {
ipath_dev_err(dd, "Unable to allocate portdata array, "
"failing\n");
ret = -ENOMEM;
goto done;
}
dd->ipath_lastegrheads = kzalloc(sizeof(*dd->ipath_lastegrheads)
* dd->ipath_cfgports,
GFP_KERNEL);
dd->ipath_lastrcvhdrqtails =
kzalloc(sizeof(*dd->ipath_lastrcvhdrqtails)
* dd->ipath_cfgports, GFP_KERNEL);
if (!dd->ipath_lastegrheads || !dd->ipath_lastrcvhdrqtails) {
ipath_dev_err(dd, "Unable to allocate head arrays, "
"failing\n");
ret = -ENOMEM;
goto done;
}
dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!dd->ipath_pd[0]) {
ipath_dev_err(dd, "Unable to allocate portdata for port "
"0, failing\n");
ret = -ENOMEM;
goto done;
}
pd = dd->ipath_pd[0];
pd->port_dd = dd;
pd->port_port = 0;
pd->port_cnt = 1;
/* The port 0 pkey table is used by the layer interface. */
pd->port_pkeys[0] = IPS_DEFAULT_P_KEY;
dd->ipath_rcvtidcnt =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
dd->ipath_rcvtidbase =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
dd->ipath_rcvegrcnt =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
dd->ipath_rcvegrbase =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
dd->ipath_palign =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
dd->ipath_piobufbase =
ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
dd->ipath_piosize2k = val & ~0U;
dd->ipath_piosize4k = val >> 32;
dd->ipath_ibmtu = 4096; /* default to largest legal MTU */
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
dd->ipath_piobcnt2k = val & ~0U;
dd->ipath_piobcnt4k = val >> 32;
dd->ipath_pio2kbase =
(u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
(dd->ipath_piobufbase & 0xffffffff));
if (dd->ipath_piobcnt4k) {
dd->ipath_pio4kbase = (u32 __iomem *)
(((char __iomem *) dd->ipath_kregbase) +
(dd->ipath_piobufbase >> 32));
/*
* 4K buffers take 2 pages; we use roundup just to be
* paranoid; we calculate it once here, rather than on
		 * every buf allocation
*/
dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k,
dd->ipath_palign);
ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p "
"(%x aligned)\n",
dd->ipath_piobcnt2k, dd->ipath_piosize2k,
dd->ipath_pio2kbase, dd->ipath_piobcnt4k,
dd->ipath_piosize4k, dd->ipath_pio4kbase,
dd->ipath_4kalign);
}
else ipath_dbg("%u 2k piobufs @ %p\n",
dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
spin_lock_init(&dd->ipath_tid_lock);
done:
*pdp = pd;
return ret;
}
/**
* init_chip_reset - re-initialize after a reset, or enable
* @dd: the infinipath device
* @pdp: output for port data
*
* sanity check at least some of the values after reset, and
 * ensure no receive or transmit (explicitly, in case reset
 * failed)
*/
static int init_chip_reset(struct ipath_devdata *dd,
struct ipath_portdata **pdp)
{
struct ipath_portdata *pd;
u32 rtmp;
*pdp = pd = dd->ipath_pd[0];
/* ensure chip does no sends or receives while we re-initialize */
dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U;
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0);
rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
if (dd->ipath_portcnt != rtmp)
dev_info(&dd->pcidev->dev, "portcnt was %u before "
"reset, now %u, using original\n",
dd->ipath_portcnt, rtmp);
rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
if (rtmp != dd->ipath_rcvtidcnt)
dev_info(&dd->pcidev->dev, "tidcnt was %u before "
"reset, now %u, using original\n",
dd->ipath_rcvtidcnt, rtmp);
rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
if (rtmp != dd->ipath_rcvtidbase)
dev_info(&dd->pcidev->dev, "tidbase was %u before "
"reset, now %u, using original\n",
dd->ipath_rcvtidbase, rtmp);
rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
if (rtmp != dd->ipath_rcvegrcnt)
dev_info(&dd->pcidev->dev, "egrcnt was %u before "
"reset, now %u, using original\n",
dd->ipath_rcvegrcnt, rtmp);
rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
if (rtmp != dd->ipath_rcvegrbase)
dev_info(&dd->pcidev->dev, "egrbase was %u before "
"reset, now %u, using original\n",
dd->ipath_rcvegrbase, rtmp);
return 0;
}
static int init_pioavailregs(struct ipath_devdata *dd)
{
int ret;
dd->ipath_pioavailregs_dma = dma_alloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys,
GFP_KERNEL);
if (!dd->ipath_pioavailregs_dma) {
ipath_dev_err(dd, "failed to allocate PIOavail reg area "
"in memory\n");
ret = -ENOMEM;
goto done;
}
/*
* we really want L2 cache aligned, but for current CPUs of
* interest, they are the same.
*/
dd->ipath_statusp = (u64 *)
((char *)dd->ipath_pioavailregs_dma +
((2 * L1_CACHE_BYTES +
dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
/* copy the current value now that it's really allocated */
*dd->ipath_statusp = dd->_ipath_status;
/*
* setup buffer to hold freeze msg, accessible to apps,
* following statusp
*/
dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
/* and its length */
dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
if (dd->ipath_unit * 64 > (IPATH_PORT0_RCVHDRTAIL_SIZE - 64)) {
ipath_dev_err(dd, "unit %u too large for port 0 "
"rcvhdrtail buffer size\n", dd->ipath_unit);
ret = -ENODEV;
}
else
ret = 0;
/* so we can get current tail in ipath_kreceive(), per chip */
dd->ipath_hdrqtailptr = &ipath_port0_rcvhdrtail[
dd->ipath_unit * (64 / sizeof(*ipath_port0_rcvhdrtail))];
done:
return ret;
}
/**
* init_shadow_tids - allocate the shadow TID array
* @dd: the infinipath device
*
* allocate the shadow TID array, so we can ipath_munlock previous
* entries. It may make more sense to move the pageshadow to the
* port data structure, so we only allocate memory for ports actually
 * in use, since we're at 8k per port now.
*/
static void init_shadow_tids(struct ipath_devdata *dd)
{
dd->ipath_pageshadow = (struct page **)
vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(struct page *));
if (!dd->ipath_pageshadow)
ipath_dev_err(dd, "failed to allocate shadow page * "
"array, no expected sends!\n");
else
memset(dd->ipath_pageshadow, 0,
dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(struct page *));
}
static void enable_chip(struct ipath_devdata *dd,
struct ipath_portdata *pd, int reinit)
{
u32 val;
int i;
if (!reinit) {
init_waitqueue_head(&ipath_sma_state_wait);
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
/* Enable PIO send, and update of PIOavail regs to memory. */
dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
INFINIPATH_S_PIOBUFAVAILUPD;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
/*
* enable port 0 receive, and receive interrupt. other ports
* done as user opens and inits them.
*/
dd->ipath_rcvctrl = INFINIPATH_R_TAILUPD |
(1ULL << INFINIPATH_R_PORTENABLE_SHIFT) |
(1ULL << INFINIPATH_R_INTRAVAIL_SHIFT);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
/*
* now ready for use. this should be cleared whenever we
* detect a reset, or initiate one.
*/
dd->ipath_flags |= IPATH_INITTED;
/*
* init our shadow copies of head from tail values, and write
* head values to match.
*/
val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
(void)ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
dd->ipath_port0head = ipath_read_ureg32(dd, ur_rcvhdrtail, 0);
/* Initialize so we interrupt on next packet received */
(void)ipath_write_ureg(dd, ur_rcvhdrhead,
dd->ipath_rhdrhead_intr_off |
dd->ipath_port0head, 0);
/*
* by now pioavail updates to memory should have occurred, so
* copy them into our working/shadow registers; this is in
* case something went wrong with abort, but mostly to get the
* initial values of the generation bit correct.
*/
for (i = 0; i < dd->ipath_pioavregs; i++) {
__le64 val;
/*
* Chip Errata bug 6641; even and odd qwords>3 are swapped.
*/
if (i > 3) {
if (i & 1)
val = dd->ipath_pioavailregs_dma[i - 1];
else
val = dd->ipath_pioavailregs_dma[i + 1];
}
else
val = dd->ipath_pioavailregs_dma[i];
dd->ipath_pioavailshadow[i] = le64_to_cpu(val);
}
/* can get counters, stats, etc. */
dd->ipath_flags |= IPATH_PRESENT;
}
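/*
 * Illustrative sketch (not part of the driver): the index swizzle used by
 * the copy loop in enable_chip() above for chip errata bug 6641, where even
 * and odd qwords beyond index 3 arrive swapped in the DMA'ed pioavail copy.
 * The helper name is hypothetical.
 */
static unsigned example_pioavail_dma_index(unsigned i)
{
	if (i > 3)
		return (i & 1) ? i - 1 : i + 1;	/* swap members of each pair */
	return i;
}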
static int init_housekeeping(struct ipath_devdata *dd,
struct ipath_portdata **pdp, int reinit)
{
char boardn[32];
int ret = 0;
/*
* have to clear shadow copies of registers at init that are
* not otherwise set here, or all kinds of bizarre things
* happen with driver on chip reset
*/
dd->ipath_rcvhdrsize = 0;
/*
* Don't clear ipath_flags as 8bit mode was set before
* entering this func. However, we do set the linkstate to
* unknown, so we can watch for a transition.
*/
dd->ipath_flags |= IPATH_LINKUNK;
dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
IPATH_LINKDOWN | IPATH_LINKINIT);
ipath_cdbg(VERBOSE, "Try to read spc chip revision\n");
dd->ipath_revision =
ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
/*
* set up fundamental info we need to use the chip; we assume
* if the revision reg and these regs are OK, we don't need to
* special case the rest
*/
dd->ipath_sregbase =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase);
dd->ipath_cregbase =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase);
dd->ipath_uregbase =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase);
ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, "
"cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase,
dd->ipath_uregbase, dd->ipath_cregbase);
if ((dd->ipath_revision & 0xffffffff) == 0xffffffff
|| (dd->ipath_sregbase & 0xffffffff) == 0xffffffff
|| (dd->ipath_cregbase & 0xffffffff) == 0xffffffff
|| (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
ipath_dev_err(dd, "Register read failures from chip, "
"giving up initialization\n");
ret = -ENODEV;
goto done;
}
/* clear the initial reset flag, in case first driver load */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
INFINIPATH_E_RESET);
if (reinit)
ret = init_chip_reset(dd, pdp);
else
ret = init_chip_first(dd, pdp);
if (ret)
goto done;
ipath_cdbg(VERBOSE, "Revision %llx (PCI %x), %u ports, %u tids, "
"%u egrtids\n", (unsigned long long) dd->ipath_revision,
dd->ipath_pcirev, dd->ipath_portcnt, dd->ipath_rcvtidcnt,
dd->ipath_rcvegrcnt);
if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
ipath_dev_err(dd, "Driver only handles version %d, "
"chip swversion is %d (%llx), failng\n",
IPATH_CHIP_SWVERSION,
(int)(dd->ipath_revision >>
INFINIPATH_R_SOFTWARE_SHIFT) &
INFINIPATH_R_SOFTWARE_MASK,
(unsigned long long) dd->ipath_revision);
ret = -ENOSYS;
goto done;
}
dd->ipath_majrev = (u8) ((dd->ipath_revision >>
INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
INFINIPATH_R_CHIPREVMAJOR_MASK);
dd->ipath_minrev = (u8) ((dd->ipath_revision >>
INFINIPATH_R_CHIPREVMINOR_SHIFT) &
INFINIPATH_R_CHIPREVMINOR_MASK);
dd->ipath_boardrev = (u8) ((dd->ipath_revision >>
INFINIPATH_R_BOARDID_SHIFT) &
INFINIPATH_R_BOARDID_MASK);
ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
"Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
"SW Compat %u\n",
IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
(unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
INFINIPATH_R_ARCH_MASK,
dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev,
(unsigned)(dd->ipath_revision >>
INFINIPATH_R_SOFTWARE_SHIFT) &
INFINIPATH_R_SOFTWARE_MASK);
ipath_dbg("%s", dd->ipath_boardversion);
done:
return ret;
}
/**
* ipath_init_chip - do the actual initialization sequence on the chip
* @dd: the infinipath device
* @reinit: reinitializing, so don't allocate new memory
*
* Do the actual initialization sequence on the chip. This is done
* both from the init routine called from the PCI infrastructure, and
* when we reset the chip, or detect that it was reset internally,
* or it's administratively re-enabled.
*
* Memory allocation here and in called routines is only done in
* the first case (reinit == 0). We have to be careful, because even
* without memory allocation, we need to re-write all the chip registers
* TIDs, etc. after the reset or enable has completed.
*/
int ipath_init_chip(struct ipath_devdata *dd, int reinit)
{
int ret = 0, i;
u32 val32, kpiobufs;
u64 val, atmp;
struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
ret = init_housekeeping(dd, &pd, reinit);
if (ret)
goto done;
/*
* we ignore most issues after reporting them, but have to specially
* handle hardware-disabled chips.
*/
if (ret == 2) {
/* unique error, known to ipath_init_one */
ret = -EPERM;
goto done;
}
/*
* We could bump this to allow for full rcvegrcnt + rcvtidcnt,
* but then it no longer nicely fits power of two, and since
* we now use routines that backend onto __get_free_pages, the
* rest would be wasted.
*/
dd->ipath_rcvhdrcnt = dd->ipath_rcvegrcnt;
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
dd->ipath_rcvhdrcnt);
/*
* Set up the shadow copies of the piobufavail registers,
* which we compare against the chip registers for now, and
* the in memory DMA'ed copies of the registers. This has to
* be done early, before we calculate lastport, etc.
*/
val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
/*
* calc number of pioavail registers, and save it; we have 2
* bits per buffer.
*/
dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
/ (sizeof(u64) * BITS_PER_BYTE / 2);
if (!ipath_kpiobufs) /* have to have at least 1, for SMA */
kpiobufs = ipath_kpiobufs = 1;
else if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) <
(dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT)) {
dev_info(&dd->pcidev->dev, "Too few PIO buffers (%u) "
"for %u ports to have %u each!\n",
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
dd->ipath_cfgports, IPATH_MIN_USER_PORT_BUFCNT);
kpiobufs = 1; /* reserve just the minimum for SMA/ether */
} else
kpiobufs = ipath_kpiobufs;
if (kpiobufs >
(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
(dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) {
i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
(dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
if (i < 0)
i = 0;
dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for "
"kernel leaves too few for %d user ports "
"(%d each); using %u\n", kpiobufs,
dd->ipath_cfgports - 1,
IPATH_MIN_USER_PORT_BUFCNT, i);
/*
* shouldn't change ipath_kpiobufs, because could be
* different for different devices...
*/
kpiobufs = i;
}
dd->ipath_lastport_piobuf =
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs;
dd->ipath_pbufsport = dd->ipath_cfgports > 1
? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1)
: 0;
val32 = dd->ipath_lastport_piobuf -
(dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
if (val32 > 0) {
ipath_dbg("allocating %u pbufs/port leaves %u unused, "
"add to kernel\n", dd->ipath_pbufsport, val32);
dd->ipath_lastport_piobuf -= val32;
ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
dd->ipath_pbufsport, val32);
}
dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
"each for %u user ports\n", kpiobufs,
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
dd->ipath_pbufsport, dd->ipath_cfgports - 1);
dd->ipath_f_early_init(dd);
/* early_init sets rcvhdrentsize and rcvhdrsize, so this must be
* done after early_init */
dd->ipath_hdrqlast =
dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
dd->ipath_rcvhdrentsize);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
dd->ipath_rcvhdrsize);
if (!reinit) {
ret = init_pioavailregs(dd);
init_shadow_tids(dd);
if (ret)
goto done;
}
(void)ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
dd->ipath_pioavailregs_phys);
/*
* this is to detect s/w errors, which the h/w works around by
* ignoring the low 6 bits of address, if it wasn't aligned.
*/
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr);
if (val != dd->ipath_pioavailregs_phys) {
ipath_dev_err(dd, "Catastrophic software error, "
"SendPIOAvailAddr written as %lx, "
"read back as %llx\n",
(unsigned long) dd->ipath_pioavailregs_phys,
(unsigned long long) val);
ret = -EINVAL;
goto done;
}
val = ipath_port0_rcvhdrtail_dma + dd->ipath_unit * 64;
/* verify that the alignment requirement was met */
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
0, val);
atmp = ipath_read_kreg64_port(
dd, dd->ipath_kregs->kr_rcvhdrtailaddr, 0);
if (val != atmp) {
ipath_dev_err(dd, "Catastrophic software error, "
"RcvHdrTailAddr0 written as %llx, "
"read back as %llx from %x\n",
(unsigned long long) val,
(unsigned long long) atmp,
dd->ipath_kregs->kr_rcvhdrtailaddr);
ret = -EINVAL;
goto done;
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
/*
* make sure we are not in freeze, and PIO send enabled, so
* writes to pbc happen
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
INFINIPATH_S_PIOENABLE);
/*
* before error clears, since we expect serdes pll errors during
* this, the first time after reset
*/
if (bringup_link(dd)) {
dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n");
ret = -ENETDOWN;
goto done;
}
/*
* clear any "expected" hwerrs from reset and/or initialization
* clear any that aren't enabled (at least this once), and then
* set the enable mask
*/
dd->ipath_f_init_hwerrors(dd);
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
dd->ipath_hwerrmask);
dd->ipath_maskederrs = dd->ipath_ignorederrs;
/* clear all */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
/* enable errors that are masked, at least this first time. */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
	/* clear any interrupts up to this point (ints still not enabled) */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
ipath_stats.sps_lid[dd->ipath_unit] = dd->ipath_lid;
/*
* Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
* existing, and re-allocate.
*/
if (reinit)
ipath_free_pddata(dd, 0, 0);
dd->ipath_f_tidtemplate(dd);
ret = ipath_create_rcvhdrq(dd, pd);
if (!ret)
ret = create_port0_egr(dd);
if (ret)
ipath_dev_err(dd, "failed to allocate port 0 (kernel) "
"rcvhdrq and/or egr bufs\n");
else
enable_chip(dd, pd, reinit);
/*
* cause retrigger of pending interrupts ignored during init,
* even if we had errors
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
	if (!dd->ipath_stats_timer_active) {
/*
* first init, or after an admin disable/enable
* set up stats retrieval timer, even if we had errors
* in last portion of setup
*/
init_timer(&dd->ipath_stats_timer);
dd->ipath_stats_timer.function = ipath_get_faststats;
dd->ipath_stats_timer.data = (unsigned long) dd;
/* every 5 seconds; */
dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
		/* takes ~16 seconds to overflow at full IB 4x bandwidth */
add_timer(&dd->ipath_stats_timer);
dd->ipath_stats_timer_active = 1;
}
done:
if (!ret) {
ipath_get_guid(dd);
*dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
if (!dd->ipath_f_intrsetup(dd)) {
/* now we can enable all interrupts from the chip */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-1LL);
/* force re-interrupt of any pending interrupts. */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
0ULL);
/* chip is usable; mark it as initialized */
*dd->ipath_statusp |= IPATH_STATUS_INITTED;
} else
ipath_dev_err(dd, "No interrupts enabled, couldn't "
"setup interrupt address\n");
if (dd->ipath_cfgports > ipath_stats.sps_nports)
/*
* sps_nports is a global, so, we set it to
* the highest number of ports of any of the
* chips we find; we never decrement it, at
* least for now. Since this might have changed
* over disable/enable or prior to reset, always
* do the check and potentially adjust.
*/
ipath_stats.sps_nports = dd->ipath_cfgports;
} else
ipath_dbg("Failed (%d) to initialize chip\n", ret);
/* if ret is non-zero, we probably should do some cleanup
here... */
return ret;
}
static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
{
struct ipath_devdata *dd;
unsigned long flags;
unsigned short val;
int ret;
ret = ipath_parse_ushort(str, &val);
spin_lock_irqsave(&ipath_devs_lock, flags);
if (ret < 0)
goto bail;
if (val == 0) {
ret = -EINVAL;
goto bail;
}
list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
if (dd->ipath_kregbase)
continue;
if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
(dd->ipath_cfgports *
IPATH_MIN_USER_PORT_BUFCNT)))
{
ipath_dev_err(
dd,
"Allocating %d PIO bufs for kernel leaves "
"too few for %d user ports (%d each)\n",
val, dd->ipath_cfgports - 1,
IPATH_MIN_USER_PORT_BUFCNT);
ret = -EINVAL;
goto bail;
}
dd->ipath_lastport_piobuf =
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
}
ret = 0;
bail:
spin_unlock_irqrestore(&ipath_devs_lock, flags);
return ret;
}
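To summarize the buffer accounting carried out in ipath_init_chip() above, here is the same arithmetic as a standalone sketch. The helper name is hypothetical, the constant 8 stands in for IPATH_MIN_USER_PORT_BUFCNT, and the corner case of a chip with fewer buffers than the per-port minimum is ignored: the kernel reservation is clamped so every user port keeps at least the minimum, the remainder is split evenly across user ports, and any leftover folds back into the kernel's share.

/* total: all 2k + 4k PIO buffers; cfgports: ports including kernel port 0 */
static void example_split_piobufs(unsigned total, unsigned cfgports,
				  unsigned kpiobufs,
				  unsigned *per_port, unsigned *kernel)
{
	unsigned min_user = cfgports * 8;	/* IPATH_MIN_USER_PORT_BUFCNT */
	unsigned user_ports = cfgports > 1 ? cfgports - 1 : 0;
	unsigned user_bufs;

	if (kpiobufs + min_user > total)	/* clamp the reservation */
		kpiobufs = total > min_user ? total - min_user : 0;
	user_bufs = total - kpiobufs;
	*per_port = user_ports ? user_bufs / user_ports : 0;
	/* buffers that do not divide evenly go back to the kernel */
	*kernel = total - *per_port * user_ports;
}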

View File

@ -0,0 +1,841 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/pci.h>
#include "ipath_kernel.h"
#include "ips_common.h"
#include "ipath_layer.h"
#define E_SUM_PKTERRS \
(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
#define E_SUM_ERRS \
(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
INFINIPATH_E_INVALIDADDR)
static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
unsigned long sbuf[4];
u64 ignore_this_time = 0;
u32 piobcnt;
/* if possible that sendbuffererror could be valid */
piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
/* read these before writing errorclear */
sbuf[0] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror);
sbuf[1] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror + 1);
if (piobcnt > 128) {
sbuf[2] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror + 2);
sbuf[3] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror + 3);
}
if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
int i;
ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
printk("%lx %lx ", sbuf[2], sbuf[3]);
for (i = 0; i < piobcnt; i++) {
if (test_bit(i, sbuf)) {
u32 __iomem *piobuf;
if (i < dd->ipath_piobcnt2k)
piobuf = (u32 __iomem *)
(dd->ipath_pio2kbase +
i * dd->ipath_palign);
else
piobuf = (u32 __iomem *)
(dd->ipath_pio4kbase +
(i - dd->ipath_piobcnt2k) *
dd->ipath_4kalign);
ipath_cdbg(PKT,
"PIObuf[%u] @%p pbc is %x; ",
i, piobuf, readl(piobuf));
ipath_disarm_piobufs(dd, i, 1);
}
}
if (ipath_debug & __IPATH_PKTDBG)
printk("\n");
}
if ((errs & (INFINIPATH_E_SDROPPEDDATAPKT |
INFINIPATH_E_SDROPPEDSMPPKT |
INFINIPATH_E_SMINPKTLEN)) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
* This can happen when SMA is trying to bring the link
* up, but the IB link changes state at the "wrong" time.
* The IB logic then complains that the packet isn't
* valid. We don't want to confuse people, so we just
* don't print them, except at debug
*/
ipath_dbg("Ignoring pktsend errors %llx, because not "
"yet active\n", (unsigned long long) errs);
ignore_this_time = INFINIPATH_E_SDROPPEDDATAPKT |
INFINIPATH_E_SDROPPEDSMPPKT |
INFINIPATH_E_SMINPKTLEN;
}
return ignore_this_time;
}
/* return the strings for the most common link states */
static char *ib_linkstate(u32 linkstate)
{
char *ret;
switch (linkstate) {
case IPATH_IBSTATE_INIT:
ret = "Init";
break;
case IPATH_IBSTATE_ARM:
ret = "Arm";
break;
case IPATH_IBSTATE_ACTIVE:
ret = "Active";
break;
default:
ret = "Down";
}
return ret;
}
static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
ipath_err_t errs, int noprint)
{
u64 val;
u32 ltstate, lstate;
/*
* even if diags are enabled, we want to notice LINKINIT, etc.
* We just don't want to change the LED state, or
* dd->ipath_kregs->kr_ibcctrl
*/
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
lstate = val & IPATH_IBSTATE_MASK;
if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
lstate == IPATH_IBSTATE_ACTIVE) {
/*
* only print at SMA if there is a change, debug if not
* (sometimes we want to know that, usually not).
*/
if (lstate == ((unsigned) dd->ipath_lastibcstat
& IPATH_IBSTATE_MASK)) {
ipath_dbg("Status change intr but no change (%s)\n",
ib_linkstate(lstate));
}
else
ipath_cdbg(SMA, "Unit %u link state %s, last "
"was %s\n", dd->ipath_unit,
ib_linkstate(lstate),
ib_linkstate((unsigned)
dd->ipath_lastibcstat
& IPATH_IBSTATE_MASK));
}
else {
lstate = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
if (lstate == IPATH_IBSTATE_INIT ||
lstate == IPATH_IBSTATE_ARM ||
lstate == IPATH_IBSTATE_ACTIVE)
ipath_cdbg(SMA, "Unit %u link state down"
" (state 0x%x), from %s\n",
dd->ipath_unit,
(u32)val & IPATH_IBSTATE_MASK,
ib_linkstate(lstate));
else
ipath_cdbg(VERBOSE, "Unit %u link state changed "
"to 0x%x from down (%x)\n",
dd->ipath_unit, (u32) val, lstate);
}
ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
INFINIPATH_IBCS_LINKSTATE_MASK;
if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
u32 last_ltstate;
/*
* Ignore cycling back and forth from Polling.Active
* to Polling.Quiet while waiting for the other end of
* the link to come up. We will cycle back and forth
* between them if no cable is plugged in,
* the other device is powered off or disabled, etc.
*/
last_ltstate = (dd->ipath_lastibcstat >>
INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)
& INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
if (last_ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE
|| last_ltstate ==
INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
if (dd->ipath_ibpollcnt > 40) {
dd->ipath_flags |= IPATH_NOCABLE;
*dd->ipath_statusp |=
IPATH_STATUS_IB_NOCABLE;
} else
dd->ipath_ibpollcnt++;
goto skip_ibchange;
}
}
dd->ipath_ibpollcnt = 0; /* some state other than 2 or 3 */
ipath_stats.sps_iblink++;
if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
dd->ipath_flags |= IPATH_LINKDOWN;
dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
| IPATH_LINKACTIVE |
IPATH_LINKARMED);
*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
if (!noprint) {
if (((dd->ipath_lastibcstat >>
INFINIPATH_IBCS_LINKSTATE_SHIFT) &
INFINIPATH_IBCS_LINKSTATE_MASK)
== INFINIPATH_IBCS_L_STATE_ACTIVE)
/* if from up to down be more vocal */
ipath_cdbg(SMA,
"Unit %u link now down (%s)\n",
dd->ipath_unit,
ipath_ibcstatus_str[ltstate]);
else
ipath_cdbg(VERBOSE, "Unit %u link is "
"down (%s)\n", dd->ipath_unit,
ipath_ibcstatus_str[ltstate]);
}
dd->ipath_f_setextled(dd, lstate, ltstate);
} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE) {
dd->ipath_flags |= IPATH_LINKACTIVE;
dd->ipath_flags &=
~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN |
IPATH_LINKARMED | IPATH_NOCABLE);
*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
*dd->ipath_statusp |=
IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
dd->ipath_f_setextled(dd, lstate, ltstate);
__ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
/*
* set INIT and DOWN. Down is checked by most of the other
* code, but INIT is useful to know in a few places.
*/
dd->ipath_flags |= IPATH_LINKINIT | IPATH_LINKDOWN;
dd->ipath_flags &=
~(IPATH_LINKUNK | IPATH_LINKACTIVE | IPATH_LINKARMED
| IPATH_NOCABLE);
*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
| IPATH_STATUS_IB_READY);
dd->ipath_f_setextled(dd, lstate, ltstate);
} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
dd->ipath_flags |= IPATH_LINKARMED;
dd->ipath_flags &=
~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
IPATH_LINKACTIVE | IPATH_NOCABLE);
*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
| IPATH_STATUS_IB_READY);
dd->ipath_f_setextled(dd, lstate, ltstate);
} else {
if (!noprint)
ipath_dbg("IBstatuschange unit %u: %s (%x)\n",
dd->ipath_unit,
ipath_ibcstatus_str[ltstate], ltstate);
}
skip_ibchange:
dd->ipath_lastibcstat = val;
}
static void handle_supp_msgs(struct ipath_devdata *dd,
unsigned supp_msgs, char msg[512])
{
/*
* Print the message unless it's ibc status change only, which
* happens so often we never want to count it.
*/
if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
~INFINIPATH_E_IBSTATUSCHANGED);
if (dd->ipath_lasterror &
~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
ipath_dev_err(dd, "Suppressed %u messages for "
"fast-repeating errors (%s) (%llx)\n",
supp_msgs, msg,
(unsigned long long)
dd->ipath_lasterror);
else {
/*
* rcvegrfull and rcvhdrqfull are "normal", for some
* types of processes (mostly benchmarks) that send
* huge numbers of messages, while not processing
* them. So only complain about these at debug
* level.
*/
ipath_dbg("Suppressed %u messages for %s\n",
supp_msgs, msg);
}
}
}
static unsigned handle_frequent_errors(struct ipath_devdata *dd,
ipath_err_t errs, char msg[512],
int *noprint)
{
unsigned long nc;
static unsigned long nextmsg_time;
static unsigned nmsgs, supp_msgs;
/*
* Throttle back "fast" messages to no more than 10 per 5 seconds.
* This isn't perfect, but it's a reasonable heuristic. If we get
* more than 10, give a 6x longer delay.
*/
nc = jiffies;
if (nmsgs > 10) {
if (time_before(nc, nextmsg_time)) {
*noprint = 1;
if (!supp_msgs++)
nextmsg_time = nc + HZ * 3;
}
else if (supp_msgs) {
handle_supp_msgs(dd, supp_msgs, msg);
supp_msgs = 0;
nmsgs = 0;
}
}
else if (!nmsgs++ || time_after(nc, nextmsg_time))
nextmsg_time = nc + HZ / 2;
return supp_msgs;
}
static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
char msg[512];
u64 ignore_this_time = 0;
int i;
int chkerrpkts = 0, noprint = 0;
unsigned supp_msgs;
supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
/*
* don't report errors that are masked (includes those always
* ignored)
*/
errs &= ~dd->ipath_maskederrs;
/* do these first, they are most important */
if (errs & INFINIPATH_E_HARDWARE) {
/* reuse same msg buf */
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
}
if (!noprint && (errs & ~infinipath_e_bitsextant))
ipath_dev_err(dd, "error interrupt with unknown errors "
"%llx set\n", (unsigned long long)
(errs & ~infinipath_e_bitsextant));
if (errs & E_SUM_ERRS)
ignore_this_time = handle_e_sum_errs(dd, errs);
if (supp_msgs == 250000) {
/*
* It's not entirely reasonable assuming that the errors set
* in the last clear period are all responsible for the
* problem, but the alternative is to assume it's the only
* ones on this particular interrupt, which also isn't great
*/
dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
ipath_decode_err(msg, sizeof msg,
(dd->ipath_maskederrs & ~dd->
ipath_ignorederrs));
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
ipath_dev_err(dd, "Disabling error(s) %llx because "
"occuring too frequently (%s)\n",
(unsigned long long)
(dd->ipath_maskederrs &
~dd->ipath_ignorederrs), msg);
else {
/*
* rcvegrfull and rcvhdrqfull are "normal",
* for some types of processes (mostly benchmarks)
* that send huge numbers of messages, while not
* processing them. So only complain about
* these at debug level.
*/
ipath_dbg("Disabling frequent queue full errors "
"(%s)\n", msg);
}
/*
		 * Re-enable the masked errors after around 3 minutes, in
* ipath_get_faststats(). If we have a series of fast
* repeating but different errors, the interval will keep
* stretching out, but that's OK, as that's pretty
* catastrophic.
*/
dd->ipath_unmasktime = jiffies + HZ * 180;
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
if (ignore_this_time)
errs &= ~ignore_this_time;
if (errs & ~dd->ipath_lasterror) {
errs &= ~dd->ipath_lasterror;
/* never suppress duplicate hwerrors or ibstatuschange */
dd->ipath_lasterror |= errs &
~(INFINIPATH_E_HARDWARE |
INFINIPATH_E_IBSTATUSCHANGED);
}
if (!errs)
return;
if (!noprint)
/*
* the ones we mask off are handled specially below or above
*/
ipath_decode_err(msg, sizeof msg,
errs & ~(INFINIPATH_E_IBSTATUSCHANGED |
INFINIPATH_E_RRCVEGRFULL |
INFINIPATH_E_RRCVHDRFULL |
INFINIPATH_E_HARDWARE));
else
/* so we don't need if (!noprint) at strlcat's below */
*msg = 0;
if (errs & E_SUM_PKTERRS) {
ipath_stats.sps_pkterrs++;
chkerrpkts = 1;
}
if (errs & E_SUM_ERRS)
ipath_stats.sps_errs++;
if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
ipath_stats.sps_crcerrs++;
chkerrpkts = 1;
}
/*
* We don't want to print these two as they happen, or we can make
* the situation even worse, because it takes so long to print
* messages to serial consoles. Kernel ports get printed from
* fast_stats, no more than every 5 seconds, user ports get printed
* on close
*/
if (errs & INFINIPATH_E_RRCVHDRFULL) {
int any;
u32 hd, tl;
ipath_stats.sps_hdrqfull++;
for (any = i = 0; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
if (i == 0) {
hd = dd->ipath_port0head;
tl = (u32) le64_to_cpu(
*dd->ipath_hdrqtailptr);
} else if (pd && pd->port_cnt &&
pd->port_rcvhdrtail_kvaddr) {
/*
* don't report same point multiple times,
* except kernel
*/
tl = (u32) * pd->port_rcvhdrtail_kvaddr;
if (tl == dd->ipath_lastrcvhdrqtails[i])
continue;
hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
i);
} else
continue;
if (hd == (tl + 1) ||
(!hd && tl == dd->ipath_hdrqlast)) {
dd->ipath_lastrcvhdrqtails[i] = tl;
pd->port_hdrqfull++;
if (i == 0)
chkerrpkts = 1;
}
}
}
if (errs & INFINIPATH_E_RRCVEGRFULL) {
/*
* since this is of less importance and not likely to
* happen without also getting hdrfull, only count
* occurrences; don't check each port (or even the kernel
* vs user)
*/
ipath_stats.sps_etidfull++;
if (dd->ipath_port0head !=
(u32) le64_to_cpu(*dd->ipath_hdrqtailptr))
chkerrpkts = 1;
}
/*
* do this before IBSTATUSCHANGED, in case both bits set in a single
* interrupt; we want the STATUSCHANGE to "win", so we do our
* internal copy of state machine correctly
*/
if (errs & INFINIPATH_E_RIBLOSTLINK) {
/*
* force through block below
*/
errs |= INFINIPATH_E_IBSTATUSCHANGED;
ipath_stats.sps_iblink++;
dd->ipath_flags |= IPATH_LINKDOWN;
dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
| IPATH_LINKARMED | IPATH_LINKACTIVE);
*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
if (!noprint) {
u64 st = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_ibcstatus);
ipath_dbg("Lost link, link now down (%s)\n",
ipath_ibcstatus_str[st & 0xf]);
}
}
if (errs & INFINIPATH_E_IBSTATUSCHANGED)
handle_e_ibstatuschanged(dd, errs, noprint);
if (errs & INFINIPATH_E_RESET) {
if (!noprint)
ipath_dev_err(dd, "Got reset, requires re-init "
"(unload and reload driver)\n");
dd->ipath_flags &= ~IPATH_INITTED; /* needs re-init */
/* mark as having had error */
*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
}
if (!noprint && *msg)
ipath_dev_err(dd, "%s error\n", msg);
if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
"waking\n", dd->ipath_sma_state_wanted,
dd->ipath_flags);
wake_up_interruptible(&ipath_sma_state_wait);
}
if (chkerrpkts)
/* process possible error packets in hdrq */
ipath_kreceive(dd);
}
/* this is separate to allow for better optimization of ipath_intr() */
static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
{
/*
	 * These sometimes happen during driver init and unload; we don't
	 * want to process any interrupts at that point.
*/
/* this is just a bandaid, not a fix, if something goes badly
* wrong */
if (++*unexpectp > 100) {
if (++*unexpectp > 105) {
/*
* ok, we must be taking somebody else's interrupts,
* due to a messed up mptable and/or PIRQ table, so
* unregister the interrupt. We've seen this during
* linuxbios development work, and it may happen in
* the future again.
*/
if (dd->pcidev && dd->pcidev->irq) {
ipath_dev_err(dd, "Now %u unexpected "
"interrupts, unregistering "
"interrupt handler\n",
*unexpectp);
ipath_dbg("free_irq of irq %x\n",
dd->pcidev->irq);
free_irq(dd->pcidev->irq, dd);
}
}
if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) {
ipath_dev_err(dd, "%u unexpected interrupts, "
"disabling interrupts completely\n",
*unexpectp);
/*
* disable all interrupts, something is very wrong
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
0ULL);
}
} else if (*unexpectp > 1)
ipath_dbg("Interrupt when not ready, should not happen, "
"ignoring\n");
}
static void ipath_bad_regread(struct ipath_devdata *dd)
{
static int allbits;
/* separate routine, for better optimization of ipath_intr() */
/*
* We print the message and disable interrupts, in hope of
* having a better chance of debugging the problem.
*/
ipath_dev_err(dd,
"Read of interrupt status failed (all bits set)\n");
if (allbits++) {
/* disable all interrupts, something is very wrong */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
if (allbits == 2) {
ipath_dev_err(dd, "Still bad interrupt status, "
"unregistering interrupt\n");
free_irq(dd->pcidev->irq, dd);
} else if (allbits > 2) {
if ((allbits % 10000) == 0)
printk(".");
} else
ipath_dev_err(dd, "Disabling interrupts, "
"multiple errors\n");
}
}
static void handle_port_pioavail(struct ipath_devdata *dd)
{
u32 i;
/*
* start from port 1, since for now port 0 is never using
* wait_event for PIO
*/
for (i = 1; dd->ipath_portpiowait && i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
if (pd && pd->port_cnt &&
dd->ipath_portpiowait & (1U << i)) {
clear_bit(i, &dd->ipath_portpiowait);
if (test_bit(IPATH_PORT_WAITING_PIO,
&pd->port_flag)) {
clear_bit(IPATH_PORT_WAITING_PIO,
&pd->port_flag);
wake_up_interruptible(&pd->port_wait);
}
}
}
}
static void handle_layer_pioavail(struct ipath_devdata *dd)
{
int ret;
ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
if (ret > 0)
goto clear;
ret = __ipath_verbs_piobufavail(dd);
if (ret > 0)
goto clear;
return;
clear:
set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
}
static void handle_rcv(struct ipath_devdata *dd, u32 istat)
{
u64 portr;
int i;
int rcvdint = 0;
portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
infinipath_i_rcvavail_mask)
| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
infinipath_i_rcvurg_mask);
for (i = 0; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
if (portr & (1 << i) && pd &&
pd->port_cnt) {
if (i == 0)
ipath_kreceive(dd);
else if (test_bit(IPATH_PORT_WAITING_RCV,
&pd->port_flag)) {
int rcbit;
clear_bit(IPATH_PORT_WAITING_RCV,
&pd->port_flag);
rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
wake_up_interruptible(&pd->port_wait);
rcvdint = 1;
}
}
}
if (rcvdint) {
/* only want to take one interrupt, so turn off the rcv
* interrupt for all the ports that we did the wakeup on
* (but never for kernel port)
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
}
}
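/*
 * Worked example for handle_rcv() above (illustration only; the bit
 * positions are hypothetical, not taken from the register spec): if the
 * rcvavail portion of istat has bits 0 and 2 set, portr becomes 0x5, so
 * the loop calls ipath_kreceive() directly for the kernel port (port 0)
 * and wakes any process sleeping on port 2's port_wait queue, after
 * which the rcv interrupt for port 2 is masked via kr_rcvctrl.
 */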
irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
{
struct ipath_devdata *dd = data;
u32 istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
ipath_err_t estat = 0;
static unsigned unexpected = 0;
irqreturn_t ret;
if (unlikely(!istat)) {
ipath_stats.sps_nullintr++;
ret = IRQ_NONE; /* not our interrupt, or already handled */
goto bail;
}
if (unlikely(istat == -1)) {
ipath_bad_regread(dd);
/* don't know if it was our interrupt or not */
ret = IRQ_NONE;
goto bail;
}
ipath_stats.sps_ints++;
/*
* this needs to be flags&initted, not statusp, so we keep
* taking interrupts even after link goes down, etc.
* Also, we *must* clear the interrupt at some point, or we won't
	 * take it again, which can be really bad for errors, etc...
*/
if (!(dd->ipath_flags & IPATH_INITTED)) {
ipath_bad_intr(dd, &unexpected);
ret = IRQ_NONE;
goto bail;
}
if (unexpected)
unexpected = 0;
ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
if (istat & ~infinipath_i_bitsextant)
ipath_dev_err(dd,
"interrupt with unknown interrupts %x set\n",
istat & (u32) ~ infinipath_i_bitsextant);
if (istat & INFINIPATH_I_ERROR) {
ipath_stats.sps_errints++;
estat = ipath_read_kreg64(dd,
dd->ipath_kregs->kr_errorstatus);
if (!estat)
dev_info(&dd->pcidev->dev, "error interrupt (%x), "
"but no error bits set!\n", istat);
else if (estat == -1LL)
/*
* should we try clearing all, or hope next read
* works?
*/
ipath_dev_err(dd, "Read of error status failed "
"(all bits set); ignoring\n");
else
handle_errors(dd, estat);
}
if (istat & INFINIPATH_I_GPIO) {
if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
u32 gpiostatus;
gpiostatus = ipath_read_kreg32(
dd, dd->ipath_kregs->kr_gpio_status);
ipath_dbg("Unexpected GPIO interrupt bits %x\n",
gpiostatus);
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
gpiostatus);
}
else {
/* Clear GPIO status bit 2 */
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
(u64) (1 << 2));
/*
* Packets are available in the port 0 rcv queue.
* Eventually this needs to be generalized to check
* IPATH_GPIO_INTR, and the specific GPIO bit, if
* GPIO interrupts are used for anything else.
*/
ipath_kreceive(dd);
}
}
/*
	 * Clear the ones we will deal with on this round.
* We clear it early, mostly for receive interrupts, so we
* know the chip will have seen this by the time we process
* the queue, and will re-interrupt if necessary. The processor
* itself won't take the interrupt again until we return.
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
if (dd->ipath_portpiowait)
handle_port_pioavail(dd);
handle_layer_pioavail(dd);
}
/*
* we check for both transition from empty to non-empty, and urgent
* packets (those with the interrupt bit set in the header)
*/
if (istat & ((infinipath_i_rcvavail_mask <<
INFINIPATH_I_RCVAVAIL_SHIFT)
| (infinipath_i_rcvurg_mask <<
INFINIPATH_I_RCVURG_SHIFT)))
handle_rcv(dd, istat);
ret = IRQ_HANDLED;
bail:
return ret;
}

View File

@ -0,0 +1,884 @@
#ifndef _IPATH_KERNEL_H
#define _IPATH_KERNEL_H
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*
 * This header file is the base header file for infinipath kernel code.
* ipath_user.h serves a similar purpose for user code.
*/
#include <linux/interrupt.h>
#include <asm/io.h>
#include "ipath_common.h"
#include "ipath_debug.h"
#include "ipath_registers.h"
/* only s/w major version of InfiniPath we can handle */
#define IPATH_CHIP_VERS_MAJ 2U
/* don't care about this except printing */
#define IPATH_CHIP_VERS_MIN 0U
/* temporary, maybe always */
extern struct infinipath_stats ipath_stats;
#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
struct ipath_portdata {
void **port_rcvegrbuf;
dma_addr_t *port_rcvegrbuf_phys;
/* rcvhdrq base, needs mmap before useful */
void *port_rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
u64 *port_rcvhdrtail_kvaddr;
/* page * used for uaddr */
struct page *port_rcvhdrtail_pagep;
/*
* temp buffer for expected send setup, allocated at open, instead
* of each setup call
*/
void *port_tid_pg_list;
/* when waiting for rcv or pioavail */
wait_queue_head_t port_wait;
/*
	 * rcvegr bufs base, physical; must fit in 44 bits
	 * (so mmap64 from 32 bit programs with 44 bit addresses works)
*/
dma_addr_t port_rcvegr_phys;
/* mmap of hdrq, must fit in 44 bits */
dma_addr_t port_rcvhdrq_phys;
/*
* the actual user address that we ipath_mlock'ed, so we can
* ipath_munlock it at close
*/
unsigned long port_rcvhdrtail_uaddr;
/*
* number of opens on this instance (0 or 1; ignoring forks, dup,
* etc. for now)
*/
int port_cnt;
/*
* how much space to leave at start of eager TID entries for
* protocol use, on each TID
*/
	/* this port's index, stored instead of calculating it */
unsigned port_port;
/* chip offset of PIO buffers for this port */
u32 port_piobufs;
/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
u32 port_rcvegrbuf_chunks;
/* how many egrbufs per chunk */
u32 port_rcvegrbufs_perchunk;
/* order for port_rcvegrbuf_pages */
size_t port_rcvegrbuf_size;
/* rcvhdrq size (for freeing) */
size_t port_rcvhdrq_size;
/* next expected TID to check when looking for free */
u32 port_tidcursor;
	/* port state and waiting flags (IPATH_PORT_* bit offsets) */
unsigned long port_flag;
/* WAIT_RCV that timed out, no interrupt */
u32 port_rcvwait_to;
/* WAIT_PIO that timed out, no interrupt */
u32 port_piowait_to;
/* WAIT_RCV already happened, no wait */
u32 port_rcvnowait;
/* WAIT_PIO already happened, no wait */
u32 port_pionowait;
/* total number of rcvhdrqfull errors */
u32 port_hdrqfull;
/* pid of process using this port */
pid_t port_pid;
/* same size as task_struct .comm[] */
char port_comm[16];
/* pkeys set by this use of this port */
u16 port_pkeys[4];
/* so file ops can get at unit */
struct ipath_devdata *port_dd;
};
struct sk_buff;
/*
* control information for layered drivers
*/
struct _ipath_layer {
void *l_arg;
};
/* Verbs layer interface */
struct _verbs_layer {
void *l_arg;
struct timer_list l_timer;
};
struct ipath_devdata {
struct list_head ipath_list;
struct ipath_kregs const *ipath_kregs;
struct ipath_cregs const *ipath_cregs;
/* mem-mapped pointer to base of chip regs */
u64 __iomem *ipath_kregbase;
/* end of mem-mapped chip space; range checking */
u64 __iomem *ipath_kregend;
/* physical address of chip for io_remap, etc. */
unsigned long ipath_physaddr;
/* base of memory alloced for ipath_kregbase, for free */
u64 *ipath_kregalloc;
/*
* version of kregbase that doesn't have high bits set (for 32 bit
* programs, so mmap64 44 bit works)
*/
u64 __iomem *ipath_kregvirt;
/*
	 * virtual address where port0 rcvhdrqtail is updated for this unit;
	 * only written to by the chip, not the driver.
*/
volatile __le64 *ipath_hdrqtailptr;
dma_addr_t ipath_dma_addr;
/* ipath_cfgports pointers */
struct ipath_portdata **ipath_pd;
/* sk_buffs used by port 0 eager receive queue */
struct sk_buff **ipath_port0_skbs;
/* kvirt address of 1st 2k pio buffer */
void __iomem *ipath_pio2kbase;
/* kvirt address of 1st 4k pio buffer */
void __iomem *ipath_pio4kbase;
/*
* points to area where PIOavail registers will be DMA'ed.
	 * Has to be on a page of its own, because the page will be
* mapped into user program space. This copy is *ONLY* ever
* written by DMA, not by the driver! Need a copy per device
* when we get to multiple devices
*/
volatile __le64 *ipath_pioavailregs_dma;
/* physical address where updates occur */
dma_addr_t ipath_pioavailregs_phys;
struct _ipath_layer ipath_layer;
/* setup intr */
int (*ipath_f_intrsetup)(struct ipath_devdata *);
/* setup on-chip bus config */
int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
/* hard reset chip */
int (*ipath_f_reset)(struct ipath_devdata *);
int (*ipath_f_get_boardname)(struct ipath_devdata *, char *,
size_t);
void (*ipath_f_init_hwerrors)(struct ipath_devdata *);
void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *,
size_t);
void (*ipath_f_quiet_serdes)(struct ipath_devdata *);
int (*ipath_f_bringup_serdes)(struct ipath_devdata *);
int (*ipath_f_early_init)(struct ipath_devdata *);
void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned);
void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*,
u32, unsigned long);
void (*ipath_f_tidtemplate)(struct ipath_devdata *);
void (*ipath_f_cleanup)(struct ipath_devdata *);
void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
/* fill out chip-specific fields */
int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
struct _verbs_layer verbs_layer;
/* total dwords sent (summed from counter) */
u64 ipath_sword;
/* total dwords rcvd (summed from counter) */
u64 ipath_rword;
/* total packets sent (summed from counter) */
u64 ipath_spkts;
/* total packets rcvd (summed from counter) */
u64 ipath_rpkts;
/* ipath_statusp initially points to this. */
u64 _ipath_status;
/* GUID for this interface, in network order */
__be64 ipath_guid;
/*
	 * aggregate of error bits reported since last cleared, for
* limiting of error reporting
*/
ipath_err_t ipath_lasterror;
/*
	 * aggregate of error bits reported since last cleared, for
* limiting of hwerror reporting
*/
ipath_err_t ipath_lasthwerror;
/*
* errors masked because they occur too fast, also includes errors
* that are always ignored (ipath_ignorederrs)
*/
ipath_err_t ipath_maskederrs;
/* time in jiffies at which to re-enable maskederrs */
unsigned long ipath_unmasktime;
/*
* errors always ignored (masked), at least for a given
* chip/device, because they are wrong or not useful
*/
ipath_err_t ipath_ignorederrs;
/* count of egrfull errors, combined for all ports */
u64 ipath_last_tidfull;
/* for ipath_qcheck() */
u64 ipath_lastport0rcv_cnt;
/* template for writing TIDs */
u64 ipath_tidtemplate;
/* value to write to free TIDs */
u64 ipath_tidinvalid;
/* PE-800 rcv interrupt setup */
u64 ipath_rhdrhead_intr_off;
/* size of memory at ipath_kregbase */
u32 ipath_kregsize;
/* number of registers used for pioavail */
u32 ipath_pioavregs;
/* IPATH_POLL, etc. */
u32 ipath_flags;
/* ipath_flags sma is waiting for */
u32 ipath_sma_state_wanted;
/* last buffer for user use, first buf for kernel use is this
* index. */
u32 ipath_lastport_piobuf;
/* is a stats timer active */
u32 ipath_stats_timer_active;
/* dwords sent read from counter */
u32 ipath_lastsword;
/* dwords received read from counter */
u32 ipath_lastrword;
/* sent packets read from counter */
u32 ipath_lastspkts;
/* received packets read from counter */
u32 ipath_lastrpkts;
/* pio bufs allocated per port */
u32 ipath_pbufsport;
/*
* number of ports configured as max; zero is set to number chip
* supports, less gives more pio bufs/port, etc.
*/
u32 ipath_cfgports;
/* port0 rcvhdrq head offset */
u32 ipath_port0head;
/* count of port 0 hdrqfull errors */
u32 ipath_p0_hdrqfull;
/*
* (*cfgports) used to suppress multiple instances of same
* port staying stuck at same point
*/
u32 *ipath_lastrcvhdrqtails;
/*
* (*cfgports) used to suppress multiple instances of same
* port staying stuck at same point
*/
u32 *ipath_lastegrheads;
/*
* index of last piobuffer we used. Speeds up searching, by
	 * starting at this point.  Doesn't matter if multiple CPUs use and
	 * update it; the last update is the only write that matters.  Whenever it
* wraps, we update shadow copies. Need a copy per device when we
* get to multiple devices
*/
u32 ipath_lastpioindex;
/* max length of freezemsg */
u32 ipath_freezelen;
/*
* consecutive times we wanted a PIO buffer but were unable to
* get one
*/
u32 ipath_consec_nopiobuf;
/*
* hint that we should update ipath_pioavailshadow before
* looking for a PIO buffer
*/
u32 ipath_upd_pio_shadow;
/* so we can rewrite it after a chip reset */
u32 ipath_pcibar0;
/* so we can rewrite it after a chip reset */
u32 ipath_pcibar1;
/* sequential tries for SMA send and no bufs */
u32 ipath_nosma_bufs;
/* duration (seconds) ipath_nosma_bufs set */
u32 ipath_nosma_secs;
/* HT/PCI Vendor ID (here for NodeInfo) */
u16 ipath_vendorid;
/* HT/PCI Device ID (here for NodeInfo) */
u16 ipath_deviceid;
/* offset in HT config space of slave/primary interface block */
u8 ipath_ht_slave_off;
/* for write combining settings */
unsigned long ipath_wc_cookie;
/* ref count for each pkey */
atomic_t ipath_pkeyrefs[4];
/* shadow copy of all exptids physaddr; used only by funcsim */
u64 *ipath_tidsimshadow;
/* shadow copy of struct page *'s for exp tid pages */
struct page **ipath_pageshadow;
/* lock to workaround chip bug 9437 */
spinlock_t ipath_tid_lock;
/*
* IPATH_STATUS_*,
* this address is mapped readonly into user processes so they can
* get status cheaply, whenever they want.
*/
u64 *ipath_statusp;
/* freeze msg if hw error put chip in freeze */
char *ipath_freezemsg;
/* pci access data structure */
struct pci_dev *pcidev;
struct cdev *cdev;
struct class_device *class_dev;
/* timer used to prevent stats overflow, error throttling, etc. */
struct timer_list ipath_stats_timer;
/* check for stale messages in rcv queue */
/* only allow one intr at a time. */
unsigned long ipath_rcv_pending;
/*
* Shadow copies of registers; size indicates read access size.
	 * Most of them are readonly, but some are write-only registers,
* where we manipulate the bits in the shadow copy, and then write
* the shadow copy to infinipath.
*
* We deliberately make most of these 32 bits, since they have
	 * restricted range. For any that we read, we want to generate 32
* bit accesses, since Opteron will generate 2 separate 32 bit HT
* transactions for a 64 bit read, and we want to avoid unnecessary
* HT transactions.
*/
/* This is the 64 bit group */
/*
* shadow of pioavail, check to be sure it's large enough at
* init time.
*/
unsigned long ipath_pioavailshadow[8];
/* shadow of kr_gpio_out, for rmw ops */
u64 ipath_gpio_out;
/* kr_revision shadow */
u64 ipath_revision;
/*
* shadow of ibcctrl, for interrupt handling of link changes,
* etc.
*/
u64 ipath_ibcctrl;
/*
* last ibcstatus, to suppress "duplicate" status change messages,
* mostly from 2 to 3
*/
u64 ipath_lastibcstat;
/* hwerrmask shadow */
ipath_err_t ipath_hwerrmask;
/* interrupt config reg shadow */
u64 ipath_intconfig;
/* kr_sendpiobufbase value */
u64 ipath_piobufbase;
/* these are the "32 bit" regs */
/*
* number of GUIDs in the flash for this interface; may need some
* rethinking for setting on other ifaces
*/
u32 ipath_nguid;
/*
* the following two are 32-bit bitmasks, but {test,clear,set}_bit
* all expect bit fields to be "unsigned long"
*/
/* shadow kr_rcvctrl */
unsigned long ipath_rcvctrl;
/* shadow kr_sendctrl */
unsigned long ipath_sendctrl;
/* value we put in kr_rcvhdrcnt */
u32 ipath_rcvhdrcnt;
/* value we put in kr_rcvhdrsize */
u32 ipath_rcvhdrsize;
/* value we put in kr_rcvhdrentsize */
u32 ipath_rcvhdrentsize;
/* offset of last entry in rcvhdrq */
u32 ipath_hdrqlast;
/* kr_portcnt value */
u32 ipath_portcnt;
/* kr_pagealign value */
u32 ipath_palign;
/* number of "2KB" PIO buffers */
u32 ipath_piobcnt2k;
/* size in bytes of "2KB" PIO buffers */
u32 ipath_piosize2k;
/* number of "4KB" PIO buffers */
u32 ipath_piobcnt4k;
/* size in bytes of "4KB" PIO buffers */
u32 ipath_piosize4k;
/* kr_rcvegrbase value */
u32 ipath_rcvegrbase;
/* kr_rcvegrcnt value */
u32 ipath_rcvegrcnt;
/* kr_rcvtidbase value */
u32 ipath_rcvtidbase;
/* kr_rcvtidcnt value */
u32 ipath_rcvtidcnt;
/* kr_sendregbase */
u32 ipath_sregbase;
/* kr_userregbase */
u32 ipath_uregbase;
/* kr_counterregbase */
u32 ipath_cregbase;
/* shadow the control register contents */
u32 ipath_control;
/* shadow the gpio output contents */
u32 ipath_extctrl;
/* PCI revision register (HTC rev on FPGA) */
u32 ipath_pcirev;
/* chip address space used by 4k pio buffers */
u32 ipath_4kalign;
/* The MTU programmed for this unit */
u32 ipath_ibmtu;
/*
	 * The max size IB packet, including IB headers, that we can send.
* Starts same as ipath_piosize, but is affected when ibmtu is
* changed, or by size of eager buffers
*/
u32 ipath_ibmaxlen;
/*
* ibmaxlen at init time, limited by chip and by receive buffer
* size. Not changed after init.
*/
u32 ipath_init_ibmaxlen;
/* size of each rcvegrbuffer */
u32 ipath_rcvegrbufsize;
/* width (2,4,8,16,32) from HT config reg */
u32 ipath_htwidth;
/* HT speed (200,400,800,1000) from HT config */
u32 ipath_htspeed;
/* ports waiting for PIOavail intr */
unsigned long ipath_portpiowait;
/*
	 * number of sequential ibcstatus changes for polling active/quiet
* (i.e., link not coming up).
*/
u32 ipath_ibpollcnt;
/* low and high portions of MSI capability/vector */
u32 ipath_msi_lo;
/* saved after PCIe init for restore after reset */
u32 ipath_msi_hi;
/* MSI data (vector) saved for restore */
u16 ipath_msi_data;
/* MLID programmed for this instance */
u16 ipath_mlid;
/* LID programmed for this instance */
u16 ipath_lid;
/* list of pkeys programmed; 0 if not set */
u16 ipath_pkeys[4];
/* ASCII serial number, from flash */
u8 ipath_serial[12];
/* human readable board version */
u8 ipath_boardversion[80];
/* chip major rev, from ipath_revision */
u8 ipath_majrev;
/* chip minor rev, from ipath_revision */
u8 ipath_minrev;
/* board rev, from ipath_revision */
u8 ipath_boardrev;
/* unit # of this chip, if present */
int ipath_unit;
/* saved for restore after reset */
u8 ipath_pci_cacheline;
/* LID mask control */
u8 ipath_lmc;
};
extern volatile __le64 *ipath_port0_rcvhdrtail;
extern dma_addr_t ipath_port0_rcvhdrtail_dma;
#define IPATH_PORT0_RCVHDRTAIL_SIZE PAGE_SIZE
extern struct list_head ipath_dev_list;
extern spinlock_t ipath_devs_lock;
extern struct ipath_devdata *ipath_lookup(int unit);
extern u16 ipath_layer_rcv_opcode;
extern int ipath_verbs_registered;
extern int __ipath_layer_intr(struct ipath_devdata *, u32);
extern int ipath_layer_intr(struct ipath_devdata *, u32);
extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
struct sk_buff *);
extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
void ipath_layer_add(struct ipath_devdata *);
void ipath_layer_del(struct ipath_devdata *);
int ipath_init_chip(struct ipath_devdata *, int);
int ipath_enable_wc(struct ipath_devdata *dd);
void ipath_disable_wc(struct ipath_devdata *dd);
int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
void ipath_shutdown_device(struct ipath_devdata *);
struct file_operations;
int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
struct cdev **cdevp, struct class_device **class_devp);
void ipath_cdev_cleanup(struct cdev **cdevp,
struct class_device **class_devp);
int ipath_diag_init(void);
void ipath_diag_cleanup(void);
void ipath_diag_bringup_link(struct ipath_devdata *);
extern wait_queue_head_t ipath_sma_state_wait;
int ipath_user_add(struct ipath_devdata *dd);
void ipath_user_del(struct ipath_devdata *dd);
struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
extern int ipath_diag_inuse;
irqreturn_t ipath_intr(int irq, void *devid, struct pt_regs *regs);
void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
#if __IPATH_INFO || __IPATH_DBG
extern const char *ipath_ibcstatus_str[];
#endif
/* clean up any per-chip chip-specific stuff */
void ipath_chip_cleanup(struct ipath_devdata *);
/* clean up any chip type-specific stuff */
void ipath_chip_done(void);
/* check to see if we have to force ordering for write combining */
int ipath_unordered_wc(void);
void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
unsigned cnt);
int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
void ipath_free_pddata(struct ipath_devdata *, u32, int);
int ipath_parse_ushort(const char *str, unsigned short *valp);
int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
void ipath_set_ib_lstate(struct ipath_devdata *, int);
void ipath_kreceive(struct ipath_devdata *);
int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
int ipath_reset_device(int);
void ipath_get_faststats(unsigned long);
/* for use in system calls, where we want to know device type, etc. */
#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
/*
* values for ipath_flags
*/
/* The chip is up and initted */
#define IPATH_INITTED 0x2
/* set if any user code has set kr_rcvhdrsize */
#define IPATH_RCVHDRSZ_SET 0x4
/* The chip is present and valid for accesses */
#define IPATH_PRESENT 0x8
/* HT link0 is only 8 bits wide, ignore upper byte crc
* errors, etc. */
#define IPATH_8BIT_IN_HT0 0x10
/* HT link1 is only 8 bits wide, ignore upper byte crc
* errors, etc. */
#define IPATH_8BIT_IN_HT1 0x20
/* The link is down */
#define IPATH_LINKDOWN 0x40
/* The link level is up (0x11) */
#define IPATH_LINKINIT 0x80
/* The link is in the armed (0x21) state */
#define IPATH_LINKARMED 0x100
/* The link is in the active (0x31) state */
#define IPATH_LINKACTIVE 0x200
/* link current state is unknown */
#define IPATH_LINKUNK 0x400
/* no IB cable, or no device on IB cable */
#define IPATH_NOCABLE 0x4000
/* Supports port zero per packet receive interrupts via
* GPIO */
#define IPATH_GPIO_INTR 0x8000
/* uses the coded 4byte TID, not 8 byte */
#define IPATH_4BYTE_TID 0x10000
/* packet/word counters are 32 bit, else those 4 counters
* are 64bit */
#define IPATH_32BITCOUNTERS 0x20000
/* can miss port0 rx interrupts */
#define IPATH_POLL_RX_INTR 0x40000
#define IPATH_DISABLED 0x80000 /* administratively disabled */
/* portdata flag bit offsets */
/* waiting for a packet to arrive */
#define IPATH_PORT_WAITING_RCV 2
/* waiting for a PIO buffer to be available */
#define IPATH_PORT_WAITING_PIO 3
/* free up any allocated data at closes */
void ipath_free_data(struct ipath_portdata *dd);
int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
/* init PE-800-specific func */
void ipath_init_pe800_funcs(struct ipath_devdata *);
/* init HT-400-specific func */
void ipath_init_ht400_funcs(struct ipath_devdata *);
void ipath_get_guid(struct ipath_devdata *);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
/*
* number of words used for protocol header if not set by ipath_userinit();
*/
#define IPATH_DFLT_RCVHDRSIZE 9
#define IPATH_MDIO_CMD_WRITE 1
#define IPATH_MDIO_CMD_READ 2
#define IPATH_MDIO_CLD_DIV 25 /* to get 2.5 MHz mdio clock */
#define IPATH_MDIO_CMDVALID 0x40000000 /* bit 30 */
#define IPATH_MDIO_DATAVALID 0x80000000 /* bit 31 */
#define IPATH_MDIO_CTRL_STD 0x0
static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data)
{
return (((u64) IPATH_MDIO_CLD_DIV) << 32) |
(cmd << 26) |
(dev << 21) |
(reg << 16) |
(data & 0xFFFF);
}
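/*
 * Worked example for ipath_mdio_req() (illustration only; the argument
 * values below are made up):
 *
 *	u64 req = ipath_mdio_req(IPATH_MDIO_CMD_READ, 1, 0x8, 0);
 *
 * packs to (25ULL << 32) | (2 << 26) | (1 << 21) | (0x8 << 16), i.e. the
 * clock divider in bits 32 and up, the command starting at bit 26, the
 * device at bit 21, the register at bit 16, and the data in bits 0-15.
 */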
/* signal and fifo status, in bank 31 */
#define IPATH_MDIO_CTRL_XGXS_REG_8 0x8
/* controls loopback, redundancy */
#define IPATH_MDIO_CTRL_8355_REG_1 0x10
/* premph, encdec, etc. */
#define IPATH_MDIO_CTRL_8355_REG_2 0x11
/* Kchars, etc. */
#define IPATH_MDIO_CTRL_8355_REG_6 0x15
#define IPATH_MDIO_CTRL_8355_REG_9 0x18
#define IPATH_MDIO_CTRL_8355_REG_10 0x1D
int ipath_get_user_pages(unsigned long, size_t, struct page **);
int ipath_get_user_pages_nocopy(unsigned long, struct page **);
void ipath_release_user_pages(struct page **, size_t);
void ipath_release_user_pages_on_close(struct page **, size_t);
int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
/* these are used for the registers that vary with port */
void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
unsigned, u64);
u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
unsigned);
/*
* We could have a single register get/put routine, that takes a group type,
* but this is somewhat clearer and cleaner. It also gives us some error
* checking. 64 bit register reads should always work, but are inefficient
 * on Opteron (the northbridge always generates 2 separate HT 32 bit reads),
* so we use kreg32 wherever possible. User register and counter register
* reads are always 32 bit reads, so only one form of those routines.
*/
/*
* At the moment, none of the s-registers are writable, so no
* ipath_write_sreg(), and none of the c-registers are writable, so no
* ipath_write_creg().
*/
/**
* ipath_read_ureg32 - read 32-bit virtualized per-port register
* @dd: device
* @regno: register number
* @port: port number
*
* Return the contents of a register that is virtualized to be per port.
* Prints a debug message and returns -1 on errors (not distinguishable from
* valid contents at runtime; we may add a separate error variable at some
* point).
*
* This is normally not used by the kernel, but may be for debugging, and
* has a different implementation than user mode, which is why it's not in
* _common.h.
*/
static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
ipath_ureg regno, int port)
{
if (!dd->ipath_kregbase)
return 0;
return readl(regno + (u64 __iomem *)
(dd->ipath_uregbase +
(char __iomem *)dd->ipath_kregbase +
dd->ipath_palign * port));
}
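/*
 * Address arithmetic example for ipath_read_ureg32() (illustration only;
 * the ipath_palign value is hypothetical): the per-port user registers
 * live at kregbase + ipath_uregbase + port * ipath_palign.  With a page
 * alignment of 0x1000, port 2's copy of a register starts 0x2000 bytes
 * past the port 0 copy, and regno then indexes into that block in
 * 64-bit words.
 */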
/**
* ipath_write_ureg - write 32-bit virtualized per-port register
* @dd: device
* @regno: register number
* @value: value
* @port: port
*
* Write the contents of a register that is virtualized to be per port.
*/
static inline void ipath_write_ureg(const struct ipath_devdata *dd,
ipath_ureg regno, u64 value, int port)
{
u64 __iomem *ubase = (u64 __iomem *)
(dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
dd->ipath_palign * port);
if (dd->ipath_kregbase)
writeq(value, &ubase[regno]);
}
static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
ipath_kreg regno)
{
if (!dd->ipath_kregbase)
return -1;
return readl((u32 __iomem *) & dd->ipath_kregbase[regno]);
}
static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
ipath_kreg regno)
{
if (!dd->ipath_kregbase)
return -1;
return readq(&dd->ipath_kregbase[regno]);
}
static inline void ipath_write_kreg(const struct ipath_devdata *dd,
ipath_kreg regno, u64 value)
{
if (dd->ipath_kregbase)
writeq(value, &dd->ipath_kregbase[regno]);
}
static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
ipath_sreg regno)
{
if (!dd->ipath_kregbase)
return 0;
return readq(regno + (u64 __iomem *)
(dd->ipath_cregbase +
(char __iomem *)dd->ipath_kregbase));
}
static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
ipath_sreg regno)
{
if (!dd->ipath_kregbase)
return 0;
return readl(regno + (u64 __iomem *)
(dd->ipath_cregbase +
(char __iomem *)dd->ipath_kregbase));
}
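/*
 * Usage note (illustration only): registers with restricted range are
 * read with the 32-bit forms to avoid the double HT transaction described
 * above, e.g. the interrupt handler reads kr_intstatus via
 * ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus), while full 64-bit
 * values such as kr_errorstatus are read with ipath_read_kreg64().
 */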
/*
* sysfs interface.
*/
struct device_driver;
extern const char ipath_core_version[];
int ipath_driver_create_group(struct device_driver *);
void ipath_driver_remove_group(struct device_driver *);
int ipath_device_create_group(struct device *, struct ipath_devdata *);
void ipath_device_remove_group(struct device *, struct ipath_devdata *);
int ipath_expose_reset(struct device *);
int ipath_init_ipathfs(void);
void ipath_exit_ipathfs(void);
int ipathfs_add_device(struct ipath_devdata *);
int ipathfs_remove_device(struct ipath_devdata *);
/*
* Flush write combining store buffers (if present) and perform a write
* barrier.
*/
#if defined(CONFIG_X86_64)
#define ipath_flush_wc() asm volatile("sfence" ::: "memory")
#else
#define ipath_flush_wc() wmb()
#endif
extern unsigned ipath_debug; /* debugging bit mask */
const char *ipath_get_unit_name(int unit);
extern struct mutex ipath_mutex;
#define IPATH_DRV_NAME "ipath_core"
#define IPATH_MAJOR 233
#define IPATH_SMA_MINOR 128
#define IPATH_DIAG_MINOR 129
#define IPATH_NMINORS 130
#define ipath_dev_err(dd,fmt,...) \
do { \
const struct ipath_devdata *__dd = (dd); \
if (__dd->pcidev) \
dev_err(&__dd->pcidev->dev, "%s: " fmt, \
ipath_get_unit_name(__dd->ipath_unit), \
##__VA_ARGS__); \
else \
printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \
ipath_get_unit_name(__dd->ipath_unit), \
##__VA_ARGS__); \
} while (0)
#if _IPATH_DEBUGGING
# define __IPATH_DBG_WHICH(which,fmt,...) \
do { \
if(unlikely(ipath_debug&(which))) \
printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
__func__,##__VA_ARGS__); \
} while(0)
# define ipath_dbg(fmt,...) \
__IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__)
# define ipath_cdbg(which,fmt,...) \
__IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__)
#else /* ! _IPATH_DEBUGGING */
# define ipath_dbg(fmt,...)
# define ipath_cdbg(which,fmt,...)
#endif /* _IPATH_DEBUGGING */
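/*
 * Usage example for the debug macros (illustration only): a plain debug
 * message uses ipath_dbg("Lost link, link now down (%s)\n", ...), while
 * messages gated on a specific debug class pass the class name, e.g.
 * ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat); both compile away when
 * _IPATH_DEBUGGING is not set.
 */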
#endif /* _IPATH_KERNEL_H */

View File

@ -0,0 +1,236 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <asm/io.h>
#include "ipath_verbs.h"
/**
* ipath_alloc_lkey - allocate an lkey
* @rkt: lkey table in which to allocate the lkey
* @mr: memory region that this lkey protects
*
* Returns 1 if successful, otherwise returns 0.
*/
int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
{
unsigned long flags;
u32 r;
u32 n;
int ret;
spin_lock_irqsave(&rkt->lock, flags);
/* Find the next available LKEY */
r = n = rkt->next;
for (;;) {
if (rkt->table[r] == NULL)
break;
r = (r + 1) & (rkt->max - 1);
if (r == n) {
spin_unlock_irqrestore(&rkt->lock, flags);
_VERBS_INFO("LKEY table full\n");
ret = 0;
goto bail;
}
}
rkt->next = (r + 1) & (rkt->max - 1);
/*
* Make sure lkey is never zero which is reserved to indicate an
* unrestricted LKEY.
*/
rkt->gen++;
mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
<< 8);
if (mr->lkey == 0) {
mr->lkey |= 1 << 8;
rkt->gen++;
}
rkt->table[r] = mr;
spin_unlock_irqrestore(&rkt->lock, flags);
ret = 1;
bail:
return ret;
}
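/*
 * LKEY layout example for ipath_alloc_lkey() (illustration only; a table
 * size of 12 bits is assumed here, the real value comes from
 * ib_ipath_lkey_table_size): table index r lands in the top 12 bits
 * (r << 20), the generation count in bits 8-19, and the low 8 bits stay
 * zero, so reusing a table slot after a free still produces a different
 * LKEY value.
 */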
/**
* ipath_free_lkey - free an lkey
* @rkt: table from which to free the lkey
* @lkey: lkey id to free
*/
void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
{
unsigned long flags;
u32 r;
if (lkey == 0)
return;
r = lkey >> (32 - ib_ipath_lkey_table_size);
spin_lock_irqsave(&rkt->lock, flags);
rkt->table[r] = NULL;
spin_unlock_irqrestore(&rkt->lock, flags);
}
/**
* ipath_lkey_ok - check IB SGE for validity and initialize
* @rkt: table containing lkey to check SGE against
* @isge: outgoing internal SGE
* @sge: SGE to check
* @acc: access flags
*
* Return 1 if valid and successful, otherwise returns 0.
*
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
struct ib_sge *sge, int acc)
{
struct ipath_mregion *mr;
size_t off;
int ret;
/*
* We use LKEY == zero to mean a physical kmalloc() address.
* This is a bit of a hack since we rely on dma_map_single()
* being reversible by calling bus_to_virt().
*/
if (sge->lkey == 0) {
isge->mr = NULL;
isge->vaddr = bus_to_virt(sge->addr);
isge->length = sge->length;
isge->sge_length = sge->length;
ret = 1;
goto bail;
}
spin_lock(&rkt->lock);
mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
spin_unlock(&rkt->lock);
if (unlikely(mr == NULL || mr->lkey != sge->lkey)) {
ret = 0;
goto bail;
}
off = sge->addr - mr->user_base;
if (unlikely(sge->addr < mr->user_base ||
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc)) {
ret = 0;
goto bail;
}
off += mr->offset;
isge->mr = mr;
isge->m = 0;
isge->n = 0;
while (off >= mr->map[isge->m]->segs[isge->n].length) {
off -= mr->map[isge->m]->segs[isge->n].length;
isge->n++;
if (isge->n >= IPATH_SEGSZ) {
isge->m++;
isge->n = 0;
}
}
isge->vaddr = mr->map[isge->m]->segs[isge->n].vaddr + off;
isge->length = mr->map[isge->m]->segs[isge->n].length - off;
isge->sge_length = sge->length;
ret = 1;
bail:
return ret;
}
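/*
 * Worked example for the segment walk above (illustration only; the
 * sizes are made up): if every segment in mr->map is 4096 bytes long and
 * the computed offset is 10000, the loop subtracts 4096 twice, leaving
 * m = 0, n = 2 and off = 1808, so isge->vaddr points 1808 bytes into the
 * third segment and isge->length is the 2288 bytes remaining in it.
 */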
/**
* ipath_rkey_ok - check the IB virtual address, length, and RKEY
* @dev: infiniband device
* @ss: SGE state
* @len: length of data
* @vaddr: virtual address to place data
* @rkey: rkey to check
* @acc: access flags
*
* Return 1 if successful, otherwise 0.
*
* The QP r_rq.lock should be held.
*/
int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc)
{
struct ipath_lkey_table *rkt = &dev->lk_table;
struct ipath_sge *sge = &ss->sge;
struct ipath_mregion *mr;
size_t off;
int ret;
spin_lock(&rkt->lock);
mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
spin_unlock(&rkt->lock);
if (unlikely(mr == NULL || mr->lkey != rkey)) {
ret = 0;
goto bail;
}
off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0)) {
ret = 0;
goto bail;
}
off += mr->offset;
sge->mr = mr;
sge->m = 0;
sge->n = 0;
while (off >= mr->map[sge->m]->segs[sge->n].length) {
off -= mr->map[sge->m]->segs[sge->n].length;
sge->n++;
if (sge->n >= IPATH_SEGSZ) {
sge->m++;
sge->n = 0;
}
}
sge->vaddr = mr->map[sge->m]->segs[sge->n].vaddr + off;
sge->length = mr->map[sge->m]->segs[sge->n].length - off;
sge->sge_length = len;
ss->sg_list = NULL;
ss->num_sge = 1;
ret = 1;
bail:
return ret;
}

File diff suppressed because it is too large

View File

@ -0,0 +1,181 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _IPATH_LAYER_H
#define _IPATH_LAYER_H
/*
* This header file is for symbols shared between the infinipath driver
* and drivers layered upon it (such as ipath).
*/
struct sk_buff;
struct ipath_sge_state;
struct ipath_devdata;
struct ether_header;
struct ipath_layer_counters {
u64 symbol_error_counter;
u64 link_error_recovery_counter;
u64 link_downed_counter;
u64 port_rcv_errors;
u64 port_rcv_remphys_errors;
u64 port_xmit_discards;
u64 port_xmit_data;
u64 port_rcv_data;
u64 port_xmit_packets;
u64 port_rcv_packets;
};
/*
* A segment is a linear region of low physical memory.
* XXX Maybe we should use phys addr here and kmap()/kunmap().
* Used by the verbs layer.
*/
struct ipath_seg {
void *vaddr;
size_t length;
};
/* The number of ipath_segs that fit in a page. */
#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
struct ipath_segarray {
struct ipath_seg segs[IPATH_SEGSZ];
};
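/*
 * Sizing example (illustration only; assumes 4 KiB pages): on a 64-bit
 * kernel struct ipath_seg is 16 bytes (a pointer plus a size_t), so
 * IPATH_SEGSZ is 4096 / 16 = 256 and one ipath_segarray exactly fills a
 * page.
 */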
struct ipath_mregion {
u64 user_base; /* User's address for this region */
u64 iova; /* IB start address of this region */
size_t length;
u32 lkey;
u32 offset; /* offset (bytes) to start of region */
int access_flags;
u32 max_segs; /* number of ipath_segs in all the arrays */
u32 mapsz; /* size of the map array */
struct ipath_segarray *map[0]; /* the segments */
};
/*
* These keep track of the copy progress within a memory region.
* Used by the verbs layer.
*/
struct ipath_sge {
struct ipath_mregion *mr;
void *vaddr; /* current pointer into the segment */
u32 sge_length; /* length of the SGE */
u32 length; /* remaining length of the segment */
u16 m; /* current index: mr->map[m] */
u16 n; /* current index: mr->map[m]->segs[n] */
};
struct ipath_sge_state {
struct ipath_sge *sg_list; /* next SGE to be used if any */
struct ipath_sge sge; /* progress state for the current SGE */
u8 num_sge;
};
int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
void (*l_remove)(void *),
int (*l_intr)(void *, u32),
int (*l_rcv)(void *, void *,
struct sk_buff *),
u16 rcv_opcode,
int (*l_rcv_lid)(void *, void *));
int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
void (*l_remove)(void *arg),
int (*l_piobufavail)(void *arg),
void (*l_rcv)(void *arg, void *rhdr,
void *data, u32 tlen),
void (*l_timer_cb)(void *arg));
void ipath_layer_unregister(void);
void ipath_verbs_unregister(void);
int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
u16 ipath_layer_get_lid(struct ipath_devdata *dd);
int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
int ipath_layer_set_mtu(struct ipath_devdata *, u16);
int ipath_set_sps_lid(struct ipath_devdata *, u32, u8);
int ipath_layer_send_hdr(struct ipath_devdata *dd,
struct ether_header *hdr);
int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
u32 * hdr, u32 len, struct ipath_sge_state *ss);
int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
size_t namelen);
int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
u64 *rwords, u64 *spkts, u64 *rpkts,
u64 *xmit_wait);
int ipath_layer_get_counters(struct ipath_devdata *dd,
struct ipath_layer_counters *cntrs);
int ipath_layer_want_buffer(struct ipath_devdata *dd);
int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
__be64 ipath_layer_get_guid(struct ipath_devdata *);
u32 ipath_layer_get_nguid(struct ipath_devdata *);
int ipath_layer_query_device(struct ipath_devdata *, u32 * vendor,
u32 * boardrev, u32 * majrev, u32 * minrev);
u32 ipath_layer_get_flags(struct ipath_devdata *dd);
struct device *ipath_layer_get_device(struct ipath_devdata *dd);
u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
int ipath_layer_enable_timer(struct ipath_devdata *dd);
int ipath_layer_disable_timer(struct ipath_devdata *dd);
int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
int sleep);
int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
/* ipath_ether interrupt values */
#define IPATH_LAYER_INT_IF_UP 0x2
#define IPATH_LAYER_INT_IF_DOWN 0x4
#define IPATH_LAYER_INT_LID 0x8
#define IPATH_LAYER_INT_SEND_CONTINUE 0x10
#define IPATH_LAYER_INT_BCAST 0x40
/* _verbs_layer.l_flags */
#define IPATH_VERBS_KERNEL_SMA 0x1
extern unsigned ipath_debug; /* debugging bit mask */
#endif /* _IPATH_LAYER_H */

File diff suppressed because it is too large

View File

@ -0,0 +1,383 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_pack.h>
#include <rdma/ib_smi.h>
#include "ipath_verbs.h"
/**
* ipath_get_dma_mr - get a DMA memory region
* @pd: protection domain for this memory region
* @acc: access flags
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
{
struct ipath_mr *mr;
struct ib_mr *ret;
mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
mr->mr.access_flags = acc;
ret = &mr->ibmr;
bail:
return ret;
}
static struct ipath_mr *alloc_mr(int count,
struct ipath_lkey_table *lk_table)
{
struct ipath_mr *mr;
int m, i = 0;
/* Allocate struct plus pointers to first level page tables. */
m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
if (!mr)
goto done;
/* Allocate first level page tables. */
for (; i < m; i++) {
mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
if (!mr->mr.map[i])
goto bail;
}
mr->mr.mapsz = m;
/*
* ib_reg_phys_mr() will initialize mr->ibmr except for
* lkey and rkey.
*/
if (!ipath_alloc_lkey(lk_table, &mr->mr))
goto bail;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
goto done;
bail:
while (i) {
i--;
kfree(mr->mr.map[i]);
}
kfree(mr);
mr = NULL;
done:
return mr;
}
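/*
 * Sizing example for alloc_mr() (illustration only; assumes IPATH_SEGSZ
 * is 256, i.e. 4 KiB pages): registering 300 buffers gives
 * m = (300 + 255) / 256 = 2, so the region gets two first-level map
 * pages, the first holding 256 segments and the second the remaining 44.
 */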
/**
* ipath_reg_phys_mr - register a physical memory region
* @pd: protection domain for this memory region
* @buffer_list: pointer to the list of physical buffers to register
* @num_phys_buf: the number of physical buffers to register
 * @acc: access flags for this memory region
 * @iova_start: the starting address passed over IB which maps to this MR
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start)
{
struct ipath_mr *mr;
int n, m, i;
struct ib_mr *ret;
mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
if (mr == NULL) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
mr->mr.length = 0;
mr->mr.offset = 0;
mr->mr.access_flags = acc;
mr->mr.max_segs = num_phys_buf;
m = 0;
n = 0;
for (i = 0; i < num_phys_buf; i++) {
mr->mr.map[m]->segs[n].vaddr =
phys_to_virt(buffer_list[i].addr);
mr->mr.map[m]->segs[n].length = buffer_list[i].size;
mr->mr.length += buffer_list[i].size;
n++;
if (n == IPATH_SEGSZ) {
m++;
n = 0;
}
}
ret = &mr->ibmr;
bail:
return ret;
}
/**
* ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @region: the user memory region
* @mr_access_flags: access flags for this memory region
* @udata: unused by the InfiniPath driver
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int mr_access_flags, struct ib_udata *udata)
{
struct ipath_mr *mr;
struct ib_umem_chunk *chunk;
int n, m, i;
struct ib_mr *ret;
n = 0;
list_for_each_entry(chunk, &region->chunk_list, list)
n += chunk->nents;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
mr->mr.user_base = region->user_base;
mr->mr.iova = region->virt_base;
mr->mr.length = region->length;
mr->mr.offset = region->offset;
mr->mr.access_flags = mr_access_flags;
mr->mr.max_segs = n;
m = 0;
n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) {
for (i = 0; i < chunk->nmap; i++) {
mr->mr.map[m]->segs[n].vaddr =
page_address(chunk->page_list[i].page);
mr->mr.map[m]->segs[n].length = region->page_size;
n++;
if (n == IPATH_SEGSZ) {
m++;
n = 0;
}
}
}
ret = &mr->ibmr;
bail:
return ret;
}
/**
* ipath_dereg_mr - unregister and free a memory region
* @ibmr: the memory region to free
*
* Returns 0 on success.
*
* Note that this is called to free MRs created by ipath_get_dma_mr()
* or ipath_reg_user_mr().
*/
int ipath_dereg_mr(struct ib_mr *ibmr)
{
struct ipath_mr *mr = to_imr(ibmr);
int i;
ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
i = mr->mr.mapsz;
while (i) {
i--;
kfree(mr->mr.map[i]);
}
kfree(mr);
return 0;
}
/**
* ipath_alloc_fmr - allocate a fast memory region
* @pd: the protection domain for this memory region
* @mr_access_flags: access flags for this memory region
* @fmr_attr: fast memory region attributes
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr)
{
struct ipath_fmr *fmr;
int m, i = 0;
struct ib_fmr *ret;
/* Allocate struct plus pointers to first level page tables. */
m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
if (!fmr)
goto bail;
/* Allocate first level page tables. */
for (; i < m; i++) {
fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
GFP_KERNEL);
if (!fmr->mr.map[i])
goto bail;
}
fmr->mr.mapsz = m;
/*
* ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
* rkey.
*/
if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
goto bail;
fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
/*
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
fmr->mr.offset = 0;
fmr->mr.access_flags = mr_access_flags;
fmr->mr.max_segs = fmr_attr->max_pages;
fmr->page_shift = fmr_attr->page_shift;
ret = &fmr->ibfmr;
goto done;
bail:
while (i)
kfree(fmr->mr.map[--i]);
kfree(fmr);
ret = ERR_PTR(-ENOMEM);
done:
return ret;
}
/**
* ipath_map_phys_fmr - set up a fast memory region
* @ibmfr: the fast memory region to set up
* @page_list: the list of pages to associate with the fast memory region
* @list_len: the number of pages to associate with the fast memory region
* @iova: the virtual address of the start of the fast memory region
*
* This may be called from interrupt context.
*/
int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
int list_len, u64 iova)
{
struct ipath_fmr *fmr = to_ifmr(ibfmr);
struct ipath_lkey_table *rkt;
unsigned long flags;
int m, n, i;
u32 ps;
int ret;
if (list_len > fmr->mr.max_segs) {
ret = -EINVAL;
goto bail;
}
rkt = &to_idev(ibfmr->device)->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = iova;
fmr->mr.iova = iova;
ps = 1 << fmr->page_shift;
fmr->mr.length = list_len * ps;
m = 0;
n = 0;
ps = 1 << fmr->page_shift;
for (i = 0; i < list_len; i++) {
fmr->mr.map[m]->segs[n].vaddr = phys_to_virt(page_list[i]);
fmr->mr.map[m]->segs[n].length = ps;
if (++n == IPATH_SEGSZ) {
m++;
n = 0;
}
}
spin_unlock_irqrestore(&rkt->lock, flags);
ret = 0;
bail:
return ret;
}
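/*
 * Worked example for ipath_map_phys_fmr() (illustration only; the values
 * are made up): with page_shift = 12 the page size ps is 4096, so mapping
 * a page_list of 8 pages sets fmr->mr.length to 32768 and fills
 * map[0]->segs[0..7] with one 4 KiB segment per physical page.
 */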
/**
* ipath_unmap_fmr - unmap fast memory regions
* @fmr_list: the list of fast memory regions to unmap
*
* Returns 0 on success.
*/
int ipath_unmap_fmr(struct list_head *fmr_list)
{
struct ipath_fmr *fmr;
struct ipath_lkey_table *rkt;
unsigned long flags;
list_for_each_entry(fmr, fmr_list, ibfmr.list) {
rkt = &to_idev(fmr->ibfmr.device)->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
spin_unlock_irqrestore(&rkt->lock, flags);
}
return 0;
}
/**
* ipath_dealloc_fmr - deallocate a fast memory region
* @ibfmr: the fast memory region to deallocate
*
* Returns 0 on success.
*/
int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
{
struct ipath_fmr *fmr = to_ifmr(ibfmr);
int i;
ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
i = fmr->mr.mapsz;
while (i)
kfree(fmr->mr.map[--i]);
kfree(fmr);
return 0;
}

File diff suppressed because it is too large

View File

@ -0,0 +1,913 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/err.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
#include "ips_common.h"
#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
#define mk_qpn(qpt, map, off) (((map) - (qpt)->map) * BITS_PER_PAGE + \
(off))
#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
BITS_PER_PAGE, off)
#define TRANS_INVALID 0
#define TRANS_ANY2RST 1
#define TRANS_RST2INIT 2
#define TRANS_INIT2INIT 3
#define TRANS_INIT2RTR 4
#define TRANS_RTR2RTS 5
#define TRANS_RTS2RTS 6
#define TRANS_SQERR2RTS 7
#define TRANS_ANY2ERR 8
#define TRANS_RTS2SQD 9 /* XXX Wait for expected ACKs & signal event */
#define TRANS_SQD2SQD 10 /* error if not drained & parameter change */
#define TRANS_SQD2RTS 11 /* error if not drained */
/*
* Convert the AETH credit code into the number of credits.
*/
static u32 credit_table[31] = {
0, /* 0 */
1, /* 1 */
2, /* 2 */
3, /* 3 */
4, /* 4 */
6, /* 5 */
8, /* 6 */
12, /* 7 */
16, /* 8 */
24, /* 9 */
32, /* A */
48, /* B */
64, /* C */
96, /* D */
128, /* E */
192, /* F */
256, /* 10 */
384, /* 11 */
512, /* 12 */
768, /* 13 */
1024, /* 14 */
1536, /* 15 */
2048, /* 16 */
3072, /* 17 */
4096, /* 18 */
6144, /* 19 */
8192, /* 1A */
12288, /* 1B */
16384, /* 1C */
24576, /* 1D */
32768 /* 1E */
};
static u32 alloc_qpn(struct ipath_qp_table *qpt)
{
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
u32 ret;
qpn = qpt->last + 1;
if (qpn >= QPN_MAX)
qpn = 2;
offset = qpn & BITS_PER_PAGE_MASK;
map = &qpt->map[qpn / BITS_PER_PAGE];
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
unsigned long page = get_zeroed_page(GFP_KERNEL);
unsigned long flags;
/*
* Free the page if someone raced with us
* installing it:
*/
spin_lock_irqsave(&qpt->lock, flags);
if (map->page)
free_page(page);
else
map->page = (void *)page;
spin_unlock_irqrestore(&qpt->lock, flags);
if (unlikely(!map->page))
break;
}
if (likely(atomic_read(&map->n_free))) {
do {
if (!test_and_set_bit(offset, map->page)) {
atomic_dec(&map->n_free);
qpt->last = qpn;
ret = qpn;
goto bail;
}
offset = find_next_offset(map, offset);
qpn = mk_qpn(qpt, map, offset);
/*
* This test differs from alloc_pidmap().
* If find_next_offset() does find a zero
* bit, we don't need to check for QPN
* wrapping around past our starting QPN.
* We just need to be sure we don't loop
* forever.
*/
} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
}
/*
* In order to keep the number of pages allocated to a
* minimum, we scan all the existing pages before increasing
* the size of the bitmap table.
*/
if (++i > max_scan) {
if (qpt->nmaps == QPNMAP_ENTRIES)
break;
map = &qpt->map[qpt->nmaps++];
offset = 0;
} else if (map < &qpt->map[qpt->nmaps]) {
++map;
offset = 0;
} else {
map = &qpt->map[0];
offset = 2;
}
qpn = mk_qpn(qpt, map, offset);
}
ret = 0;
bail:
return ret;
}
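/*
 * Illustrative sketch (not part of the driver): at its core alloc_qpn()
 * above is a bitmap allocator -- find a clear bit, claim it, and the bit
 * position becomes the allocated QPN.  The driver adds per-page bitmaps,
 * a free counter and locking; the stand-alone helper below shows only
 * the basic idea on a flat byte array ("example_" names are illustrative).
 */
static int example_alloc_bit(unsigned char *bitmap, unsigned int nbits,
			     unsigned int start)
{
	unsigned int i;

	for (i = start; i < nbits; i++) {
		if (!(bitmap[i / 8] & (1u << (i % 8)))) {
			bitmap[i / 8] |= 1u << (i % 8);	/* claim the bit */
			return (int)i;			/* allocated number */
		}
	}
	return -1;					/* nothing free */
}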
static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
{
struct qpn_map *map;
map = qpt->map + qpn / BITS_PER_PAGE;
if (map->page)
clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
atomic_inc(&map->n_free);
}
/**
* ipath_alloc_qpn - allocate a QP number
* @qpt: the QP table
* @qp: the QP
* @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
*
* Allocate the next available QPN and put the QP into the hash table.
* The hash table holds a reference to the QP.
*/
int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
enum ib_qp_type type)
{
unsigned long flags;
u32 qpn;
int ret;
if (type == IB_QPT_SMI)
qpn = 0;
else if (type == IB_QPT_GSI)
qpn = 1;
else {
/* Allocate the next available QPN */
qpn = alloc_qpn(qpt);
if (qpn == 0) {
ret = -ENOMEM;
goto bail;
}
}
qp->ibqp.qp_num = qpn;
/* Add the QP to the hash table. */
spin_lock_irqsave(&qpt->lock, flags);
qpn %= qpt->max;
qp->next = qpt->table[qpn];
qpt->table[qpn] = qp;
atomic_inc(&qp->refcount);
spin_unlock_irqrestore(&qpt->lock, flags);
ret = 0;
bail:
return ret;
}
/**
* ipath_free_qp - remove a QP from the QP table
* @qpt: the QP table
* @qp: the QP to remove
*
* Remove the QP from the table so it can't be found asynchronously by
* the receive interrupt routine.
*/
void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
{
struct ipath_qp *q, **qpp;
unsigned long flags;
int fnd = 0;
spin_lock_irqsave(&qpt->lock, flags);
/* Remove QP from the hash table. */
qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
for (; (q = *qpp) != NULL; qpp = &q->next) {
if (q == qp) {
*qpp = qp->next;
qp->next = NULL;
atomic_dec(&qp->refcount);
fnd = 1;
break;
}
}
spin_unlock_irqrestore(&qpt->lock, flags);
if (!fnd)
return;
/* If QPN is not reserved, mark QPN free in the bitmap. */
if (qp->ibqp.qp_num > 1)
free_qpn(qpt, qp->ibqp.qp_num);
wait_event(qp->wait, !atomic_read(&qp->refcount));
}
/**
* ipath_free_all_qps - remove all QPs from the table
* @qpt: the QP table to empty
*/
void ipath_free_all_qps(struct ipath_qp_table *qpt)
{
unsigned long flags;
struct ipath_qp *qp, *nqp;
u32 n;
for (n = 0; n < qpt->max; n++) {
spin_lock_irqsave(&qpt->lock, flags);
qp = qpt->table[n];
qpt->table[n] = NULL;
spin_unlock_irqrestore(&qpt->lock, flags);
while (qp) {
nqp = qp->next;
if (qp->ibqp.qp_num > 1)
free_qpn(qpt, qp->ibqp.qp_num);
if (!atomic_dec_and_test(&qp->refcount) ||
!ipath_destroy_qp(&qp->ibqp))
_VERBS_INFO("QP memory leak!\n");
qp = nqp;
}
}
for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
if (qpt->map[n].page)
free_page((unsigned long)qpt->map[n].page);
}
}
/**
* ipath_lookup_qpn - return the QP with the given QPN
* @qpt: the QP table
* @qpn: the QP number to look up
*
* The caller is responsible for decrementing the QP reference count
* when done.
*/
struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
{
unsigned long flags;
struct ipath_qp *qp;
spin_lock_irqsave(&qpt->lock, flags);
for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
if (qp->ibqp.qp_num == qpn) {
atomic_inc(&qp->refcount);
break;
}
}
spin_unlock_irqrestore(&qpt->lock, flags);
return qp;
}
/**
* ipath_reset_qp - initialize the QP state to the reset state
* @qp: the QP to reset
*/
static void ipath_reset_qp(struct ipath_qp *qp)
{
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
qp->s_hdrwords = 0;
qp->s_psn = 0;
qp->r_psn = 0;
atomic_set(&qp->msn, 0);
if (qp->ibqp.qp_type == IB_QPT_RC) {
qp->s_state = IB_OPCODE_RC_SEND_LAST;
qp->r_state = IB_OPCODE_RC_SEND_LAST;
} else {
qp->s_state = IB_OPCODE_UC_SEND_LAST;
qp->r_state = IB_OPCODE_UC_SEND_LAST;
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->s_nak_state = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
qp->s_cur = 0;
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
qp->r_rq.head = 0;
qp->r_rq.tail = 0;
qp->r_reuse_sge = 0;
}
/**
* ipath_modify_qp - modify the attributes of a queue pair
* @ibqp: the queue pair whose attributes we're modifying
* @attr: the new attributes
* @attr_mask: the mask of attributes to modify
*
* Returns 0 on success, otherwise returns an errno.
*/
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask)
{
struct ipath_qp *qp = to_iqp(ibqp);
enum ib_qp_state cur_state, new_state;
unsigned long flags;
int ret;
spin_lock_irqsave(&qp->r_rq.lock, flags);
spin_lock(&qp->s_lock);
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask))
goto inval;
switch (new_state) {
case IB_QPS_RESET:
ipath_reset_qp(qp);
break;
case IB_QPS_ERR:
ipath_error_qp(qp);
break;
default:
break;
}
if (attr_mask & IB_QP_PKEY_INDEX) {
struct ipath_ibdev *dev = to_idev(ibqp->device);
if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
goto inval;
qp->s_pkey_index = attr->pkey_index;
}
if (attr_mask & IB_QP_DEST_QPN)
qp->remote_qpn = attr->dest_qp_num;
if (attr_mask & IB_QP_SQ_PSN) {
qp->s_next_psn = attr->sq_psn;
qp->s_last_psn = qp->s_next_psn - 1;
}
if (attr_mask & IB_QP_RQ_PSN)
qp->r_psn = attr->rq_psn;
if (attr_mask & IB_QP_ACCESS_FLAGS)
qp->qp_access_flags = attr->qp_access_flags;
if (attr_mask & IB_QP_AV) {
if (attr->ah_attr.dlid == 0 ||
attr->ah_attr.dlid >= IPS_MULTICAST_LID_BASE)
goto inval;
qp->remote_ah_attr = attr->ah_attr;
}
if (attr_mask & IB_QP_PATH_MTU)
qp->path_mtu = attr->path_mtu;
if (attr_mask & IB_QP_RETRY_CNT)
qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
if (attr_mask & IB_QP_RNR_RETRY) {
qp->s_rnr_retry = attr->rnr_retry;
if (qp->s_rnr_retry > 7)
qp->s_rnr_retry = 7;
qp->s_rnr_retry_cnt = qp->s_rnr_retry;
}
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
if (attr->min_rnr_timer > 31)
goto inval;
qp->s_min_rnr_timer = attr->min_rnr_timer;
}
if (attr_mask & IB_QP_QKEY)
qp->qkey = attr->qkey;
if (attr_mask & IB_QP_PKEY_INDEX)
qp->s_pkey_index = attr->pkey_index;
qp->state = new_state;
spin_unlock(&qp->s_lock);
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
/*
* If QP1 changed to the RTS state, try to move the link to INIT
* even if it was ACTIVE so the SM will reinitialize the SMA's
* state.
*/
if (qp->ibqp.qp_num == 1 && new_state == IB_QPS_RTS) {
struct ipath_ibdev *dev = to_idev(ibqp->device);
ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
}
ret = 0;
goto bail;
inval:
spin_unlock(&qp->s_lock);
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
ret = -EINVAL;
bail:
return ret;
}
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr)
{
struct ipath_qp *qp = to_iqp(ibqp);
attr->qp_state = qp->state;
attr->cur_qp_state = attr->qp_state;
attr->path_mtu = qp->path_mtu;
attr->path_mig_state = 0;
attr->qkey = qp->qkey;
attr->rq_psn = qp->r_psn;
attr->sq_psn = qp->s_next_psn;
attr->dest_qp_num = qp->remote_qpn;
attr->qp_access_flags = qp->qp_access_flags;
attr->cap.max_send_wr = qp->s_size - 1;
attr->cap.max_recv_wr = qp->r_rq.size - 1;
attr->cap.max_send_sge = qp->s_max_sge;
attr->cap.max_recv_sge = qp->r_rq.max_sge;
attr->cap.max_inline_data = 0;
attr->ah_attr = qp->remote_ah_attr;
memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
attr->pkey_index = qp->s_pkey_index;
attr->alt_pkey_index = 0;
attr->en_sqd_async_notify = 0;
attr->sq_draining = 0;
attr->max_rd_atomic = 1;
attr->max_dest_rd_atomic = 1;
attr->min_rnr_timer = qp->s_min_rnr_timer;
attr->port_num = 1;
attr->timeout = 0;
attr->retry_cnt = qp->s_retry_cnt;
attr->rnr_retry = qp->s_rnr_retry;
attr->alt_port_num = 0;
attr->alt_timeout = 0;
init_attr->event_handler = qp->ibqp.event_handler;
init_attr->qp_context = qp->ibqp.qp_context;
init_attr->send_cq = qp->ibqp.send_cq;
init_attr->recv_cq = qp->ibqp.recv_cq;
init_attr->srq = qp->ibqp.srq;
init_attr->cap = attr->cap;
init_attr->sq_sig_type =
(qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
? IB_SIGNAL_REQ_WR : 0;
init_attr->qp_type = qp->ibqp.qp_type;
init_attr->port_num = 1;
return 0;
}
/**
* ipath_compute_aeth - compute the AETH (syndrome + MSN)
* @qp: the queue pair to compute the AETH for
*
* Returns the AETH.
*
* The QP s_lock should be held.
*/
__be32 ipath_compute_aeth(struct ipath_qp *qp)
{
u32 aeth = atomic_read(&qp->msn) & IPS_MSN_MASK;
if (qp->s_nak_state) {
aeth |= qp->s_nak_state << IPS_AETH_CREDIT_SHIFT;
} else if (qp->ibqp.srq) {
/*
* Shared receive queues don't generate credits.
* Set the credit field to the invalid value.
*/
aeth |= IPS_AETH_CREDIT_INVAL << IPS_AETH_CREDIT_SHIFT;
} else {
u32 min, max, x;
u32 credits;
/*
* Compute the number of credits available (RWQEs).
* XXX Not holding the r_rq.lock here so there is a small
* chance that the pair of reads are not atomic.
*/
credits = qp->r_rq.head - qp->r_rq.tail;
if ((int)credits < 0)
credits += qp->r_rq.size;
/*
* Binary search the credit table to find the code to
* use.
*/
min = 0;
max = 31;
for (;;) {
x = (min + max) / 2;
if (credit_table[x] == credits)
break;
if (credit_table[x] > credits)
max = x;
else if (min == x)
break;
else
min = x;
}
aeth |= x << IPS_AETH_CREDIT_SHIFT;
}
return cpu_to_be32(aeth);
}
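/*
 * Illustrative sketch (not part of the driver): the loop in
 * ipath_compute_aeth() above binary searches credit_table[] for the
 * largest 5-bit credit code whose value does not exceed the number of
 * free receive WQEs.  The stand-alone helper below performs the same
 * search over a monotonically increasing table whose first entry is zero
 * (as credit_table[] is); passing credit_table and 31 mirrors the loop
 * above.  The "example_" name is illustrative only.
 */
static unsigned int example_credits_to_code(const u32 *table,
					    unsigned int nentries,
					    u32 credits)
{
	unsigned int min = 0, max = nentries, x;

	for (;;) {
		x = (min + max) / 2;
		if (table[x] == credits)
			break;			/* exact match */
		if (table[x] > credits)
			max = x;		/* look in the lower half */
		else if (min == x)
			break;			/* closest code not above credits */
		else
			min = x;		/* look in the upper half */
	}
	return x;
}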
/**
* ipath_create_qp - create a queue pair for a device
* @ibpd: the protection domain whose device we create the queue pair for
* @init_attr: the attributes of the queue pair
* @udata: unused by InfiniPath
*
* Returns the queue pair on success, otherwise returns an errno.
*
* Called by the ib_create_qp() core verbs function.
*/
struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
struct ipath_qp *qp;
int err;
struct ipath_swqe *swq = NULL;
struct ipath_ibdev *dev;
size_t sz;
struct ib_qp *ret;
if (init_attr->cap.max_send_sge > 255 ||
init_attr->cap.max_recv_sge > 255) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
switch (init_attr->qp_type) {
case IB_QPT_UC:
case IB_QPT_RC:
sz = sizeof(struct ipath_sge) *
init_attr->cap.max_send_sge +
sizeof(struct ipath_swqe);
swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
if (swq == NULL) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
/* FALLTHROUGH */
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
qp = kmalloc(sizeof(*qp), GFP_KERNEL);
if (!qp) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
sz = sizeof(struct ipath_sge) *
init_attr->cap.max_recv_sge +
sizeof(struct ipath_rwqe);
qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
if (!qp->r_rq.wq) {
kfree(qp);
ret = ERR_PTR(-ENOMEM);
goto bail;
}
/*
* ib_create_qp() will initialize qp->ibqp
* except for qp->ibqp.qp_num.
*/
spin_lock_init(&qp->s_lock);
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait);
tasklet_init(&qp->s_task,
init_attr->qp_type == IB_QPT_RC ?
ipath_do_rc_send : ipath_do_uc_send,
(unsigned long)qp);
qp->piowait.next = LIST_POISON1;
qp->piowait.prev = LIST_POISON2;
qp->timerwait.next = LIST_POISON1;
qp->timerwait.prev = LIST_POISON2;
qp->state = IB_QPS_RESET;
qp->s_wq = swq;
qp->s_size = init_attr->cap.max_send_wr + 1;
qp->s_max_sge = init_attr->cap.max_send_sge;
qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
1 << IPATH_S_SIGNAL_REQ_WR : 0;
dev = to_idev(ibpd->device);
err = ipath_alloc_qpn(&dev->qp_table, qp,
init_attr->qp_type);
if (err) {
vfree(swq);
vfree(qp->r_rq.wq);
kfree(qp);
ret = ERR_PTR(err);
goto bail;
}
ipath_reset_qp(qp);
/* Tell the core driver that the kernel SMA is present. */
if (qp->ibqp.qp_type == IB_QPT_SMI)
ipath_layer_set_verbs_flags(dev->dd,
IPATH_VERBS_KERNEL_SMA);
break;
default:
/* Don't support raw QPs */
ret = ERR_PTR(-ENOSYS);
goto bail;
}
init_attr->cap.max_inline_data = 0;
ret = &qp->ibqp;
bail:
return ret;
}
/**
* ipath_destroy_qp - destroy a queue pair
* @ibqp: the queue pair to destroy
*
* Returns 0 on success.
*
* Note that this can be called while the QP is actively sending or
* receiving!
*/
int ipath_destroy_qp(struct ib_qp *ibqp)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_ibdev *dev = to_idev(ibqp->device);
unsigned long flags;
/* Tell the core driver that the kernel SMA is gone. */
if (qp->ibqp.qp_type == IB_QPT_SMI)
ipath_layer_set_verbs_flags(dev->dd, 0);
spin_lock_irqsave(&qp->r_rq.lock, flags);
spin_lock(&qp->s_lock);
qp->state = IB_QPS_ERR;
spin_unlock(&qp->s_lock);
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
/* Stop the sending tasklet. */
tasklet_kill(&qp->s_task);
/* Make sure the QP isn't on the timeout list. */
spin_lock_irqsave(&dev->pending_lock, flags);
if (qp->timerwait.next != LIST_POISON1)
list_del(&qp->timerwait);
if (qp->piowait.next != LIST_POISON1)
list_del(&qp->piowait);
spin_unlock_irqrestore(&dev->pending_lock, flags);
/*
* Make sure that the QP is not in the QPN table so receive
* interrupts will discard packets for this QP. XXX Also remove QP
* from multicast table.
*/
if (atomic_read(&qp->refcount) != 0)
ipath_free_qp(&dev->qp_table, qp);
vfree(qp->s_wq);
vfree(qp->r_rq.wq);
kfree(qp);
return 0;
}
/**
* ipath_init_qp_table - initialize the QP table for a device
* @idev: the device whose QP table we're initializing
* @size: the size of the QP table
*
* Returns 0 on success, otherwise returns an errno.
*/
int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
{
int i;
int ret;
idev->qp_table.last = 1; /* QPN 0 and 1 are special. */
idev->qp_table.max = size;
idev->qp_table.nmaps = 1;
idev->qp_table.table = kzalloc(size * sizeof(*idev->qp_table.table),
GFP_KERNEL);
if (idev->qp_table.table == NULL) {
ret = -ENOMEM;
goto bail;
}
for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
idev->qp_table.map[i].page = NULL;
}
ret = 0;
bail:
return ret;
}
/**
* ipath_sqerror_qp - put a QP's send queue into an error state
* @qp: QP whose send queue will be put into an error state
* @wc: the WC responsible for putting the QP in this state
*
* Flushes the send work queue.
* The QP s_lock should be held.
*/
void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
_VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
qp->ibqp.qp_num, qp->remote_qpn, wc->status);
spin_lock(&dev->pending_lock);
/* XXX What if it's already removed by the timeout code? */
if (qp->timerwait.next != LIST_POISON1)
list_del(&qp->timerwait);
if (qp->piowait.next != LIST_POISON1)
list_del(&qp->piowait);
spin_unlock(&dev->pending_lock);
ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
wc->status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
wc->wr_id = wqe->wr.wr_id;
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
wqe = get_swqe_ptr(qp, qp->s_last);
}
qp->s_cur = qp->s_tail = qp->s_head;
qp->state = IB_QPS_SQE;
}
/**
* ipath_error_qp - put a QP into an error state
* @qp: the QP to put into an error state
*
* Flushes both send and receive work queues.
* QP r_rq.lock and s_lock should be held.
*/
void ipath_error_qp(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
_VERBS_INFO("QP%d/%d in error state\n",
qp->ibqp.qp_num, qp->remote_qpn);
spin_lock(&dev->pending_lock);
/* XXX What if it's already removed by the timeout code? */
if (qp->timerwait.next != LIST_POISON1)
list_del(&qp->timerwait);
if (qp->piowait.next != LIST_POISON1)
list_del(&qp->piowait);
spin_unlock(&dev->pending_lock);
wc.status = IB_WC_WR_FLUSH_ERR;
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
wc.qp_num = qp->ibqp.qp_num;
wc.src_qp = 0;
wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = 0;
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
while (qp->s_last != qp->s_head) {
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
wc.wr_id = wqe->wr.wr_id;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
}
qp->s_cur = qp->s_tail = qp->s_head;
qp->s_hdrwords = 0;
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
wc.opcode = IB_WC_RECV;
while (qp->r_rq.tail != qp->r_rq.head) {
wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
if (++qp->r_rq.tail >= qp->r_rq.size)
qp->r_rq.tail = 0;
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
}
}
/**
* ipath_get_credit - update the send credits of a QP from an AETH
* @qp: the QP whose send queue flow control credits to update
* @aeth: the Acknowledge Extended Transport Header
*
* The QP s_lock should be held.
*/
void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
{
u32 credit = (aeth >> IPS_AETH_CREDIT_SHIFT) & IPS_AETH_CREDIT_MASK;
/*
* If the credit is invalid, we can send
* as many packets as we like. Otherwise, we have to
* honor the credit field.
*/
if (credit == IPS_AETH_CREDIT_INVAL) {
qp->s_lsn = (u32) -1;
} else if (qp->s_lsn != (u32) -1) {
/* Compute new LSN (i.e., MSN + credit) */
credit = (aeth + credit_table[credit]) & IPS_MSN_MASK;
if (ipath_cmp24(credit, qp->s_lsn) > 0)
qp->s_lsn = credit;
}
/* Restart sending if it was blocked due to lack of credits. */
if (qp->s_cur != qp->s_head &&
(qp->s_lsn == (u32) -1 ||
ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
qp->s_lsn + 1) <= 0))
tasklet_hi_schedule(&qp->s_task);
}
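/*
 * Illustrative sketch (not part of the driver): ipath_get_credit() above
 * relies on ipath_cmp24(), declared in ipath_verbs.h, to compare 24-bit
 * PSN/MSN values that wrap around.  The helper below is one portable way
 * to express such a comparison (the driver's own implementation may
 * differ): negative means "a is behind b", zero means equal, positive
 * means "a is ahead of b".  The "example_" name is illustrative only.
 */
static int example_cmp24(u32 a, u32 b)
{
	u32 d = (a - b) & 0xffffff;		/* 24-bit modular difference */

	/* differences in the upper half of the 24-bit space count as negative */
	return (d & 0x800000) ? (int)d - 0x1000000 : (int)d;
}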

File diff suppressed because it is too large


@ -0,0 +1,446 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _IPATH_REGISTERS_H
#define _IPATH_REGISTERS_H
/*
* This file should only be included by kernel source, and by the diags.
* It defines the registers, and their contents, for the InfiniPath HT-400 chip
*/
/*
* These are the InfiniPath register and buffer bit definitions,
* that are visible to software, and needed only by the kernel
* and diag code. A few, that are visible to protocol and user
* code are in ipath_common.h. Some bits are specific
* to a given chip implementation, and have been moved to the
* chip-specific source file
*/
/* kr_revision bits */
#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
#define INFINIPATH_R_ARCH_MASK 0xFF
#define INFINIPATH_R_ARCH_SHIFT 16
#define INFINIPATH_R_SOFTWARE_MASK 0xFF
#define INFINIPATH_R_SOFTWARE_SHIFT 24
#define INFINIPATH_R_BOARDID_MASK 0xFF
#define INFINIPATH_R_BOARDID_SHIFT 32
/* kr_control bits */
#define INFINIPATH_C_FREEZEMODE 0x00000002
#define INFINIPATH_C_LINKENABLE 0x00000004
#define INFINIPATH_C_RESET 0x00000001
/* kr_sendctrl bits */
#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
#define IPATH_S_ABORT 0
#define IPATH_S_PIOINTBUFAVAIL 1
#define IPATH_S_PIOBUFAVAILUPD 2
#define IPATH_S_PIOENABLE 3
#define IPATH_S_DISARM 31
#define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT)
#define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL)
#define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD)
#define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE)
#define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM)
/* kr_rcvctrl bits */
#define INFINIPATH_R_PORTENABLE_SHIFT 0
#define INFINIPATH_R_INTRAVAIL_SHIFT 16
#define INFINIPATH_R_TAILUPD 0x80000000
/* kr_intstatus, kr_intclear, kr_intmask bits */
#define INFINIPATH_I_RCVURG_SHIFT 0
#define INFINIPATH_I_RCVAVAIL_SHIFT 12
#define INFINIPATH_I_ERROR 0x80000000
#define INFINIPATH_I_SPIOSENT 0x40000000
#define INFINIPATH_I_SPIOBUFAVAIL 0x20000000
#define INFINIPATH_I_GPIO 0x10000000
/* kr_errorstatus, kr_errorclear, kr_errormask bits */
#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL
#define INFINIPATH_E_RVCRC 0x0000000000000002ULL
#define INFINIPATH_E_RICRC 0x0000000000000004ULL
#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL
#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL
#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL
#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL
#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL
#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL
#define INFINIPATH_E_REBP 0x0000000000000200ULL
#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL
#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL
#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL
#define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL
#define INFINIPATH_E_RBADTID 0x0000000000004000ULL
#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL
#define INFINIPATH_E_RHDR 0x0000000000010000ULL
#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL
#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL
#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL
#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL
#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL
#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL
#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL
#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL
#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL
#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL
#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL
#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL
#define INFINIPATH_E_RESET 0x0004000000000000ULL
#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
* RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID
* bit 4: flag buffer, 5: datainfo, 6: header info */
#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
/* kr_hwdiagctrl bits */
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR 0x0000000400000000ULL
#define INFINIPATH_DC_COUNTERDISABLE 0x1000000000000000ULL
#define INFINIPATH_DC_COUNTERWREN 0x2000000000000000ULL
#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL
/* kr_ibcctrl bits */
#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
#define INFINIPATH_IBCC_LINKINITCMD_POLL 2 /* cycle through TS1/TS2 till OK */
#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3 /* wait for TS1, then go on */
#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
#define INFINIPATH_IBCC_LINKCMD_INIT 1 /* move to 0x11 */
#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */
#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
#define INFINIPATH_IBCC_LOOPBACK 0x8000000000000000ULL
#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
/* kr_ibcstatus bits */
#define INFINIPATH_IBCS_LINKTRAININGSTATE_MASK 0xF
#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
#define INFINIPATH_IBCS_LINKSTATE_SHIFT 4
#define INFINIPATH_IBCS_TXREADY 0x40000000
#define INFINIPATH_IBCS_TXCREDITOK 0x80000000
/* link training states (shift by INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
#define INFINIPATH_IBCS_LT_STATE_DISABLED 0x00
#define INFINIPATH_IBCS_LT_STATE_LINKUP 0x01
#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE 0x02
#define INFINIPATH_IBCS_LT_STATE_POLLQUIET 0x03
#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY 0x04
#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET 0x05
#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE 0x08
#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG 0x09
#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT 0x0a
#define INFINIPATH_IBCS_LT_STATE_CFGIDLE 0x0b
#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c
#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e
#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f
/* link state machine states (shift by INFINIPATH_IBCS_LINKSTATE_SHIFT) */
#define INFINIPATH_IBCS_L_STATE_DOWN 0x0
#define INFINIPATH_IBCS_L_STATE_INIT 0x1
#define INFINIPATH_IBCS_L_STATE_ARM 0x2
#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3
#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4
/* combination link status states that we use with some frequency */
#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LINKSTATE_MASK \
<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
#define IPATH_IBSTATE_ARM ((INFINIPATH_IBCS_L_STATE_ARM \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
#define IPATH_IBSTATE_ACTIVE ((INFINIPATH_IBCS_L_STATE_ACTIVE \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
/* kr_extstatus bits */
#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
#define INFINIPATH_EXTS_GPIOIN_SHIFT 48
/* kr_extctrl bits */
#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
#define INFINIPATH_EXTC_SERDESENABLE 0x80000000ULL
#define INFINIPATH_EXTC_SERDESCONNECT 0x40000000ULL
#define INFINIPATH_EXTC_SERDESENTRUNKING 0x20000000ULL
#define INFINIPATH_EXTC_SERDESDISRXFIFO 0x10000000ULL
#define INFINIPATH_EXTC_SERDESENPLPBK1 0x08000000ULL
#define INFINIPATH_EXTC_SERDESENPLPBK2 0x04000000ULL
#define INFINIPATH_EXTC_SERDESENENCDEC 0x02000000ULL
#define INFINIPATH_EXTC_LED1SECPORT_ON 0x00000020ULL
#define INFINIPATH_EXTC_LED2SECPORT_ON 0x00000010ULL
#define INFINIPATH_EXTC_LED1PRIPORT_ON 0x00000008ULL
#define INFINIPATH_EXTC_LED2PRIPORT_ON 0x00000004ULL
#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL
#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL
/* kr_mdio bits */
#define INFINIPATH_MDIO_CLKDIV_MASK 0x7FULL
#define INFINIPATH_MDIO_CLKDIV_SHIFT 32
#define INFINIPATH_MDIO_COMMAND_MASK 0x7ULL
#define INFINIPATH_MDIO_COMMAND_SHIFT 26
#define INFINIPATH_MDIO_DEVADDR_MASK 0x1FULL
#define INFINIPATH_MDIO_DEVADDR_SHIFT 21
#define INFINIPATH_MDIO_REGADDR_MASK 0x1FULL
#define INFINIPATH_MDIO_REGADDR_SHIFT 16
#define INFINIPATH_MDIO_DATA_MASK 0xFFFFULL
#define INFINIPATH_MDIO_DATA_SHIFT 0
#define INFINIPATH_MDIO_CMDVALID 0x0000000040000000ULL
#define INFINIPATH_MDIO_RDDATAVALID 0x0000000080000000ULL
/* kr_partitionkey bits */
#define INFINIPATH_PKEY_SIZE 16
#define INFINIPATH_PKEY_MASK 0xFFFF
#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
/* kr_serdesconfig0 bits */
#define INFINIPATH_SERDC0_RESET_MASK 0xfULL /* overall reset bits */
#define INFINIPATH_SERDC0_RESET_PLL 0x10000000ULL /* pll reset */
#define INFINIPATH_SERDC0_TXIDLE 0xF000ULL /* tx idle enables (per lane) */
#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL /* rx detect enables (per lane) */
#define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL /* L1 Power down; use with RXDETECT,
otherwise not used on IB side */
/* kr_xgxsconfig bits */
#define INFINIPATH_XGXS_RESET 0x7ULL
#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL
#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
/* TID entries (memory), HT400-only */
#define INFINIPATH_RT_VALID 0x8000000000000000ULL
#define INFINIPATH_RT_ADDR_SHIFT 0
#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
#define INFINIPATH_RT_BUFSIZE_SHIFT 48
/*
* IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
* PIO send buffers. This is well beyond anything currently
* defined in the InfiniBand spec.
*/
#define IPATH_PIO_MAXIBHDR 128
typedef u64 ipath_err_t;
/* mask of defined bits for various registers */
extern u64 infinipath_i_bitsextant;
extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
/* masks that are different in various chips, or only exist in some chips */
extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
/*
* register bits for selecting i2c direction and values, used for I2C serial
* flash
*/
extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
extern u64 ipath_gpio_sda, ipath_gpio_scl;
/*
* These are the infinipath general register numbers (not offsets).
* The kernel registers are used directly, those beyond the kernel
* registers are calculated from one of the base registers. The use of
* an integer type doesn't allow type-checking as thorough as, say,
* an enum but allows for better hiding of chip differences.
*/
typedef const u16 ipath_kreg, /* infinipath general registers */
ipath_creg, /* infinipath counter registers */
ipath_sreg; /* kernel-only, infinipath send registers */
/*
* These are the chip registers common to all infinipath chips, and
* used both by the kernel and the diagnostics or other user code.
* They are all implemented such that 64 bit accesses work.
* Some implement no more than 32 bits. Because 64 bit reads
* require 2 HT cmds on opteron, we access those with 32 bit
* reads for efficiency (they are written as 64 bits, since
* the extra 32 bits are nearly free on writes, and it slightly reduces
* complexity). The rest are all accessed as 64 bits.
*/
struct ipath_kregs {
/* These are the 32 bit group */
ipath_kreg kr_control;
ipath_kreg kr_counterregbase;
ipath_kreg kr_intmask;
ipath_kreg kr_intstatus;
ipath_kreg kr_pagealign;
ipath_kreg kr_portcnt;
ipath_kreg kr_rcvtidbase;
ipath_kreg kr_rcvtidcnt;
ipath_kreg kr_rcvegrbase;
ipath_kreg kr_rcvegrcnt;
ipath_kreg kr_scratch;
ipath_kreg kr_sendctrl;
ipath_kreg kr_sendpiobufbase;
ipath_kreg kr_sendpiobufcnt;
ipath_kreg kr_sendpiosize;
ipath_kreg kr_sendregbase;
ipath_kreg kr_userregbase;
/* These are the 64 bit group */
ipath_kreg kr_debugport;
ipath_kreg kr_debugportselect;
ipath_kreg kr_errorclear;
ipath_kreg kr_errormask;
ipath_kreg kr_errorstatus;
ipath_kreg kr_extctrl;
ipath_kreg kr_extstatus;
ipath_kreg kr_gpio_clear;
ipath_kreg kr_gpio_mask;
ipath_kreg kr_gpio_out;
ipath_kreg kr_gpio_status;
ipath_kreg kr_hwdiagctrl;
ipath_kreg kr_hwerrclear;
ipath_kreg kr_hwerrmask;
ipath_kreg kr_hwerrstatus;
ipath_kreg kr_ibcctrl;
ipath_kreg kr_ibcstatus;
ipath_kreg kr_intblocked;
ipath_kreg kr_intclear;
ipath_kreg kr_interruptconfig;
ipath_kreg kr_mdio;
ipath_kreg kr_partitionkey;
ipath_kreg kr_rcvbthqp;
ipath_kreg kr_rcvbufbase;
ipath_kreg kr_rcvbufsize;
ipath_kreg kr_rcvctrl;
ipath_kreg kr_rcvhdrcnt;
ipath_kreg kr_rcvhdrentsize;
ipath_kreg kr_rcvhdrsize;
ipath_kreg kr_rcvintmembase;
ipath_kreg kr_rcvintmemsize;
ipath_kreg kr_revision;
ipath_kreg kr_sendbuffererror;
ipath_kreg kr_sendpioavailaddr;
ipath_kreg kr_serdesconfig0;
ipath_kreg kr_serdesconfig1;
ipath_kreg kr_serdesstatus;
ipath_kreg kr_txintmembase;
ipath_kreg kr_txintmemsize;
ipath_kreg kr_xgxsconfig;
ipath_kreg kr_ibpllcfg;
/* use these two (and the following N ports) only with ipath_k*_kreg64_port();
* not *kreg64() */
ipath_kreg kr_rcvhdraddr;
ipath_kreg kr_rcvhdrtailaddr;
/* remaining registers are not present on all types of infinipath chips */
ipath_kreg kr_rcvpktledcnt;
ipath_kreg kr_pcierbuftestreg0;
ipath_kreg kr_pcierbuftestreg1;
ipath_kreg kr_pcieq0serdesconfig0;
ipath_kreg kr_pcieq0serdesconfig1;
ipath_kreg kr_pcieq0serdesstatus;
ipath_kreg kr_pcieq1serdesconfig0;
ipath_kreg kr_pcieq1serdesconfig1;
ipath_kreg kr_pcieq1serdesstatus;
};
struct ipath_cregs {
ipath_creg cr_badformatcnt;
ipath_creg cr_erricrccnt;
ipath_creg cr_errlinkcnt;
ipath_creg cr_errlpcrccnt;
ipath_creg cr_errpkey;
ipath_creg cr_errrcvflowctrlcnt;
ipath_creg cr_err_rlencnt;
ipath_creg cr_errslencnt;
ipath_creg cr_errtidfull;
ipath_creg cr_errtidvalid;
ipath_creg cr_errvcrccnt;
ipath_creg cr_ibstatuschange;
ipath_creg cr_intcnt;
ipath_creg cr_invalidrlencnt;
ipath_creg cr_invalidslencnt;
ipath_creg cr_lbflowstallcnt;
ipath_creg cr_iblinkdowncnt;
ipath_creg cr_iblinkerrrecovcnt;
ipath_creg cr_ibsymbolerrcnt;
ipath_creg cr_pktrcvcnt;
ipath_creg cr_pktrcvflowctrlcnt;
ipath_creg cr_pktsendcnt;
ipath_creg cr_pktsendflowcnt;
ipath_creg cr_portovflcnt;
ipath_creg cr_rcvebpcnt;
ipath_creg cr_rcvovflcnt;
ipath_creg cr_rxdroppktcnt;
ipath_creg cr_senddropped;
ipath_creg cr_sendstallcnt;
ipath_creg cr_sendunderruncnt;
ipath_creg cr_unsupvlcnt;
ipath_creg cr_wordrcvcnt;
ipath_creg cr_wordsendcnt;
};
#endif /* _IPATH_REGISTERS_H */
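/*
 * Illustrative sketch (not part of the header above): each *_MASK/_SHIFT
 * pair describes a register field as "shift right, then mask", e.g. to
 * pull the chip revision out of a raw kr_revision readout.  The value
 * "rev" and the "example_" name below are illustrative; the readout would
 * normally come from a 64-bit register read elsewhere in the driver.
 */
static unsigned int example_reg_field(u64 reg, unsigned int shift, u64 mask)
{
	return (unsigned int)((reg >> shift) & mask);
}

/*
 * Example use with the kr_revision fields defined above:
 *	majrev = example_reg_field(rev, INFINIPATH_R_CHIPREVMAJOR_SHIFT,
 *				   INFINIPATH_R_CHIPREVMAJOR_MASK);
 *	minrev = example_reg_field(rev, INFINIPATH_R_CHIPREVMINOR_SHIFT,
 *				   INFINIPATH_R_CHIPREVMINOR_MASK);
 */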


@ -0,0 +1,552 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "ipath_verbs.h"
/*
* Convert the AETH RNR timeout code into the number of milliseconds.
*/
const u32 ib_ipath_rnr_table[32] = {
656, /* 0 */
1, /* 1 */
1, /* 2 */
1, /* 3 */
1, /* 4 */
1, /* 5 */
1, /* 6 */
1, /* 7 */
1, /* 8 */
1, /* 9 */
1, /* A */
1, /* B */
1, /* C */
1, /* D */
2, /* E */
2, /* F */
3, /* 10 */
4, /* 11 */
6, /* 12 */
8, /* 13 */
11, /* 14 */
16, /* 15 */
21, /* 16 */
31, /* 17 */
41, /* 18 */
62, /* 19 */
82, /* 1A */
123, /* 1B */
164, /* 1C */
246, /* 1D */
328, /* 1E */
492 /* 1F */
};
/**
* ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
* @qp: the QP
*
* XXX Use a simple list for now. We might need a priority
* queue if we have lots of QPs waiting for RNR timeouts
* but that should be rare.
*/
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
unsigned long flags;
spin_lock_irqsave(&dev->pending_lock, flags);
if (list_empty(&dev->rnrwait))
list_add(&qp->timerwait, &dev->rnrwait);
else {
struct list_head *l = &dev->rnrwait;
struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
timerwait);
while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
qp->s_rnr_timeout -= nqp->s_rnr_timeout;
l = l->next;
if (l->next == &dev->rnrwait)
break;
nqp = list_entry(l->next, struct ipath_qp,
timerwait);
}
list_add(&qp->timerwait, l);
}
spin_unlock_irqrestore(&dev->pending_lock, flags);
}
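/*
 * Illustrative sketch (not part of the driver): ipath_insert_rnr_queue()
 * above keeps the RNR wait list delta-encoded -- while walking past QPs
 * that expire no later than the new one, it subtracts their timeouts so
 * the value finally stored is relative to the entry in front of it.  The
 * stand-alone helper below shows where such a walk stops in a plain
 * array of relative timeouts; the "example_" name is illustrative only.
 */
static u32 example_delta_position(const u32 *delta, u32 n, u32 *timeout)
{
	u32 i;

	/* consume the timeouts of entries that expire no later than ours */
	for (i = 0; i < n && *timeout >= delta[i]; i++)
		*timeout -= delta[i];
	return i;	/* the new entry belongs just before entry i */
}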
/**
* ipath_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
* @wr_id_only: update wr_id only, not SGEs
*
* Return 0 if no RWQE is available, otherwise return 1.
*
* Called at interrupt level with the QP r_rq.lock held.
*/
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
{
struct ipath_rq *rq;
struct ipath_srq *srq;
struct ipath_rwqe *wqe;
int ret;
if (!qp->ibqp.srq) {
rq = &qp->r_rq;
if (unlikely(rq->tail == rq->head)) {
ret = 0;
goto bail;
}
wqe = get_rwqe_ptr(rq, rq->tail);
qp->r_wr_id = wqe->wr_id;
if (!wr_id_only) {
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->num_sge;
qp->r_len = wqe->length;
}
if (++rq->tail >= rq->size)
rq->tail = 0;
ret = 1;
goto bail;
}
srq = to_isrq(qp->ibqp.srq);
rq = &srq->rq;
spin_lock(&rq->lock);
if (unlikely(rq->tail == rq->head)) {
spin_unlock(&rq->lock);
ret = 0;
goto bail;
}
wqe = get_rwqe_ptr(rq, rq->tail);
qp->r_wr_id = wqe->wr_id;
if (!wr_id_only) {
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->num_sge;
qp->r_len = wqe->length;
}
if (++rq->tail >= rq->size)
rq->tail = 0;
if (srq->ibsrq.event_handler) {
struct ib_event ev;
u32 n;
if (rq->head < rq->tail)
n = rq->size + rq->head - rq->tail;
else
n = rq->head - rq->tail;
if (n < srq->limit) {
srq->limit = 0;
spin_unlock(&rq->lock);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
srq->ibsrq.event_handler(&ev,
srq->ibsrq.srq_context);
} else
spin_unlock(&rq->lock);
} else
spin_unlock(&rq->lock);
ret = 1;
bail:
return ret;
}
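/*
 * Illustrative sketch (not part of the driver): the SRQ limit check in
 * ipath_get_rwqe() above counts the entries still queued in a circular
 * buffer as head - tail, adding the ring size back in when head has
 * wrapped around behind tail.  Stand-alone form (the "example_" name is
 * illustrative only):
 */
static u32 example_ring_count(u32 head, u32 tail, u32 size)
{
	return (head >= tail) ? head - tail : size + head - tail;
}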
/**
* ipath_ruc_loopback - handle UC and RC loopback requests
* @sqp: the loopback QP
* @wc: the work completion entry
*
* This is called from ipath_do_uc_send() or ipath_do_rc_send() to
* forward a WQE addressed to the same HCA.
* Note that although we are single threaded due to the tasklet, we still
* have to protect against post_send(). We don't have to worry about
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
*/
void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc)
{
struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
struct ipath_qp *qp;
struct ipath_swqe *wqe;
struct ipath_sge *sge;
unsigned long flags;
u64 sdata;
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
if (!qp) {
dev->n_pkt_drops++;
return;
}
again:
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
}
/* Get the next send request. */
if (sqp->s_last == sqp->s_head) {
/* Send work queue is empty. */
spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
*/
wqe = get_swqe_ptr(sqp, sqp->s_last);
spin_unlock_irqrestore(&sqp->s_lock, flags);
wc->wc_flags = 0;
wc->imm_data = 0;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
sqp->s_sge.num_sge = wqe->wr.num_sge;
sqp->s_len = wqe->length;
switch (wqe->wr.opcode) {
case IB_WR_SEND_WITH_IMM:
wc->wc_flags = IB_WC_WITH_IMM;
wc->imm_data = wqe->wr.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
spin_lock_irqsave(&qp->r_rq.lock, flags);
if (!ipath_get_rwqe(qp, 0)) {
rnr_nak:
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
if (sqp->s_rnr_retry == 0) {
wc->status = IB_WC_RNR_RETRY_EXC_ERR;
goto err;
}
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
dev->n_rnr_naks++;
sqp->s_rnr_timeout =
ib_ipath_rnr_table[sqp->s_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
goto done;
}
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
wc->wc_flags = IB_WC_WITH_IMM;
wc->imm_data = wqe->wr.imm_data;
spin_lock_irqsave(&qp->r_rq.lock, flags);
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE:
if (wqe->length == 0)
break;
if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_WRITE))) {
acc_err:
wc->status = IB_WC_REM_ACCESS_ERR;
err:
wc->wr_id = wqe->wr.wr_id;
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc->vendor_err = 0;
wc->byte_len = 0;
wc->qp_num = sqp->ibqp.qp_num;
wc->src_qp = sqp->remote_qpn;
wc->pkey_index = 0;
wc->slid = sqp->remote_ah_attr.dlid;
wc->sl = sqp->remote_ah_attr.sl;
wc->dlid_path_bits = 0;
wc->port_num = 0;
ipath_sqerror_qp(sqp, wc);
goto done;
}
break;
case IB_WR_RDMA_READ:
if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
if (unlikely(!(qp->qp_access_flags &
IB_ACCESS_REMOTE_READ)))
goto acc_err;
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->wr.num_sge;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
sdata = wqe->wr.wr.atomic.swap;
spin_lock_irqsave(&dev->pending_lock, flags);
qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
*(u64 *) qp->r_sge.sge.vaddr =
qp->r_atomic_data + sdata;
else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
*(u64 *) qp->r_sge.sge.vaddr = sdata;
spin_unlock_irqrestore(&dev->pending_lock, flags);
*(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
goto send_comp;
default:
goto done;
}
sge = &sqp->s_sge.sge;
while (sqp->s_len) {
u32 len = sqp->s_len;
if (len > sge->length)
len = sge->length;
BUG_ON(len == 0);
ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr != NULL) {
if (++sge->n >= IPATH_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
sqp->s_len -= len;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
wqe->wr.opcode == IB_WR_RDMA_READ)
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
else
wc->opcode = IB_WC_RECV;
wc->wr_id = qp->r_wr_id;
wc->status = IB_WC_SUCCESS;
wc->vendor_err = 0;
wc->byte_len = wqe->length;
wc->qp_num = qp->ibqp.qp_num;
wc->src_qp = qp->remote_qpn;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc->pkey_index = 0;
wc->slid = qp->remote_ah_attr.dlid;
wc->sl = qp->remote_ah_attr.sl;
wc->dlid_path_bits = 0;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
wc->wr_id = wqe->wr.wr_id;
wc->status = IB_WC_SUCCESS;
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc->vendor_err = 0;
wc->byte_len = wqe->length;
wc->qp_num = sqp->ibqp.qp_num;
wc->src_qp = 0;
wc->pkey_index = 0;
wc->slid = 0;
wc->sl = 0;
wc->dlid_path_bits = 0;
wc->port_num = 0;
ipath_cq_enter(to_icq(sqp->ibqp.send_cq), wc, 0);
}
/* Update s_last now that we are finished with the SWQE */
spin_lock_irqsave(&sqp->s_lock, flags);
if (++sqp->s_last >= sqp->s_size)
sqp->s_last = 0;
spin_unlock_irqrestore(&sqp->s_lock, flags);
goto again;
done:
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
/**
* ipath_no_bufs_available - tell the layer driver we need buffers
* @qp: the QP that caused the problem
* @dev: the device we ran out of buffers on
*
* Called when we run out of PIO buffers.
*/
void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
{
unsigned long flags;
spin_lock_irqsave(&dev->pending_lock, flags);
if (qp->piowait.next == LIST_POISON1)
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock_irqrestore(&dev->pending_lock, flags);
/*
* Note that as soon as ipath_layer_want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
* could be called. If we are still in the tasklet function,
* tasklet_hi_schedule() will not run us again until after the
* current invocation returns.
* We clear the tasklet flag now since we are committing to return
* from the tasklet function.
*/
clear_bit(IPATH_S_BUSY, &qp->s_flags);
tasklet_unlock(&qp->s_task);
ipath_layer_want_buffer(dev->dd);
dev->n_piowait++;
}
/**
* ipath_post_rc_send - post RC and UC sends
* @qp: the QP to post on
* @wr: the work request to send
*/
int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
struct ipath_swqe *wqe;
unsigned long flags;
u32 next;
int i, j;
int acc;
int ret;
/*
* Don't allow RDMA reads or atomic operations on UC, and reject
* undefined opcodes.
* Make sure the buffer is large enough to hold the result of an
* atomic operation.
*/
if (qp->ibqp.qp_type == IB_QPT_UC) {
if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
ret = -EINVAL;
goto bail;
}
} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
ret = -EINVAL;
goto bail;
} else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
(wr->num_sge == 0 ||
wr->sg_list[0].length < sizeof(u64) ||
wr->sg_list[0].addr & (sizeof(u64) - 1))) {
ret = -EINVAL;
goto bail;
}
/* IB spec says that num_sge == 0 is OK. */
if (wr->num_sge > qp->s_max_sge) {
ret = -ENOMEM;
goto bail;
}
spin_lock_irqsave(&qp->s_lock, flags);
next = qp->s_head + 1;
if (next >= qp->s_size)
next = 0;
if (next == qp->s_last) {
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EINVAL;
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_head);
wqe->wr = *wr;
wqe->ssn = qp->s_ssn++;
wqe->sg_list[0].mr = NULL;
wqe->sg_list[0].vaddr = NULL;
wqe->sg_list[0].length = 0;
wqe->sg_list[0].sge_length = 0;
wqe->length = 0;
acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
for (i = 0, j = 0; i < wr->num_sge; i++) {
if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EINVAL;
goto bail;
}
if (wr->sg_list[i].length == 0)
continue;
if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
&wqe->sg_list[j], &wr->sg_list[i],
acc)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EINVAL;
goto bail;
}
wqe->length += wr->sg_list[i].length;
j++;
}
wqe->wr.num_sge = j;
qp->s_head = next;
/*
* Wake up the send tasklet if the QP is not waiting
* for an RNR timeout.
*/
next = qp->s_rnr_timeout;
spin_unlock_irqrestore(&qp->s_lock, flags);
if (next == 0) {
if (qp->ibqp.qp_type == IB_QPT_UC)
ipath_do_uc_send((unsigned long) qp);
else
ipath_do_rc_send((unsigned long) qp);
}
ret = 0;
bail:
return ret;
}
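/*
 * Illustrative sketch (not part of the driver): ipath_post_rc_send()
 * above treats the send queue as a ring that always keeps one slot
 * unused, so advancing the head onto s_last means the queue is full
 * (head == last, with no adjustment, means empty).  Stand-alone form of
 * the full test; the "example_" name is illustrative only.
 */
static int example_ring_full(u32 head, u32 last, u32 size)
{
	u32 next = head + 1;

	if (next >= size)
		next = 0;
	return next == last;		/* nonzero: no free slot remains */
}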


@ -0,0 +1,273 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/err.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
/**
* ipath_post_srq_receive - post a receive on a shared receive queue
* @ibsrq: the SRQ to post the receive on
* @wr: the list of work requests to post
* @bad_wr: the first WR to cause a problem is put here
*
* This may be called from interrupt context.
*/
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct ipath_srq *srq = to_isrq(ibsrq);
struct ipath_ibdev *dev = to_idev(ibsrq->device);
unsigned long flags;
int ret;
for (; wr; wr = wr->next) {
struct ipath_rwqe *wqe;
u32 next;
int i, j;
if (wr->num_sge > srq->rq.max_sge) {
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
spin_lock_irqsave(&srq->rq.lock, flags);
next = srq->rq.head + 1;
if (next >= srq->rq.size)
next = 0;
if (next == srq->rq.tail) {
spin_unlock_irqrestore(&srq->rq.lock, flags);
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
wqe->wr_id = wr->wr_id;
wqe->sg_list[0].mr = NULL;
wqe->sg_list[0].vaddr = NULL;
wqe->sg_list[0].length = 0;
wqe->sg_list[0].sge_length = 0;
wqe->length = 0;
for (i = 0, j = 0; i < wr->num_sge; i++) {
/* Check LKEY */
if (to_ipd(srq->ibsrq.pd)->user &&
wr->sg_list[i].lkey == 0) {
spin_unlock_irqrestore(&srq->rq.lock,
flags);
*bad_wr = wr;
ret = -EINVAL;
goto bail;
}
if (wr->sg_list[i].length == 0)
continue;
if (!ipath_lkey_ok(&dev->lk_table,
&wqe->sg_list[j],
&wr->sg_list[i],
IB_ACCESS_LOCAL_WRITE)) {
spin_unlock_irqrestore(&srq->rq.lock,
flags);
*bad_wr = wr;
ret = -EINVAL;
goto bail;
}
wqe->length += wr->sg_list[i].length;
j++;
}
wqe->num_sge = j;
srq->rq.head = next;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
ret = 0;
bail:
return ret;
}
/**
* ipath_create_srq - create a shared receive queue
* @ibpd: the protection domain of the SRQ to create
* @srq_init_attr: the attributes of the SRQ
* @udata: not used by the InfiniPath verbs driver
*/
struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata)
{
struct ipath_srq *srq;
u32 sz;
struct ib_srq *ret;
if (srq_init_attr->attr.max_sge < 1) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
srq = kmalloc(sizeof(*srq), GFP_KERNEL);
if (!srq) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
srq->rq.size = srq_init_attr->attr.max_wr + 1;
sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
sizeof(struct ipath_rwqe);
srq->rq.wq = vmalloc(srq->rq.size * sz);
if (!srq->rq.wq) {
kfree(srq);
ret = ERR_PTR(-ENOMEM);
goto bail;
}
/*
* ib_create_srq() will initialize srq->ibsrq.
*/
spin_lock_init(&srq->rq.lock);
srq->rq.head = 0;
srq->rq.tail = 0;
srq->rq.max_sge = srq_init_attr->attr.max_sge;
srq->limit = srq_init_attr->attr.srq_limit;
ret = &srq->ibsrq;
bail:
return ret;
}
/**
* ipath_modify_srq - modify a shared receive queue
* @ibsrq: the SRQ to modify
* @attr: the new attributes of the SRQ
* @attr_mask: indicates which attributes to modify
*/
int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask)
{
struct ipath_srq *srq = to_isrq(ibsrq);
unsigned long flags;
int ret;
if (attr_mask & IB_SRQ_LIMIT) {
spin_lock_irqsave(&srq->rq.lock, flags);
srq->limit = attr->srq_limit;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
if (attr_mask & IB_SRQ_MAX_WR) {
u32 size = attr->max_wr + 1;
struct ipath_rwqe *wq, *p;
u32 n;
u32 sz;
if (attr->max_sge < srq->rq.max_sge) {
ret = -EINVAL;
goto bail;
}
sz = sizeof(struct ipath_rwqe) +
attr->max_sge * sizeof(struct ipath_sge);
wq = vmalloc(size * sz);
if (!wq) {
ret = -ENOMEM;
goto bail;
}
spin_lock_irqsave(&srq->rq.lock, flags);
if (srq->rq.head < srq->rq.tail)
n = srq->rq.size + srq->rq.head - srq->rq.tail;
else
n = srq->rq.head - srq->rq.tail;
if (size <= n || size <= srq->limit) {
spin_unlock_irqrestore(&srq->rq.lock, flags);
vfree(wq);
ret = -EINVAL;
goto bail;
}
n = 0;
p = wq;
while (srq->rq.tail != srq->rq.head) {
struct ipath_rwqe *wqe;
int i;
wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
p->wr_id = wqe->wr_id;
p->length = wqe->length;
p->num_sge = wqe->num_sge;
for (i = 0; i < wqe->num_sge; i++)
p->sg_list[i] = wqe->sg_list[i];
n++;
p = (struct ipath_rwqe *)((char *) p + sz);
if (++srq->rq.tail >= srq->rq.size)
srq->rq.tail = 0;
}
vfree(srq->rq.wq);
srq->rq.wq = wq;
srq->rq.size = size;
srq->rq.head = n;
srq->rq.tail = 0;
srq->rq.max_sge = attr->max_sge;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
ret = 0;
bail:
return ret;
}
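/*
 * Illustrative sketch (not part of the driver): when ipath_modify_srq()
 * above grows the SRQ it walks the old ring from tail to head, copying
 * each live WQE into the start of the new buffer so the new ring begins
 * with tail = 0 and head = number of copied entries.  The stand-alone
 * helper below shows the same compaction for a ring of plain integers;
 * the "example_" name is illustrative only.
 */
static u32 example_ring_compact(const int *old, u32 old_size,
				u32 tail, u32 head, int *new_buf)
{
	u32 n = 0;

	while (tail != head) {
		new_buf[n++] = old[tail];	/* copy in queue order */
		if (++tail >= old_size)
			tail = 0;
	}
	return n;				/* new head; new tail is 0 */
}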
int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
struct ipath_srq *srq = to_isrq(ibsrq);
attr->max_wr = srq->rq.size - 1;
attr->max_sge = srq->rq.max_sge;
attr->srq_limit = srq->limit;
return 0;
}
/**
* ipath_destroy_srq - destroy a shared receive queue
* @ibsrq: the SRQ to destroy
*/
int ipath_destroy_srq(struct ib_srq *ibsrq)
{
struct ipath_srq *srq = to_isrq(ibsrq);
vfree(srq->rq.wq);
kfree(srq);
return 0;
}

View File

@ -0,0 +1,303 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/pci.h>
#include "ipath_kernel.h"
struct infinipath_stats ipath_stats;
/**
* ipath_snap_cntr - snapshot a chip counter
* @dd: the infinipath device
* @creg: the counter to snapshot
*
* called from add_timer and user counter read calls, to deal with
* counters that wrap in "human time". The words sent and received, and
* the packets sent and received are all that we worry about. For now,
* at least, we don't worry about error counters, because if they wrap
* that quickly, we probably don't care. We may eventually just make this
* handle all the counters. word counters can wrap in about 20 seconds
* of full bandwidth traffic, packet counters in a few hours.
*/
u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
{
u32 val, reg64 = 0;
u64 val64;
unsigned long t0, t1;
u64 ret;
t0 = jiffies;
/* If fast increment counters are only 32 bits, snapshot them,
* and maintain them as 64bit values in the driver */
if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
(creg == dd->ipath_cregs->cr_wordsendcnt ||
creg == dd->ipath_cregs->cr_wordrcvcnt ||
creg == dd->ipath_cregs->cr_pktsendcnt ||
creg == dd->ipath_cregs->cr_pktrcvcnt)) {
val64 = ipath_read_creg(dd, creg);
val = val64 == ~0ULL ? ~0U : 0;
reg64 = 1;
} else /* val64 just to keep gcc quiet... */
val64 = val = ipath_read_creg32(dd, creg);
/*
* See if a second has passed. This is just a way to detect things
* that are quite broken. Normally this should take just a few
* cycles (the check is for long enough that we don't care if we get
* pre-empted.) An Opteron HT O read timeout is 4 seconds with
* normal NB values
*/
t1 = jiffies;
if (time_before(t0 + HZ, t1) && val == -1) {
ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n",
creg);
ret = 0ULL;
goto bail;
}
if (reg64) {
ret = val64;
goto bail;
}
if (creg == dd->ipath_cregs->cr_wordsendcnt) {
if (val != dd->ipath_lastsword) {
dd->ipath_sword += val - dd->ipath_lastsword;
dd->ipath_lastsword = val;
}
val64 = dd->ipath_sword;
} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
if (val != dd->ipath_lastrword) {
dd->ipath_rword += val - dd->ipath_lastrword;
dd->ipath_lastrword = val;
}
val64 = dd->ipath_rword;
} else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
if (val != dd->ipath_lastspkts) {
dd->ipath_spkts += val - dd->ipath_lastspkts;
dd->ipath_lastspkts = val;
}
val64 = dd->ipath_spkts;
} else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
if (val != dd->ipath_lastrpkts) {
dd->ipath_rpkts += val - dd->ipath_lastrpkts;
dd->ipath_lastrpkts = val;
}
val64 = dd->ipath_rpkts;
} else
val64 = (u64) val;
ret = val64;
bail:
return ret;
}
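/*
 * Standalone sketch (not driver code) of the technique ipath_snap_cntr()
 * uses for the fast word/packet counters: keep a 64-bit software total and
 * fold in the delta from the last 32-bit hardware snapshot, so wraps every
 * ~20 seconds of full-rate traffic don't lose counts.
 */
#include <stdio.h>
#include <stdint.h>
struct soft_cntr {
	uint64_t total;	/* 64-bit value maintained in software */
	uint32_t last;	/* last raw 32-bit hardware reading */
};
static uint64_t snap(struct soft_cntr *c, uint32_t hw_now)
{
	if (hw_now != c->last) {
		/* Unsigned subtraction handles a single wrap correctly. */
		c->total += (uint32_t)(hw_now - c->last);
		c->last = hw_now;
	}
	return c->total;
}
int main(void)
{
	struct soft_cntr c = { 0xfffffff0ull, 0xfffffff0u };
	/* Hardware wrapped from 0xfffffff0 to 0x10; delta is still 0x20. */
	printf("%llu\n", (unsigned long long)snap(&c, 0x10));
	return 0;
}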
/**
* ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
* @dd: the infinipath device
*
* print the delta of egrfull/hdrqfull errors for kernel ports no more than
* every 5 seconds. User processes are printed at close, but kernel doesn't
* close, so...  Separate routine so it may be called from other places
* someday, and so the function name is meaningful when printed by
* _IPATH_INFO.
*/
*/
static void ipath_qcheck(struct ipath_devdata *dd)
{
static u64 last_tot_hdrqfull;
size_t blen = 0;
char buf[128];
*buf = 0;
if (dd->ipath_pd[0]->port_hdrqfull != dd->ipath_p0_hdrqfull) {
blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
dd->ipath_pd[0]->port_hdrqfull -
dd->ipath_p0_hdrqfull);
dd->ipath_p0_hdrqfull = dd->ipath_pd[0]->port_hdrqfull;
}
if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
blen += snprintf(buf + blen, sizeof buf - blen,
"%srcvegrfull %llu",
blen ? ", " : "",
(unsigned long long)
(ipath_stats.sps_etidfull -
dd->ipath_last_tidfull));
dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
}
/*
* this is actually the number of hdrq full interrupts, not actual
* events, but at the moment that's mostly what I'm interested in.
* Actual count, etc. is in the counters, if needed. For production
* users this won't ordinarily be printed.
*/
if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
blen += snprintf(buf + blen, sizeof buf - blen,
"%shdrqfull %llu (all ports)",
blen ? ", " : "",
(unsigned long long)
(ipath_stats.sps_hdrqfull -
last_tot_hdrqfull));
last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
}
if (blen)
ipath_dbg("%s\n", buf);
if (dd->ipath_port0head != (u32)
le64_to_cpu(*dd->ipath_hdrqtailptr)) {
if (dd->ipath_lastport0rcv_cnt ==
ipath_stats.sps_port0pkts) {
ipath_cdbg(PKT, "missing rcv interrupts? "
"port0 hd=%llx tl=%x; port0pkts %llx\n",
(unsigned long long)
le64_to_cpu(*dd->ipath_hdrqtailptr),
dd->ipath_port0head,
(unsigned long long)
ipath_stats.sps_port0pkts);
ipath_kreceive(dd);
}
dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
}
}
/**
* ipath_get_faststats - get word counters from chip before they overflow
* @opaque: contains a pointer to the infinipath device ipath_devdata
*
* called from add_timer
*/
*/
void ipath_get_faststats(unsigned long opaque)
{
struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
u32 val;
static unsigned cnt;
/*
* don't access the chip while running diags, or memory diags can
* fail
*/
if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) ||
ipath_diag_inuse)
/* but re-arm the timer, for diags case; won't hurt other */
goto done;
if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
}
ipath_qcheck(dd);
/*
* deal with repeat error suppression. Doesn't really matter if
* last error was almost a full interval ago, or just a few usecs
* ago; still won't get more than 2 per interval. We may want
* longer intervals for this eventually, could do with mod, counter
* or separate timer. Also see code in ipath_handle_errors() and
* ipath_handle_hwerrors().
*/
if (dd->ipath_lasterror)
dd->ipath_lasterror = 0;
if (dd->ipath_lasthwerror)
dd->ipath_lasthwerror = 0;
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
&& time_after(jiffies, dd->ipath_unmasktime)) {
char ebuf[256];
ipath_decode_err(ebuf, sizeof ebuf,
(dd->ipath_maskederrs & ~dd->
ipath_ignorederrs));
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
ipath_dev_err(dd, "Re-enabling masked errors "
"(%s)\n", ebuf);
else {
/*
* rcvegrfull and rcvhdrqfull are "normal", for some
* types of processes (mostly benchmarks) that send
* huge numbers of messages, while not processing
* them. So only complain about these at debug
* level.
*/
ipath_dbg("Disabling frequent queue full errors "
"(%s)\n", ebuf);
}
dd->ipath_maskederrs = dd->ipath_ignorederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
}
/* limit qfull messages to ~one per minute per port */
if ((++cnt & 0x10)) {
for (val = dd->ipath_cfgports - 1; ((int)val) >= 0;
val--) {
if (dd->ipath_lastegrheads[val] != -1)
dd->ipath_lastegrheads[val] = -1;
if (dd->ipath_lastrcvhdrqtails[val] != -1)
dd->ipath_lastrcvhdrqtails[val] = -1;
}
}
if (dd->ipath_nosma_bufs) {
dd->ipath_nosma_secs += 5;
if (dd->ipath_nosma_secs >= 30) {
ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
"cancelling pending sends\n",
dd->ipath_nosma_secs);
/*
* issue an abort as well, in case we have a packet
* stuck in launch fifo. This could corrupt an
* outgoing user packet in the worst case,
* but this is pretty catastrophic anyway.
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
INFINIPATH_S_ABORT);
ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
dd->ipath_piobcnt2k +
dd->ipath_piobcnt4k -
dd->ipath_lastport_piobuf);
/* start again, if necessary */
dd->ipath_nosma_secs = 0;
} else
ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
"after %u seconds\n",
dd->ipath_nosma_bufs,
dd->ipath_nosma_secs);
}
done:
mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
}
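/*
 * Minimal sketch (hypothetical names, not this driver's actual init code) of
 * the self-rearming timer pattern ipath_get_faststats() relies on: the
 * callback runs every 5 seconds and re-queues itself with mod_timer().
 */
#include <linux/timer.h>
#include <linux/jiffies.h>
static struct timer_list example_timer;
static void example_poll(unsigned long opaque)
{
	/* ... snapshot counters for the device passed in 'opaque' ... */
	mod_timer(&example_timer, jiffies + HZ * 5);
}
static void example_poll_start(unsigned long devdata)
{
	init_timer(&example_timer);
	example_timer.function = example_poll;
	example_timer.data = devdata;
	example_timer.expires = jiffies + HZ * 5;
	add_timer(&example_timer);
}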

View File

@ -0,0 +1,778 @@
/*
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/ctype.h>
#include <linux/pci.h>
#include "ipath_kernel.h"
#include "ips_common.h"
#include "ipath_layer.h"
/**
* ipath_parse_ushort - parse an unsigned short value in an arbitrary base
* @str: the string containing the number
* @valp: where to put the result
*
* returns the number of bytes consumed, or negative value on error
*/
int ipath_parse_ushort(const char *str, unsigned short *valp)
{
unsigned long val;
char *end;
int ret;
if (!isdigit(str[0])) {
ret = -EINVAL;
goto bail;
}
val = simple_strtoul(str, &end, 0);
if (val > 0xffff) {
ret = -EINVAL;
goto bail;
}
*valp = val;
ret = end + 1 - str;
if (ret == 0)
ret = -EINVAL;
bail:
return ret;
}
static ssize_t show_version(struct device_driver *dev, char *buf)
{
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version);
}
static ssize_t show_num_units(struct device_driver *dev, char *buf)
{
return scnprintf(buf, PAGE_SIZE, "%d\n",
ipath_count_units(NULL, NULL, NULL));
}
#define DRIVER_STAT(name, attr) \
static ssize_t show_stat_##name(struct device_driver *dev, \
char *buf) \
{ \
return scnprintf( \
buf, PAGE_SIZE, "%llu\n", \
(unsigned long long) ipath_stats.sps_ ##attr); \
} \
static DRIVER_ATTR(name, S_IRUGO, show_stat_##name, NULL)
DRIVER_STAT(intrs, ints);
DRIVER_STAT(err_intrs, errints);
DRIVER_STAT(errs, errs);
DRIVER_STAT(pkt_errs, pkterrs);
DRIVER_STAT(crc_errs, crcerrs);
DRIVER_STAT(hw_errs, hwerrs);
DRIVER_STAT(ib_link, iblink);
DRIVER_STAT(port0_pkts, port0pkts);
DRIVER_STAT(ether_spkts, ether_spkts);
DRIVER_STAT(ether_rpkts, ether_rpkts);
DRIVER_STAT(sma_spkts, sma_spkts);
DRIVER_STAT(sma_rpkts, sma_rpkts);
DRIVER_STAT(hdrq_full, hdrqfull);
DRIVER_STAT(etid_full, etidfull);
DRIVER_STAT(no_piobufs, nopiobufs);
DRIVER_STAT(ports, ports);
DRIVER_STAT(pkey0, pkeys[0]);
DRIVER_STAT(pkey1, pkeys[1]);
DRIVER_STAT(pkey2, pkeys[2]);
DRIVER_STAT(pkey3, pkeys[3]);
/* XXX fix the following when dynamic table of devices used */
DRIVER_STAT(lid0, lid[0]);
DRIVER_STAT(lid1, lid[1]);
DRIVER_STAT(lid2, lid[2]);
DRIVER_STAT(lid3, lid[3]);
DRIVER_STAT(nports, nports);
DRIVER_STAT(null_intr, nullintr);
DRIVER_STAT(max_pkts_call, maxpkts_call);
DRIVER_STAT(avg_pkts_call, avgpkts_call);
DRIVER_STAT(page_locks, pagelocks);
DRIVER_STAT(page_unlocks, pageunlocks);
DRIVER_STAT(krdrops, krdrops);
/* XXX fix the following when dynamic table of devices used */
DRIVER_STAT(mlid0, mlid[0]);
DRIVER_STAT(mlid1, mlid[1]);
DRIVER_STAT(mlid2, mlid[2]);
DRIVER_STAT(mlid3, mlid[3]);
static struct attribute *driver_stat_attributes[] = {
&driver_attr_intrs.attr,
&driver_attr_err_intrs.attr,
&driver_attr_errs.attr,
&driver_attr_pkt_errs.attr,
&driver_attr_crc_errs.attr,
&driver_attr_hw_errs.attr,
&driver_attr_ib_link.attr,
&driver_attr_port0_pkts.attr,
&driver_attr_ether_spkts.attr,
&driver_attr_ether_rpkts.attr,
&driver_attr_sma_spkts.attr,
&driver_attr_sma_rpkts.attr,
&driver_attr_hdrq_full.attr,
&driver_attr_etid_full.attr,
&driver_attr_no_piobufs.attr,
&driver_attr_ports.attr,
&driver_attr_pkey0.attr,
&driver_attr_pkey1.attr,
&driver_attr_pkey2.attr,
&driver_attr_pkey3.attr,
&driver_attr_lid0.attr,
&driver_attr_lid1.attr,
&driver_attr_lid2.attr,
&driver_attr_lid3.attr,
&driver_attr_nports.attr,
&driver_attr_null_intr.attr,
&driver_attr_max_pkts_call.attr,
&driver_attr_avg_pkts_call.attr,
&driver_attr_page_locks.attr,
&driver_attr_page_unlocks.attr,
&driver_attr_krdrops.attr,
&driver_attr_mlid0.attr,
&driver_attr_mlid1.attr,
&driver_attr_mlid2.attr,
&driver_attr_mlid3.attr,
NULL
};
static struct attribute_group driver_stat_attr_group = {
.name = "stats",
.attrs = driver_stat_attributes
};
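/*
 * For reference, the first entry above, DRIVER_STAT(intrs, ints), expands to
 * roughly the following (whitespace adjusted); the resulting
 * driver_attr_intrs.attr is what gets listed in driver_stat_attributes[] and
 * exposed under the driver's "stats" sysfs group.
 */
static ssize_t show_stat_intrs(struct device_driver *dev, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%llu\n",
			 (unsigned long long) ipath_stats.sps_ints);
}
static DRIVER_ATTR(intrs, S_IRUGO, show_stat_intrs, NULL);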
static ssize_t show_status(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
ssize_t ret;
if (!dd->ipath_statusp) {
ret = -EINVAL;
goto bail;
}
ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
(unsigned long long) *(dd->ipath_statusp));
bail:
return ret;
}
static const char *ipath_status_str[] = {
"Initted",
"Disabled",
"Admin_Disabled",
"OIB_SMA",
"SMA",
"Present",
"IB_link_up",
"IB_configured",
"NoIBcable",
"Fatal_Hardware_Error",
NULL,
};
static ssize_t show_status_str(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
int i, any;
u64 s;
ssize_t ret;
if (!dd->ipath_statusp) {
ret = -EINVAL;
goto bail;
}
s = *(dd->ipath_statusp);
*buf = '\0';
for (any = i = 0; s && ipath_status_str[i]; i++) {
if (s & 1) {
if (any && strlcat(buf, " ", PAGE_SIZE) >=
PAGE_SIZE)
/* overflow */
break;
if (strlcat(buf, ipath_status_str[i],
PAGE_SIZE) >= PAGE_SIZE)
break;
any = 1;
}
s >>= 1;
}
if (any)
strlcat(buf, "\n", PAGE_SIZE);
ret = strlen(buf);
bail:
return ret;
}
static ssize_t show_boardversion(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
}
static ssize_t show_lid(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid);
}
static ssize_t store_lid(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
u16 lid;
int ret;
ret = ipath_parse_ushort(buf, &lid);
if (ret < 0)
goto invalid;
if (lid == 0 || lid >= 0xc000) {
ret = -EINVAL;
goto invalid;
}
ipath_set_sps_lid(dd, lid, 0);
goto bail;
invalid:
ipath_dev_err(dd, "attempt to set invalid LID\n");
bail:
return ret;
}
static ssize_t show_mlid(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid);
}
static ssize_t store_mlid(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
int unit;
u16 mlid;
int ret;
ret = ipath_parse_ushort(buf, &mlid);
if (ret < 0)
goto invalid;
unit = dd->ipath_unit;
dd->ipath_mlid = mlid;
ipath_stats.sps_mlid[unit] = mlid;
ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST);
goto bail;
invalid:
ipath_dev_err(dd, "attempt to set invalid MLID\n");
bail:
return ret;
}
static ssize_t show_guid(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
u8 *guid;
guid = (u8 *) & (dd->ipath_guid);
return scnprintf(buf, PAGE_SIZE,
"%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
guid[0], guid[1], guid[2], guid[3],
guid[4], guid[5], guid[6], guid[7]);
}
static ssize_t store_guid(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
ssize_t ret;
unsigned short guid[8];
__be64 nguid;
u8 *ng;
int i;
if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
&guid[0], &guid[1], &guid[2], &guid[3],
&guid[4], &guid[5], &guid[6], &guid[7]) != 8)
goto invalid;
ng = (u8 *) &nguid;
for (i = 0; i < 8; i++) {
if (guid[i] > 0xff)
goto invalid;
ng[i] = guid[i];
}
dd->ipath_guid = nguid;
dd->ipath_nguid = 1;
ret = strlen(buf);
goto bail;
invalid:
ipath_dev_err(dd, "attempt to set invalid GUID\n");
ret = -EINVAL;
bail:
return ret;
}
static ssize_t show_nguid(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
}
static ssize_t show_serial(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
buf[sizeof dd->ipath_serial] = '\0';
memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial);
strcat(buf, "\n");
return strlen(buf);
}
static ssize_t show_unit(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
}
#define DEVICE_COUNTER(name, attr) \
static ssize_t show_counter_##name(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
{ \
struct ipath_devdata *dd = dev_get_drvdata(dev); \
return scnprintf(\
buf, PAGE_SIZE, "%llu\n", (unsigned long long) \
ipath_snap_cntr( \
dd, offsetof(struct infinipath_counters, \
attr) / sizeof(u64))); \
} \
static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL);
DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt);
DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt);
DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt);
DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt);
DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt);
DEVICE_COUNTER(lb_ints, LBIntCnt);
DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt);
DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt);
DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt);
DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt);
DEVICE_COUNTER(rx_dwords, RxDwordCnt);
DEVICE_COUNTER(rx_ebps, RxEBPCnt);
DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt);
DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt);
DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt);
DEVICE_COUNTER(rx_len_errs, RxLenErrCnt);
DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt);
DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt);
DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt);
DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt);
DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt);
DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt);
DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt);
DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt);
DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt);
DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt);
DEVICE_COUNTER(tx_dwords, TxDwordCnt);
DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt);
DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt);
DEVICE_COUNTER(tx_len_errs, TxLenErrCnt);
DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt);
DEVICE_COUNTER(tx_underruns, TxUnderrunCnt);
DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt);
static struct attribute *dev_counter_attributes[] = {
&dev_attr_ib_link_downeds.attr,
&dev_attr_ib_link_err_recoveries.attr,
&dev_attr_ib_status_changes.attr,
&dev_attr_ib_symbol_errs.attr,
&dev_attr_lb_flow_stalls.attr,
&dev_attr_lb_ints.attr,
&dev_attr_rx_bad_formats.attr,
&dev_attr_rx_buf_ovfls.attr,
&dev_attr_rx_data_pkts.attr,
&dev_attr_rx_dropped_pkts.attr,
&dev_attr_rx_dwords.attr,
&dev_attr_rx_ebps.attr,
&dev_attr_rx_flow_ctrl_errs.attr,
&dev_attr_rx_flow_pkts.attr,
&dev_attr_rx_icrc_errs.attr,
&dev_attr_rx_len_errs.attr,
&dev_attr_rx_link_problems.attr,
&dev_attr_rx_lpcrc_errs.attr,
&dev_attr_rx_max_min_len_errs.attr,
&dev_attr_rx_p0_hdr_egr_ovfls.attr,
&dev_attr_rx_p1_hdr_egr_ovfls.attr,
&dev_attr_rx_p2_hdr_egr_ovfls.attr,
&dev_attr_rx_p3_hdr_egr_ovfls.attr,
&dev_attr_rx_p4_hdr_egr_ovfls.attr,
&dev_attr_rx_p5_hdr_egr_ovfls.attr,
&dev_attr_rx_p6_hdr_egr_ovfls.attr,
&dev_attr_rx_p7_hdr_egr_ovfls.attr,
&dev_attr_rx_p8_hdr_egr_ovfls.attr,
&dev_attr_rx_pkey_mismatches.attr,
&dev_attr_rx_tid_full_errs.attr,
&dev_attr_rx_tid_valid_errs.attr,
&dev_attr_rx_vcrc_errs.attr,
&dev_attr_tx_data_pkts.attr,
&dev_attr_tx_dropped_pkts.attr,
&dev_attr_tx_dwords.attr,
&dev_attr_tx_flow_pkts.attr,
&dev_attr_tx_flow_stalls.attr,
&dev_attr_tx_len_errs.attr,
&dev_attr_tx_max_min_len_errs.attr,
&dev_attr_tx_underruns.attr,
&dev_attr_tx_unsup_vl_errs.attr,
NULL
};
static struct attribute_group dev_counter_attr_group = {
.name = "counters",
.attrs = dev_counter_attributes
};
static ssize_t store_reset(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
int ret;
if (count < 5 || memcmp(buf, "reset", 5)) {
ret = -EINVAL;
goto bail;
}
if (dd->ipath_flags & IPATH_DISABLED) {
/*
* post-reset init would re-enable interrupts, etc.
* so don't allow reset on disabled devices. Not
* perfect error, but about the best choice.
*/
dev_info(dev,"Unit %d is disabled, can't reset\n",
dd->ipath_unit);
ret = -EINVAL;
}
ret = ipath_reset_device(dd->ipath_unit);
bail:
return ret < 0 ? ret : count;
}
static ssize_t store_link_state(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
int ret, r;
u16 state;
ret = ipath_parse_ushort(buf, &state);
if (ret < 0)
goto invalid;
r = ipath_layer_set_linkstate(dd, state);
if (r < 0) {
ret = r;
goto bail;
}
goto bail;
invalid:
ipath_dev_err(dd, "attempt to set invalid link state\n");
bail:
return ret;
}
static ssize_t show_mtu(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu);
}
static ssize_t store_mtu(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
ssize_t ret;
u16 mtu = 0;
int r;
ret = ipath_parse_ushort(buf, &mtu);
if (ret < 0)
goto invalid;
r = ipath_layer_set_mtu(dd, mtu);
if (r < 0)
ret = r;
goto bail;
invalid:
ipath_dev_err(dd, "attempt to set invalid MTU\n");
bail:
return ret;
}
static ssize_t show_enabled(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
return scnprintf(buf, PAGE_SIZE, "%u\n",
(dd->ipath_flags & IPATH_DISABLED) ? 0 : 1);
}
static ssize_t store_enabled(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
ssize_t ret;
u16 enable = 0;
ret = ipath_parse_ushort(buf, &enable);
if (ret < 0) {
ipath_dev_err(dd, "attempt to use non-numeric on enable\n");
goto bail;
}
if (enable) {
if (!(dd->ipath_flags & IPATH_DISABLED))
goto bail;
dev_info(dev, "Enabling unit %d\n", dd->ipath_unit);
/* same as post-reset */
ret = ipath_init_chip(dd, 1);
if (ret)
ipath_dev_err(dd, "Failed to enable unit %d\n",
dd->ipath_unit);
else {
dd->ipath_flags &= ~IPATH_DISABLED;
*dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED;
}
}
else if (!(dd->ipath_flags & IPATH_DISABLED)) {
dev_info(dev, "Disabling unit %d\n", dd->ipath_unit);
ipath_shutdown_device(dd);
dd->ipath_flags |= IPATH_DISABLED;
*dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED;
}
bail:
return ret;
}
static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
static struct attribute *driver_attributes[] = {
&driver_attr_num_units.attr,
&driver_attr_version.attr,
NULL
};
static struct attribute_group driver_attr_group = {
.attrs = driver_attributes
};
static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
static struct attribute *dev_attributes[] = {
&dev_attr_guid.attr,
&dev_attr_lid.attr,
&dev_attr_link_state.attr,
&dev_attr_mlid.attr,
&dev_attr_mtu.attr,
&dev_attr_nguid.attr,
&dev_attr_serial.attr,
&dev_attr_status.attr,
&dev_attr_status_str.attr,
&dev_attr_boardversion.attr,
&dev_attr_unit.attr,
&dev_attr_enabled.attr,
NULL
};
static struct attribute_group dev_attr_group = {
.attrs = dev_attributes
};
/**
* ipath_expose_reset - create a device reset file
* @dev: the device structure
*
* Only expose a file that lets us reset the device after someone
* enters diag mode. A device reset is quite likely to crash the
* machine entirely, so we don't want to normally make it
* available.
*/
int ipath_expose_reset(struct device *dev)
{
return device_create_file(dev, &dev_attr_reset);
}
int ipath_driver_create_group(struct device_driver *drv)
{
int ret;
ret = sysfs_create_group(&drv->kobj, &driver_attr_group);
if (ret)
goto bail;
ret = sysfs_create_group(&drv->kobj, &driver_stat_attr_group);
if (ret)
sysfs_remove_group(&drv->kobj, &driver_attr_group);
bail:
return ret;
}
void ipath_driver_remove_group(struct device_driver *drv)
{
sysfs_remove_group(&drv->kobj, &driver_stat_attr_group);
sysfs_remove_group(&drv->kobj, &driver_attr_group);
}
int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
{
int ret;
char unit[5];
ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
if (ret)
goto bail;
ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group);
if (ret)
goto bail_attrs;
snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit);
ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, unit);
if (ret == 0)
goto bail;
sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
bail_attrs:
sysfs_remove_group(&dev->kobj, &dev_attr_group);
bail:
return ret;
}
void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
{
char unit[5];
snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit);
sysfs_remove_link(&dev->driver->kobj, unit);
sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
sysfs_remove_group(&dev->kobj, &dev_attr_group);
device_remove_file(dev, &dev_attr_reset);
}

View File

@ -0,0 +1,645 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "ipath_verbs.h"
#include "ips_common.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
struct ib_wc *wc)
{
if (++qp->s_last == qp->s_size)
qp->s_last = 0;
if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
wc->wr_id = wqe->wr.wr_id;
wc->status = IB_WC_SUCCESS;
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc->vendor_err = 0;
wc->byte_len = wqe->length;
wc->qp_num = qp->ibqp.qp_num;
wc->src_qp = qp->remote_qpn;
wc->pkey_index = 0;
wc->slid = qp->remote_ah_attr.dlid;
wc->sl = qp->remote_ah_attr.sl;
wc->dlid_path_bits = 0;
wc->port_num = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
}
wqe = get_swqe_ptr(qp, qp->s_last);
}
/**
* ipath_do_uc_send - do a send on a UC queue
* @data: contains a pointer to the QP to send on
*
* Process entries in the send work queue until the queue is exhausted.
* Only allow one CPU to send a packet per QP (tasklet).
* Otherwise, after we drop the QP lock, two threads could send
* packets out of order.
* This is similar to ipath_do_rc_send() below except we don't have
* timeouts or resends.
*/
void ipath_do_uc_send(unsigned long data)
{
struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_swqe *wqe;
unsigned long flags;
u16 lrh0;
u32 hwords;
u32 nwords;
u32 extra_bytes;
u32 bth0;
u32 bth2;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
u32 len;
struct ipath_other_headers *ohdr;
struct ib_wc wc;
if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
goto bail;
if (unlikely(qp->remote_ah_attr.dlid ==
ipath_layer_get_lid(dev->dd))) {
/* Pass in an uninitialized ib_wc to save stack space. */
ipath_ruc_loopback(qp, &wc);
clear_bit(IPATH_S_BUSY, &qp->s_flags);
goto bail;
}
ohdr = &qp->s_hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &qp->s_hdr.u.l.oth;
again:
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
/*
* If no PIO bufs are available, return.
* An interrupt will call ipath_ib_piobufavail()
* when one is available.
*/
if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
(u32 *) &qp->s_hdr,
qp->s_cur_size,
qp->s_cur_sge)) {
ipath_no_bufs_available(qp, dev);
goto bail;
}
dev->n_unicast_xmit++;
/* Record that we sent the packet and s_hdr is empty. */
qp->s_hdrwords = 0;
}
lrh0 = IPS_LRH_BTH;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
/*
* The lock is needed to synchronize between
* setting qp->s_ack_state and post_send().
*/
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
goto done;
bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
/* Send a request. */
wqe = get_swqe_ptr(qp, qp->s_last);
switch (qp->s_state) {
default:
/*
* Signal the completion of the last send (if there is
* one).
*/
if (qp->s_last != qp->s_tail)
complete_last_send(qp, wqe, &wc);
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
goto done;
/*
* Start a new request.
*/
qp->s_psn = wqe->psn = qp->s_next_psn;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_len = len = wqe->length;
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
if (len > pmtu) {
qp->s_state = OP(SEND_FIRST);
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_SEND)
qp->s_state = OP(SEND_ONLY);
else {
qp->s_state =
OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
break;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->wr.wr.rdma.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / 4;
if (len > pmtu) {
qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
qp->s_state = OP(RDMA_WRITE_ONLY);
else {
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the RETH */
ohdr->u.rc.imm_data = wqe->wr.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
}
break;
default:
goto done;
}
if (++qp->s_tail >= qp->s_size)
qp->s_tail = 0;
break;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_SEND)
qp->s_state = OP(SEND_LAST);
else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
break;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
qp->s_state = OP(RDMA_WRITE_LAST);
else {
qp->s_state =
OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
}
break;
}
bth2 = qp->s_next_psn++ & IPS_PSN_MASK;
qp->s_len -= len;
bth0 |= qp->s_state << 24;
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Construct the header. */
extra_bytes = (4 - len) & 3;
nwords = (len + extra_bytes) >> 2;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
/* Header size in 32-bit words. */
hwords += 10;
lrh0 = IPS_LRH_GRH;
qp->s_hdr.u.l.grh.version_tclass_flow =
cpu_to_be32((6 << 28) |
(qp->remote_ah_attr.grh.traffic_class
<< 20) |
qp->remote_ah_attr.grh.flow_label);
qp->s_hdr.u.l.grh.paylen =
cpu_to_be16(((hwords - 12) + nwords +
SIZE_OF_CRC) << 2);
/* next_hdr is defined by C8-7 in ch. 8.4.1 */
qp->s_hdr.u.l.grh.next_hdr = 0x1B;
qp->s_hdr.u.l.grh.hop_limit =
qp->remote_ah_attr.grh.hop_limit;
/* The SGID is 32-bit aligned. */
qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
dev->gid_prefix;
qp->s_hdr.u.l.grh.sgid.global.interface_id =
ipath_layer_get_guid(dev->dd);
qp->s_hdr.u.l.grh.dgid = qp->remote_ah_attr.grh.dgid;
}
qp->s_hdrwords = hwords;
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
lrh0 |= qp->remote_ah_attr.sl << 4;
qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
/* DEST LID */
qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(bth2);
/* Check for more work to do. */
goto again;
done:
spin_unlock_irqrestore(&qp->s_lock, flags);
clear_bit(IPATH_S_BUSY, &qp->s_flags);
bail:
return;
}
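/*
 * Standalone sketch (not driver code) of the payload padding math used when
 * the header is built above: IB payloads are carried as whole 32-bit words,
 * so the length is rounded up and the pad count is placed in bits 20-21 of
 * BTH word 0 (the "extra_bytes << 20" above).
 */
#include <stdio.h>
#include <stdint.h>
static void pad_payload(uint32_t len, uint32_t *extra_bytes, uint32_t *nwords)
{
	*extra_bytes = (4 - len) & 3;		/* 0..3 pad bytes */
	*nwords = (len + *extra_bytes) >> 2;	/* payload in dwords */
}
int main(void)
{
	uint32_t pad, dwords;
	pad_payload(13, &pad, &dwords);
	printf("len 13 -> %u pad bytes, %u dwords\n", pad, dwords); /* 3, 4 */
	return 0;
}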
/**
* ipath_uc_rcv - handle an incoming UC packet
* @dev: the device the packet came in on
* @hdr: the header of the packet
* @has_grh: true if the packet has a GRH
* @data: the packet data
* @tlen: the length of the packet
* @qp: the QP for this packet.
*
* This is called from ipath_qp_rcv() to process an incoming UC packet
* for the given QP.
* Called at interrupt level.
*/
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
struct ipath_other_headers *ohdr;
int opcode;
u32 hdrsize;
u32 psn;
u32 pad;
unsigned long flags;
struct ib_wc wc;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
struct ib_reth *reth;
int header_in_data;
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
hdrsize = 8 + 12; /* LRH + BTH */
psn = be32_to_cpu(ohdr->bth[2]);
header_in_data = 0;
} else {
ohdr = &hdr->u.l.oth;
hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
/*
* The header with GRH is 60 bytes and the
* core driver sets the eager header buffer
* size to 56 bytes so the last 4 bytes of
* the BTH header (PSN) is in the data buffer.
*/
header_in_data =
ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
if (header_in_data) {
psn = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
} else
psn = be32_to_cpu(ohdr->bth[2]);
}
/*
* The opcode is in the low byte when it's in network order
* (top byte when in host order).
*/
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
wc.imm_data = 0;
wc.wc_flags = 0;
spin_lock_irqsave(&qp->r_rq.lock, flags);
/* Compare the PSN versus the expected PSN. */
if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
/*
* Handle a sequence error.
* Silently drop any current message.
*/
qp->r_psn = psn;
inv:
qp->r_state = OP(SEND_LAST);
switch (opcode) {
case OP(SEND_FIRST):
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
goto send_first;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
goto rdma_first;
default:
dev->n_pkt_drops++;
goto done;
}
}
/* Check for opcode sequence errors. */
switch (qp->r_state) {
case OP(SEND_FIRST):
case OP(SEND_MIDDLE):
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
goto inv;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE):
if (opcode == OP(RDMA_WRITE_MIDDLE) ||
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
break;
goto inv;
default:
if (opcode == OP(SEND_FIRST) ||
opcode == OP(SEND_ONLY) ||
opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
opcode == OP(RDMA_WRITE_FIRST) ||
opcode == OP(RDMA_WRITE_ONLY) ||
opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
break;
goto inv;
}
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
if (qp->r_reuse_sge) {
qp->r_reuse_sge = 0;
qp->r_sge = qp->s_rdma_sge;
} else if (!ipath_get_rwqe(qp, 0)) {
dev->n_pkt_drops++;
goto done;
}
/* Save the WQE so we can reuse it in case of an error. */
qp->s_rdma_sge = qp->r_sge;
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
goto send_last;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4))) {
qp->r_reuse_sge = 1;
dev->n_pkt_drops++;
goto done;
}
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) {
qp->r_reuse_sge = 1;
dev->n_pkt_drops++;
goto done;
}
ipath_copy_sge(&qp->r_sge, data, pmtu);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
if (header_in_data) {
wc.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
wc.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
/* FALLTHROUGH */
case OP(SEND_LAST):
send_last:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
qp->r_reuse_sge = 1;
dev->n_pkt_drops++;
goto done;
}
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len)) {
qp->r_reuse_sge = 1;
dev->n_pkt_drops++;
goto done;
}
/* XXX Need to free SGEs */
last_imm:
ipath_copy_sge(&qp->r_sge, data, tlen);
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp_num = qp->ibqp.qp_num;
wc.src_qp = qp->remote_qpn;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
__constant_cpu_to_be32(1 << 23)) != 0);
break;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
rdma_first:
/* RETH comes after BTH */
if (!header_in_data)
reth = &ohdr->u.rc.reth;
else {
reth = (struct ib_reth *)data;
data += sizeof(*reth);
}
hdrsize += sizeof(*reth);
qp->r_len = be32_to_cpu(reth->length);
qp->r_rcv_len = 0;
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
/* Check rkey */
if (unlikely(!ipath_rkey_ok(
dev, &qp->r_sge, qp->r_len,
vaddr, rkey,
IB_ACCESS_REMOTE_WRITE))) {
dev->n_pkt_drops++;
goto done;
}
} else {
qp->r_sge.sg_list = NULL;
qp->r_sge.sge.mr = NULL;
qp->r_sge.sge.vaddr = NULL;
qp->r_sge.sge.length = 0;
qp->r_sge.sge.sge_length = 0;
}
if (unlikely(!(qp->qp_access_flags &
IB_ACCESS_REMOTE_WRITE))) {
dev->n_pkt_drops++;
goto done;
}
if (opcode == OP(RDMA_WRITE_ONLY))
goto rdma_last;
else if (opcode ==
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
goto rdma_last_imm;
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4))) {
dev->n_pkt_drops++;
goto done;
}
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) {
dev->n_pkt_drops++;
goto done;
}
ipath_copy_sge(&qp->r_sge, data, pmtu);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
rdma_last_imm:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
dev->n_pkt_drops++;
goto done;
}
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
dev->n_pkt_drops++;
goto done;
}
if (qp->r_reuse_sge) {
qp->r_reuse_sge = 0;
} else if (!ipath_get_rwqe(qp, 1)) {
dev->n_pkt_drops++;
goto done;
}
if (header_in_data) {
wc.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
wc.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
wc.byte_len = 0;
goto last_imm;
case OP(RDMA_WRITE_LAST):
rdma_last:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
dev->n_pkt_drops++;
goto done;
}
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
dev->n_pkt_drops++;
goto done;
}
ipath_copy_sge(&qp->r_sge, data, tlen);
break;
default:
/* Drop packet for unknown opcodes. */
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
dev->n_pkt_drops++;
goto bail;
}
qp->r_psn++;
qp->r_state = opcode;
done:
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
bail:
return;
}
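/*
 * ipath_cmp24(), used in the PSN check above, is defined elsewhere in this
 * patch series.  A common way to compare 24-bit PSNs with wraparound
 * (sketch only, not necessarily the driver's implementation) is to
 * sign-extend the 24-bit difference; inputs are assumed already masked to
 * 24 bits.
 */
#include <stdio.h>
#include <stdint.h>
static int cmp24(uint32_t a, uint32_t b)
{
	/* Negative: a is "before" b; zero: equal; positive: a is "after" b. */
	return ((int32_t)((a - b) << 8)) >> 8;
}
int main(void)
{
	printf("%d\n", cmp24(0x000002, 0xfffffe));	/* 4: just wrapped */
	printf("%d\n", cmp24(5, 5));			/* 0 */
	return 0;
}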

View File

@ -0,0 +1,621 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_smi.h>
#include "ipath_verbs.h"
#include "ips_common.h"
/**
* ipath_ud_loopback - handle send on loopback QPs
* @sqp: the QP
* @ss: the SGE state
* @length: the length of the data to send
* @wr: the work request
* @wc: the work completion entry
*
* This is called from ipath_post_ud_send() to forward a WQE addressed
* to the same HCA.
*/
void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_sge_state *ss,
u32 length, struct ib_send_wr *wr, struct ib_wc *wc)
{
struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
struct ipath_qp *qp;
struct ib_ah_attr *ah_attr;
unsigned long flags;
struct ipath_rq *rq;
struct ipath_srq *srq;
struct ipath_sge_state rsge;
struct ipath_sge *sge;
struct ipath_rwqe *wqe;
qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
if (!qp)
return;
/*
* Check that the qkey matches (except for QP0, see 9.6.1.4.1).
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
if (unlikely(qp->ibqp.qp_num &&
((int) wr->wr.ud.remote_qkey < 0
? qp->qkey : wr->wr.ud.remote_qkey) != qp->qkey)) {
/* XXX OK to lose a count once in a while. */
dev->qkey_violations++;
dev->n_pkt_drops++;
goto done;
}
/*
* A GRH is expected to precede the data even if not
* present on the wire.
*/
wc->byte_len = length + sizeof(struct ib_grh);
if (wr->opcode == IB_WR_SEND_WITH_IMM) {
wc->wc_flags = IB_WC_WITH_IMM;
wc->imm_data = wr->imm_data;
} else {
wc->wc_flags = 0;
wc->imm_data = 0;
}
/*
* Get the next work request entry to find where to put the data.
* Note that it is safe to drop the lock after changing rq->tail
* since ipath_post_receive() won't fill the empty slot.
*/
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
rq = &srq->rq;
} else {
srq = NULL;
rq = &qp->r_rq;
}
spin_lock_irqsave(&rq->lock, flags);
if (rq->tail == rq->head) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
goto done;
}
/* Silently drop packets which are too big. */
wqe = get_rwqe_ptr(rq, rq->tail);
if (wc->byte_len > wqe->length) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
goto done;
}
wc->wr_id = wqe->wr_id;
rsge.sge = wqe->sg_list[0];
rsge.sg_list = wqe->sg_list + 1;
rsge.num_sge = wqe->num_sge;
if (++rq->tail >= rq->size)
rq->tail = 0;
if (srq && srq->ibsrq.event_handler) {
u32 n;
if (rq->head < rq->tail)
n = rq->size + rq->head - rq->tail;
else
n = rq->head - rq->tail;
if (n < srq->limit) {
struct ib_event ev;
srq->limit = 0;
spin_unlock_irqrestore(&rq->lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
srq->ibsrq.event_handler(&ev,
srq->ibsrq.srq_context);
} else
spin_unlock_irqrestore(&rq->lock, flags);
} else
spin_unlock_irqrestore(&rq->lock, flags);
ah_attr = &to_iah(wr->wr.ud.ah)->attr;
if (ah_attr->ah_flags & IB_AH_GRH) {
ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
wc->wc_flags |= IB_WC_GRH;
} else
ipath_skip_sge(&rsge, sizeof(struct ib_grh));
sge = &ss->sge;
while (length) {
u32 len = sge->length;
if (len > length)
len = length;
BUG_ON(len == 0);
ipath_copy_sge(&rsge, sge->vaddr, len);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr != NULL) {
if (++sge->n >= IPATH_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
length -= len;
}
wc->status = IB_WC_SUCCESS;
wc->opcode = IB_WC_RECV;
wc->vendor_err = 0;
wc->qp_num = qp->ibqp.qp_num;
wc->src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc->pkey_index = 0;
wc->slid = ipath_layer_get_lid(dev->dd) |
(ah_attr->src_path_bits &
((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
wc->sl = ah_attr->sl;
wc->dlid_path_bits =
ah_attr->dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
wr->send_flags & IB_SEND_SOLICITED);
done:
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
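/*
 * Standalone sketch (not driver code) of the Q_Key selection rule applied
 * above (IB spec 10.2.5): if the work request's Q_Key has the high bit set,
 * the QP's own Q_Key is used instead.
 */
#include <stdio.h>
#include <stdint.h>
static uint32_t effective_qkey(uint32_t wr_qkey, uint32_t qp_qkey)
{
	return (int32_t)wr_qkey < 0 ? qp_qkey : wr_qkey;
}
int main(void)
{
	printf("0x%x\n", effective_qkey(0x80010000u, 0x1234));	/* 0x1234 */
	printf("0x%x\n", effective_qkey(0x00005678u, 0x1234));	/* 0x5678 */
	return 0;
}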
/**
* ipath_post_ud_send - post a UD send on QP
* @qp: the QP
* @wr: the work request
*
* Note that we actually send the data as it is posted instead of putting
* the request into a ring buffer. If we wanted to use a ring buffer,
* we would need to save a reference to the destination address in the SWQE.
*/
int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct ipath_sge_state ss;
struct ipath_sge *sg_list;
struct ib_wc wc;
u32 hwords;
u32 nwords;
u32 len;
u32 extra_bytes;
u32 bth0;
u16 lrh0;
u16 lid;
int i;
int ret;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
ret = 0;
goto bail;
}
/* IB spec says that num_sge == 0 is OK. */
if (wr->num_sge > qp->s_max_sge) {
ret = -EINVAL;
goto bail;
}
if (wr->num_sge > 1) {
sg_list = kmalloc((qp->s_max_sge - 1) * sizeof(*sg_list),
GFP_ATOMIC);
if (!sg_list) {
ret = -ENOMEM;
goto bail;
}
} else
sg_list = NULL;
/* Check the buffer to send. */
ss.sg_list = sg_list;
ss.sge.mr = NULL;
ss.sge.vaddr = NULL;
ss.sge.length = 0;
ss.sge.sge_length = 0;
ss.num_sge = 0;
len = 0;
for (i = 0; i < wr->num_sge; i++) {
/* Check LKEY */
if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
ret = -EINVAL;
goto bail;
}
if (wr->sg_list[i].length == 0)
continue;
if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ?
sg_list + ss.num_sge - 1 : &ss.sge,
&wr->sg_list[i], 0)) {
ret = -EINVAL;
goto bail;
}
len += wr->sg_list[i].length;
ss.num_sge++;
}
extra_bytes = (4 - len) & 3;
nwords = (len + extra_bytes) >> 2;
/* Construct the header. */
ah_attr = &to_iah(wr->wr.ud.ah)->attr;
if (ah_attr->dlid == 0) {
ret = -EINVAL;
goto bail;
}
if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE) {
if (ah_attr->dlid != IPS_PERMISSIVE_LID)
dev->n_multicast_xmit++;
else
dev->n_unicast_xmit++;
} else {
dev->n_unicast_xmit++;
lid = ah_attr->dlid &
~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
if (unlikely(lid == ipath_layer_get_lid(dev->dd))) {
/*
* Pass in an uninitialized ib_wc to save stack
* space.
*/
ipath_ud_loopback(qp, &ss, len, wr, &wc);
goto done;
}
}
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
hwords = 17;
lrh0 = IPS_LRH_GRH;
ohdr = &qp->s_hdr.u.l.oth;
qp->s_hdr.u.l.grh.version_tclass_flow =
cpu_to_be32((6 << 28) |
(ah_attr->grh.traffic_class << 20) |
ah_attr->grh.flow_label);
qp->s_hdr.u.l.grh.paylen =
cpu_to_be16(((wr->opcode ==
IB_WR_SEND_WITH_IMM ? 6 : 5) +
nwords + SIZE_OF_CRC) << 2);
/* next_hdr is defined by C8-7 in ch. 8.4.1 */
qp->s_hdr.u.l.grh.next_hdr = 0x1B;
qp->s_hdr.u.l.grh.hop_limit = ah_attr->grh.hop_limit;
/* The SGID is 32-bit aligned. */
qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
dev->gid_prefix;
qp->s_hdr.u.l.grh.sgid.global.interface_id =
ipath_layer_get_guid(dev->dd);
qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
/*
* Don't worry about sending to locally attached multicast
* QPs; the spec leaves what happens unspecified.
*/
} else {
/* Header size in 32-bit words. */
hwords = 7;
lrh0 = IPS_LRH_BTH;
ohdr = &qp->s_hdr.u.oth;
}
if (wr->opcode == IB_WR_SEND_WITH_IMM) {
ohdr->u.ud.imm_data = wr->imm_data;
wc.imm_data = wr->imm_data;
hwords += 1;
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
} else if (wr->opcode == IB_WR_SEND) {
wc.imm_data = 0;
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
} else {
ret = -EINVAL;
goto bail;
}
lrh0 |= ah_attr->sl << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI)
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
lid = ipath_layer_get_lid(dev->dd);
if (lid) {
lid |= ah_attr->src_path_bits &
((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
qp->s_hdr.lrh[3] = cpu_to_be16(lid);
} else
qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
if (wr->send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
bth0 |= extra_bytes << 20;
bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPS_DEFAULT_P_KEY :
ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
/*
* Use the multicast QP if the destination LID is a multicast LID.
*/
ohdr->bth[1] = ah_attr->dlid >= IPS_MULTICAST_LID_BASE &&
ah_attr->dlid != IPS_PERMISSIVE_LID ?
__constant_cpu_to_be32(IPS_MULTICAST_QPN) :
cpu_to_be32(wr->wr.ud.remote_qpn);
/* XXX Could lose a PSN count but not worth locking */
ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPS_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
ohdr->u.ud.deth[0] = cpu_to_be32((int)wr->wr.ud.remote_qkey < 0 ?
qp->qkey : wr->wr.ud.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
if (ipath_verbs_send(dev->dd, hwords, (u32 *) &qp->s_hdr,
len, &ss))
dev->n_no_piobuf++;
done:
/* Queue the completion status entry. */
if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
(wr->send_flags & IB_SEND_SIGNALED)) {
wc.wr_id = wr->wr_id;
wc.status = IB_WC_SUCCESS;
wc.vendor_err = 0;
wc.opcode = IB_WC_SEND;
wc.byte_len = len;
wc.qp_num = qp->ibqp.qp_num;
wc.src_qp = 0;
wc.wc_flags = 0;
/* XXX initialize other fields? */
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
}
kfree(sg_list);
ret = 0;
bail:
return ret;
}
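/*
 * Standalone sketch (not driver code) of the header word counts used above:
 * LRH (8 bytes) + BTH (12) + DETH (8) is 7 dwords; a GRH adds another
 * 40 bytes (10 dwords), and immediate data adds one more.
 */
#include <stdio.h>
static unsigned ud_header_dwords(int has_grh, int has_imm)
{
	unsigned hwords = (8 + 12 + 8) / 4;	/* LRH + BTH + DETH = 7 */
	if (has_grh)
		hwords += 40 / 4;		/* GRH = 10 dwords */
	if (has_imm)
		hwords += 1;			/* 4-byte immediate */
	return hwords;
}
int main(void)
{
	/* Prints "7 17", matching the hwords values in ipath_post_ud_send(). */
	printf("%u %u\n", ud_header_dwords(0, 0), ud_header_dwords(1, 0));
	return 0;
}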
/**
* ipath_ud_rcv - receive an incoming UD packet
* @dev: the device the packet came in on
* @hdr: the packet header
* @has_grh: true if the packet has a GRH
* @data: the packet data
* @tlen: the packet length
* @qp: the QP the packet came on
*
* This is called from ipath_qp_rcv() to process an incoming UD packet
* for the given QP.
* Called at interrupt level.
*/
void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
struct ipath_other_headers *ohdr;
int opcode;
u32 hdrsize;
u32 pad;
unsigned long flags;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
struct ipath_rq *rq;
struct ipath_srq *srq;
struct ipath_rwqe *wqe;
u16 dlid;
int header_in_data;
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
header_in_data = 0;
} else {
ohdr = &hdr->u.l.oth;
hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
/*
* The header with GRH is 68 bytes and the core driver sets
* the eager header buffer size to 56 bytes so the last 12
* bytes of the IB header is in the data buffer.
*/
header_in_data =
ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
if (header_in_data) {
qkey = be32_to_cpu(((__be32 *) data)[1]);
src_qp = be32_to_cpu(((__be32 *) data)[2]);
data += 12;
} else {
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
}
}
src_qp &= IPS_QPN_MASK;
/*
* Check that the permissive LID is only used on QP0
* and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
*/
if (qp->ibqp.qp_num) {
if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
hdr->lrh[3] == IB_LID_PERMISSIVE)) {
dev->n_pkt_drops++;
goto bail;
}
if (unlikely(qkey != qp->qkey)) {
/* XXX OK to lose a count once in a while. */
dev->qkey_violations++;
dev->n_pkt_drops++;
goto bail;
}
} else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
hdr->lrh[3] == IB_LID_PERMISSIVE) {
struct ib_smp *smp = (struct ib_smp *) data;
if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
dev->n_pkt_drops++;
goto bail;
}
}
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
if (unlikely(tlen < (hdrsize + pad + 4))) {
/* Drop incomplete packets. */
dev->n_pkt_drops++;
goto bail;
}
tlen -= hdrsize + pad + 4;
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely((qp->ibqp.qp_num == 0 &&
(tlen != 256 ||
(be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
(qp->ibqp.qp_num == 1 &&
(tlen != 256 ||
(be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
dev->n_pkt_drops++;
goto bail;
}
/*
 * A GRH is expected to precede the data even if not
* present on the wire.
*/
wc.byte_len = tlen + sizeof(struct ib_grh);
/*
 * The opcode is in the low byte when it's in network order
* (top byte when in host order).
*/
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
if (qp->ibqp.qp_num > 1 &&
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
if (header_in_data) {
wc.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else
wc.imm_data = ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
hdrsize += sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
wc.imm_data = 0;
wc.wc_flags = 0;
} else {
dev->n_pkt_drops++;
goto bail;
}
/*
* Get the next work request entry to find where to put the data.
* Note that it is safe to drop the lock after changing rq->tail
* since ipath_post_receive() won't fill the empty slot.
*/
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
rq = &srq->rq;
} else {
srq = NULL;
rq = &qp->r_rq;
}
spin_lock_irqsave(&rq->lock, flags);
if (rq->tail == rq->head) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
goto bail;
}
/* Silently drop packets which are too big. */
wqe = get_rwqe_ptr(rq, rq->tail);
if (wc.byte_len > wqe->length) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
goto bail;
}
wc.wr_id = wqe->wr_id;
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->num_sge;
if (++rq->tail >= rq->size)
rq->tail = 0;
if (srq && srq->ibsrq.event_handler) {
u32 n;
if (rq->head < rq->tail)
n = rq->size + rq->head - rq->tail;
else
n = rq->head - rq->tail;
if (n < srq->limit) {
struct ib_event ev;
srq->limit = 0;
spin_unlock_irqrestore(&rq->lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
srq->ibsrq.event_handler(&ev,
srq->ibsrq.srq_context);
} else
spin_unlock_irqrestore(&rq->lock, flags);
} else
spin_unlock_irqrestore(&rq->lock, flags);
if (has_grh) {
ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
sizeof(struct ib_grh));
wc.wc_flags |= IB_WC_GRH;
} else
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp_num = qp->ibqp.qp_num;
wc.src_qp = src_qp;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc.pkey_index = 0;
wc.slid = be16_to_cpu(hdr->lrh[3]);
wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
dlid = be16_to_cpu(hdr->lrh[1]);
/*
* Save the LMC lower bits if the destination LID is a unicast LID.
*/
wc.dlid_path_bits = dlid >= IPS_MULTICAST_LID_BASE ? 0 :
dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
__constant_cpu_to_be32(1 << 23)) != 0);
bail:;
}


@ -0,0 +1,207 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/device.h>
#include "ipath_kernel.h"
static void __ipath_release_user_pages(struct page **p, size_t num_pages,
int dirty)
{
size_t i;
for (i = 0; i < num_pages; i++) {
ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
(unsigned long) num_pages, p[i]);
if (dirty)
set_page_dirty_lock(p[i]);
put_page(p[i]);
}
}
/* call with current->mm->mmap_sem held */
static int __get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p, struct vm_area_struct **vma)
{
unsigned long lock_limit;
size_t got;
int ret;
#if 0
/*
* XXX - causes MPI programs to fail, haven't had time to check
* yet
*/
if (!capable(CAP_IPC_LOCK)) {
ret = -EPERM;
goto bail;
}
#endif
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
PAGE_SHIFT;
if (num_pages > lock_limit) {
ret = -ENOMEM;
goto bail;
}
ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
(unsigned long) num_pages, start_page);
for (got = 0; got < num_pages; got += ret) {
ret = get_user_pages(current, current->mm,
start_page + got * PAGE_SIZE,
num_pages - got, 1, 1,
p + got, vma);
if (ret < 0)
goto bail_release;
}
current->mm->locked_vm += num_pages;
ret = 0;
goto bail;
bail_release:
__ipath_release_user_pages(p, got, 0);
bail:
return ret;
}
/**
* ipath_get_user_pages - lock user pages into memory
* @start_page: the start page
* @num_pages: the number of pages
* @p: the output page structures
*
* This function takes a given start page (page aligned user virtual
* address) and pins it and the following specified number of pages. For
* now, num_pages is always 1, but that will probably change at some point
 * (because the caller is doing expected sends on a single virtually contiguous
* buffer, so we can do all pages at once).
*/
int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p)
{
int ret;
down_write(&current->mm->mmap_sem);
ret = __get_user_pages(start_page, num_pages, p, NULL);
up_write(&current->mm->mmap_sem);
return ret;
}
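/*
 * Illustrative sketch only (not part of this commit): a hypothetical
 * caller pinning and releasing a single user page with the helpers in
 * this file.  "uaddr" is assumed to be a page-aligned user virtual
 * address supplied by the caller.
 */
static int example_pin_one_page(unsigned long uaddr, struct page **pagep)
{
	int ret;

	/* pin the page; returns 0 on success, -errno on failure */
	ret = ipath_get_user_pages(uaddr, 1, pagep);
	if (ret)
		return ret;

	/* ... use the pinned page for DMA or PIO copies ... */

	/* unpin the page and mark it dirty when done */
	ipath_release_user_pages(pagep, 1);
	return 0;
}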
/**
* ipath_get_user_pages_nocopy - lock a single page for I/O and mark shared
 * @page: the page to lock
* @p: the output page structure
*
* This is similar to ipath_get_user_pages, but it's always one page, and we
* mark the page as locked for I/O, and shared. This is used for the user
* process page that contains the destination address for the rcvhdrq tail
* update, so we need to have the vma. If we don't do this, the page can be
* taken away from us on fork, even if the child never touches it, and then
* the user process never sees the tail register updates.
*/
int ipath_get_user_pages_nocopy(unsigned long page, struct page **p)
{
struct vm_area_struct *vma;
int ret;
down_write(&current->mm->mmap_sem);
ret = __get_user_pages(page, 1, p, &vma);
up_write(&current->mm->mmap_sem);
return ret;
}
void ipath_release_user_pages(struct page **p, size_t num_pages)
{
down_write(&current->mm->mmap_sem);
__ipath_release_user_pages(p, num_pages, 1);
current->mm->locked_vm -= num_pages;
up_write(&current->mm->mmap_sem);
}
struct ipath_user_pages_work {
struct work_struct work;
struct mm_struct *mm;
unsigned long num_pages;
};
static void user_pages_account(void *ptr)
{
struct ipath_user_pages_work *work = ptr;
down_write(&work->mm->mmap_sem);
work->mm->locked_vm -= work->num_pages;
up_write(&work->mm->mmap_sem);
mmput(work->mm);
kfree(work);
}
void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
{
struct ipath_user_pages_work *work;
struct mm_struct *mm;
__ipath_release_user_pages(p, num_pages, 1);
mm = get_task_mm(current);
if (!mm)
goto bail;
	work = kmalloc(sizeof(*work), GFP_KERNEL);
	if (!work)
		goto bail_mm;

	INIT_WORK(&work->work, user_pages_account, work);
	work->mm = mm;
	work->num_pages = num_pages;

	/* defer the locked_vm accounting; the handler drops the mm reference */
	schedule_work(&work->work);
	goto bail;
bail_mm:
mmput(mm);
bail:
return;
}

File diff suppressed because it is too large


@ -0,0 +1,697 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef IPATH_VERBS_H
#define IPATH_VERBS_H
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <rdma/ib_pack.h>
#include "ipath_layer.h"
#include "verbs_debug.h"
#define QPN_MAX (1 << 24)
#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
/*
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define IPATH_UVERBS_ABI_VERSION 1
/*
* Define an ib_cq_notify value that is not valid so we know when CQ
* notifications are armed.
*/
#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
#define IB_RNR_NAK 0x20
#define IB_NAK_PSN_ERROR 0x60
#define IB_NAK_INVALID_REQUEST 0x61
#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
#define IB_NAK_INVALID_RD_REQUEST 0x64
#define IPATH_POST_SEND_OK 0x01
#define IPATH_POST_RECV_OK 0x02
#define IPATH_PROCESS_RECV_OK 0x04
#define IPATH_PROCESS_SEND_OK 0x08
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
/* Mandatory IB performance counter select values. */
#define IB_PMA_PORT_XMIT_DATA __constant_htons(0x0001)
#define IB_PMA_PORT_RCV_DATA __constant_htons(0x0002)
#define IB_PMA_PORT_XMIT_PKTS __constant_htons(0x0003)
#define IB_PMA_PORT_RCV_PKTS __constant_htons(0x0004)
#define IB_PMA_PORT_XMIT_WAIT __constant_htons(0x0005)
struct ib_reth {
__be64 vaddr;
__be32 rkey;
__be32 length;
} __attribute__ ((packed));
struct ib_atomic_eth {
__be64 vaddr;
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
} __attribute__ ((packed));
struct ipath_other_headers {
__be32 bth[3];
union {
struct {
__be32 deth[2];
__be32 imm_data;
} ud;
struct {
struct ib_reth reth;
__be32 imm_data;
} rc;
struct {
__be32 aeth;
__be64 atomic_ack_eth;
} at;
__be32 imm_data;
__be32 aeth;
struct ib_atomic_eth atomic_eth;
} u;
} __attribute__ ((packed));
/*
* Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
* long (72 w/ imm_data). Only the first 56 bytes of the IB header
* will be in the eager header buffer. The remaining 12 or 16 bytes
* are in the data buffer.
*/
struct ipath_ib_header {
__be16 lrh[4];
union {
struct {
struct ib_grh grh;
struct ipath_other_headers oth;
} l;
struct ipath_other_headers oth;
} u;
} __attribute__ ((packed));
/*
* There is one struct ipath_mcast for each multicast GID.
* All attached QPs are then stored as a list of
* struct ipath_mcast_qp.
*/
struct ipath_mcast_qp {
struct list_head list;
struct ipath_qp *qp;
};
struct ipath_mcast {
struct rb_node rb_node;
union ib_gid mgid;
struct list_head qp_list;
wait_queue_head_t wait;
atomic_t refcount;
};
/* Memory region */
struct ipath_mr {
struct ib_mr ibmr;
struct ipath_mregion mr; /* must be last */
};
/* Fast memory region */
struct ipath_fmr {
struct ib_fmr ibfmr;
u8 page_shift;
struct ipath_mregion mr; /* must be last */
};
/* Protection domain */
struct ipath_pd {
struct ib_pd ibpd;
int user; /* non-zero if created from user space */
};
/* Address Handle */
struct ipath_ah {
struct ib_ah ibah;
struct ib_ah_attr attr;
};
/*
* Quick description of our CQ/QP locking scheme:
*
* We have one global lock that protects dev->cq/qp_table. Each
* struct ipath_cq/qp also has its own lock. An individual qp lock
* may be taken inside of an individual cq lock. Both cqs attached to
* a qp may be locked, with the send cq locked first. No other
* nesting should be done.
*
* Each struct ipath_cq/qp also has an atomic_t ref count. The
* pointer from the cq/qp_table to the struct counts as one reference.
* This reference also is good for access through the consumer API, so
* modifying the CQ/QP etc doesn't need to take another reference.
* Access because of a completion being polled does need a reference.
*
* Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
* destroy function to sleep on.
*
* This means that access from the consumer API requires nothing but
* taking the struct's lock.
*
* Access because of a completion event should go as follows:
* - lock cq/qp_table and look up struct
* - increment ref count in struct
* - drop cq/qp_table lock
* - lock struct, do your thing, and unlock struct
* - decrement ref count; if zero, wake up waiters
*
* To destroy a CQ/QP, we can do the following:
* - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
* - decrement ref count
* - wait_event until ref count is zero
*
 * It is the consumer's responsibility to make sure that no QP
* operations (WQE posting or state modification) are pending when the
* QP is destroyed. Also, the consumer must make sure that calls to
* qp_modify are serialized.
*
* Possible optimizations (wait for profile data to see if/where we
* have locks bouncing between CPUs):
* - split cq/qp table lock into n separate (cache-aligned) locks,
* indexed (say) by the page in the table
*/
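/*
 * Illustrative sketch only (not part of this commit) of the "access
 * because of a completion event" steps described above.  It assumes
 * ipath_lookup_qpn() (declared further down in this file) follows that
 * scheme and returns the QP with a reference already taken; the
 * function and variable names here are hypothetical.
 */
static inline void example_handle_qp_event(struct ipath_ibdev *dev, u32 qpn)
{
	struct ipath_qp *qp;

	/* look up the struct and take a reference under the table lock */
	qp = ipath_lookup_qpn(&dev->qp_table, qpn);
	if (!qp)
		return;

	spin_lock(&qp->s_lock);
	/* ... do your thing ... */
	spin_unlock(&qp->s_lock);

	/* drop the reference; if it reaches zero, wake a waiting destroy */
	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}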
struct ipath_cq {
struct ib_cq ibcq;
struct tasklet_struct comptask;
spinlock_t lock;
u8 notify;
u8 triggered;
u32 head; /* new records added to the head */
u32 tail; /* poll_cq() reads from here. */
struct ib_wc *queue; /* this is actually ibcq.cqe + 1 */
};
/*
* Send work request queue entry.
* The size of the sg_list is determined when the QP is created and stored
* in qp->s_max_sge.
*/
struct ipath_swqe {
struct ib_send_wr wr; /* don't use wr.sg_list */
u32 psn; /* first packet sequence number */
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */
u32 length; /* total length of data in sg_list */
struct ipath_sge sg_list[0];
};
/*
* Receive work request queue entry.
* The size of the sg_list is determined when the QP is created and stored
* in qp->r_max_sge.
*/
struct ipath_rwqe {
u64 wr_id;
u32 length; /* total length of data in sg_list */
u8 num_sge;
struct ipath_sge sg_list[0];
};
struct ipath_rq {
spinlock_t lock;
u32 head; /* new work requests posted to the head */
u32 tail; /* receives pull requests from here. */
u32 size; /* size of RWQE array */
u8 max_sge;
struct ipath_rwqe *wq; /* RWQE array */
};
struct ipath_srq {
struct ib_srq ibsrq;
struct ipath_rq rq;
/* send signal when number of RWQEs < limit */
u32 limit;
};
/*
* Variables prefixed with s_ are for the requester (sender).
* Variables prefixed with r_ are for the responder (receiver).
* Variables prefixed with ack_ are for responder replies.
*
* Common variables are protected by both r_rq.lock and s_lock in that order
* which only happens in modify_qp() or changing the QP 'state'.
*/
struct ipath_qp {
struct ib_qp ibqp;
struct ipath_qp *next; /* link list for QPN hash table */
struct list_head piowait; /* link for wait PIO buf */
struct list_head timerwait; /* link for waiting for timeouts */
struct ib_ah_attr remote_ah_attr;
struct ipath_ib_header s_hdr; /* next packet header to send */
atomic_t refcount;
wait_queue_head_t wait;
struct tasklet_struct s_task;
struct ipath_sge_state *s_cur_sge;
struct ipath_sge_state s_sge; /* current send request data */
/* current RDMA read send data */
struct ipath_sge_state s_rdma_sge;
struct ipath_sge_state r_sge; /* current receive data */
spinlock_t s_lock;
unsigned long s_flags;
u32 s_hdrwords; /* size of s_hdr in 32 bit words */
u32 s_cur_size; /* size of send packet in bytes */
u32 s_len; /* total length of s_sge */
u32 s_rdma_len; /* total length of s_rdma_sge */
u32 s_next_psn; /* PSN for next request */
u32 s_last_psn; /* last response PSN processed */
u32 s_psn; /* current packet sequence number */
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 s_ack_psn; /* PSN for next ACK or RDMA_READ */
u64 s_ack_atomic; /* data for atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
u64 r_atomic_data; /* data for last atomic op */
u32 r_atomic_psn; /* PSN of last atomic op */
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
u8 state; /* QP state */
u8 s_state; /* opcode of last packet sent */
u8 s_ack_state; /* opcode of packet to ACK */
u8 s_nak_state; /* non-zero if NAK is pending */
u8 r_state; /* opcode of last packet received */
u8 r_reuse_sge; /* for UC receive errors */
u8 r_sge_inx; /* current index into sg_list */
u8 s_max_sge; /* size of s_wq->sg_list */
u8 qp_access_flags;
u8 s_retry_cnt; /* number of times to retry */
u8 s_rnr_retry_cnt;
u8 s_min_rnr_timer;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_pkey_index; /* PKEY index to use */
enum ib_mtu path_mtu;
atomic_t msn; /* message sequence number */
u32 remote_qpn;
u32 qkey; /* QKEY for this QP (for UD or RD) */
u32 s_size; /* send work queue size */
u32 s_head; /* new entries added here */
u32 s_tail; /* next entry to process */
u32 s_cur; /* current work queue entry */
u32 s_last; /* last un-ACK'ed entry */
u32 s_ssn; /* SSN of tail entry */
u32 s_lsn; /* limit sequence number (credit) */
struct ipath_swqe *s_wq; /* send work queue */
struct ipath_rq r_rq; /* receive work queue */
};
/*
* Bit definitions for s_flags.
*/
#define IPATH_S_BUSY 0
#define IPATH_S_SIGNAL_REQ_WR 1
/*
* Since struct ipath_swqe is not a fixed size, we can't simply index into
* struct ipath_qp.s_wq. This function does the array index computation.
*/
static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
unsigned n)
{
return (struct ipath_swqe *)((char *)qp->s_wq +
(sizeof(struct ipath_swqe) +
qp->s_max_sge *
sizeof(struct ipath_sge)) * n);
}
/*
* Since struct ipath_rwqe is not a fixed size, we can't simply index into
* struct ipath_rq.wq. This function does the array index computation.
*/
static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
unsigned n)
{
return (struct ipath_rwqe *)
((char *) rq->wq +
(sizeof(struct ipath_rwqe) +
rq->max_sge * sizeof(struct ipath_sge)) * n);
}
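/*
 * Worked example (illustrative, not part of this commit): each slot in
 * rq->wq occupies sizeof(struct ipath_rwqe) + rq->max_sge *
 * sizeof(struct ipath_sge) bytes, so entry n starts n times that stride
 * into the array; get_swqe_ptr() above uses the same computation with
 * qp->s_max_sge.
 */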
/*
* QPN-map pages start out as NULL, they get allocated upon
* first use and are never deallocated. This way,
* large bitmaps are not allocated unless large numbers of QPs are used.
*/
struct qpn_map {
atomic_t n_free;
void *page;
};
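/*
 * Worked sizing example (illustrative, not part of this commit):
 * assuming 4 KB pages, QPNMAP_ENTRIES is (1 << 24) / 4096 / 8 = 512,
 * and each lazily allocated page covers 4096 * 8 = 32768 QPNs, so the
 * full 2 MB bitmap is only ever allocated if all map pages are needed.
 */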
struct ipath_qp_table {
spinlock_t lock;
u32 last; /* last QP number allocated */
u32 max; /* size of the hash table */
u32 nmaps; /* size of the map table */
struct ipath_qp **table;
/* bit map of free numbers */
struct qpn_map map[QPNMAP_ENTRIES];
};
struct ipath_lkey_table {
spinlock_t lock;
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
u32 max; /* size of the table */
struct ipath_mregion **table;
};
struct ipath_opcode_stats {
u64 n_packets; /* number of packets */
u64 n_bytes; /* total number of bytes */
};
struct ipath_ibdev {
struct ib_device ibdev;
struct list_head dev_list;
struct ipath_devdata *dd;
int ib_unit; /* This is the device number */
u16 sm_lid; /* in host order */
u8 sm_sl;
u8 mkeyprot_resv_lmc;
/* non-zero when timer is set */
unsigned long mkey_lease_timeout;
/* The following fields are really per port. */
struct ipath_qp_table qp_table;
struct ipath_lkey_table lk_table;
struct list_head pending[3]; /* FIFO of QPs waiting for ACKs */
struct list_head piowait; /* list for wait PIO buf */
/* list of QPs waiting for RNR timer */
struct list_head rnrwait;
spinlock_t pending_lock;
__be64 sys_image_guid; /* in network order */
__be64 gid_prefix; /* in network order */
__be64 mkey;
u64 ipath_sword; /* total dwords sent (sample result) */
u64 ipath_rword; /* total dwords received (sample result) */
u64 ipath_spkts; /* total packets sent (sample result) */
u64 ipath_rpkts; /* total packets received (sample result) */
/* # of ticks no data sent (sample result) */
u64 ipath_xmit_wait;
u64 rcv_errors; /* # of packets with SW detected rcv errs */
u64 n_unicast_xmit; /* total unicast packets sent */
u64 n_unicast_rcv; /* total unicast packets received */
u64 n_multicast_xmit; /* total multicast packets sent */
u64 n_multicast_rcv; /* total multicast packets received */
u64 n_symbol_error_counter; /* starting count for PMA */
u64 n_link_error_recovery_counter; /* starting count for PMA */
u64 n_link_downed_counter; /* starting count for PMA */
u64 n_port_rcv_errors; /* starting count for PMA */
u64 n_port_rcv_remphys_errors; /* starting count for PMA */
u64 n_port_xmit_discards; /* starting count for PMA */
u64 n_port_xmit_data; /* starting count for PMA */
u64 n_port_rcv_data; /* starting count for PMA */
u64 n_port_xmit_packets; /* starting count for PMA */
u64 n_port_rcv_packets; /* starting count for PMA */
u32 n_pkey_violations; /* starting count for PMA */
u32 n_rc_resends;
u32 n_rc_acks;
u32 n_rc_qacks;
u32 n_seq_naks;
u32 n_rdma_seq;
u32 n_rnr_naks;
u32 n_other_naks;
u32 n_timeouts;
u32 n_pkt_drops;
u32 n_wqe_errs;
u32 n_rdma_dup_busy;
u32 n_piowait;
u32 n_no_piobuf;
u32 port_cap_flags;
u32 pma_sample_start;
u32 pma_sample_interval;
__be16 pma_counter_select[5];
u16 pma_tag;
u16 qkey_violations;
u16 mkey_violations;
u16 mkey_lease_period;
u16 pending_index; /* which pending queue is active */
u8 pma_sample_status;
u8 subnet_timeout;
u8 link_width_enabled;
u8 vl_high_limit;
struct ipath_opcode_stats opstats[128];
};
struct ipath_ucontext {
struct ib_ucontext ibucontext;
};
static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct ipath_mr, ibmr);
}
static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct ipath_fmr, ibfmr);
}
static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct ipath_pd, ibpd);
}
static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
{
return container_of(ibah, struct ipath_ah, ibah);
}
static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct ipath_cq, ibcq);
}
static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
{
return container_of(ibsrq, struct ipath_srq, ibsrq);
}
static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct ipath_qp, ibqp);
}
static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
{
return container_of(ibdev, struct ipath_ibdev, ibdev);
}
int ipath_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
struct ib_wc *in_wc,
struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad);
static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
*ibucontext)
{
return container_of(ibucontext, struct ipath_ucontext, ibucontext);
}
/*
* Compare the lower 24 bits of the two values.
* Returns an integer <, ==, or > than zero.
*/
static inline int ipath_cmp24(u32 a, u32 b)
{
return (((int) a) - ((int) b)) << 8;
}
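/*
 * Worked example (illustrative, not part of this commit): for PSNs that
 * wrap modulo 2^24, ipath_cmp24(5, 0xFFFFF0) computes (5 - 0xFFFFF0) =
 * 0xFF000015 as a 32-bit int and shifts it left by 8, giving 0x1500 > 0,
 * so PSN 5 correctly compares as being after 0xFFFFF0 even though the
 * plain signed difference is negative.
 */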
struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int ipath_mcast_tree_empty(void);
__be32 ipath_compute_aeth(struct ipath_qp *qp);
struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
int ipath_destroy_qp(struct ib_qp *ibqp);
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask);
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr);
void ipath_free_all_qps(struct ipath_qp_table *qpt);
int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
void ipath_error_qp(struct ipath_qp *qp);
void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
void ipath_do_rc_send(unsigned long data);
void ipath_do_uc_send(unsigned long data);
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc);
int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
struct ib_sge *sge, int acc);
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr);
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc);
void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_sge_state *ss,
u32 length, struct ib_send_wr *wr, struct ib_wc *wc);
int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
struct ipath_mregion *mr);
void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
struct ib_sge *sge, int acc);
int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc);
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask);
int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
int ipath_destroy_srq(struct ib_srq *ibsrq);
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
struct ib_ucontext *context,
struct ib_udata *udata);
int ipath_destroy_cq(struct ib_cq *ibcq);
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start);
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int mr_access_flags,
struct ib_udata *udata);
int ipath_dereg_mr(struct ib_mr *ibmr);
struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
int list_len, u64 iova);
int ipath_unmap_fmr(struct list_head *fmr_list);
int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
void ipath_insert_rnr_queue(struct ipath_qp *qp);
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc);
extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
extern const u8 ipath_cvt_physportstate[];
extern const int ib_ipath_state_ops[];
extern unsigned int ib_ipath_lkey_table_size;
extern const u32 ib_ipath_rnr_table[];
#endif /* IPATH_VERBS_H */


@ -0,0 +1,333 @@
/*
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/list.h>
#include <linux/rcupdate.h>
#include "ipath_verbs.h"
/*
* Global table of GID to attached QPs.
* The table is global to all ipath devices since a send from one QP/device
* needs to be locally routed to any locally attached QPs on the same
* or different device.
*/
static struct rb_root mcast_tree;
static DEFINE_SPINLOCK(mcast_lock);
/**
* ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
* @qp: the QP to link
*/
static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
{
struct ipath_mcast_qp *mqp;
mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
if (!mqp)
goto bail;
mqp->qp = qp;
atomic_inc(&qp->refcount);
bail:
return mqp;
}
static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
{
struct ipath_qp *qp = mqp->qp;
/* Notify ipath_destroy_qp() if it is waiting. */
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
kfree(mqp);
}
/**
* ipath_mcast_alloc - allocate the multicast GID structure
* @mgid: the multicast GID
*
* A list of QPs will be attached to this structure.
*/
static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
{
struct ipath_mcast *mcast;
mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
if (!mcast)
goto bail;
mcast->mgid = *mgid;
INIT_LIST_HEAD(&mcast->qp_list);
init_waitqueue_head(&mcast->wait);
atomic_set(&mcast->refcount, 0);
bail:
return mcast;
}
static void ipath_mcast_free(struct ipath_mcast *mcast)
{
struct ipath_mcast_qp *p, *tmp;
list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
ipath_mcast_qp_free(p);
kfree(mcast);
}
/**
* ipath_mcast_find - search the global table for the given multicast GID
* @mgid: the multicast GID to search for
*
* Returns NULL if not found.
*
* The caller is responsible for decrementing the reference count if found.
*/
struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
{
struct rb_node *n;
unsigned long flags;
struct ipath_mcast *mcast;
spin_lock_irqsave(&mcast_lock, flags);
n = mcast_tree.rb_node;
while (n) {
int ret;
mcast = rb_entry(n, struct ipath_mcast, rb_node);
ret = memcmp(mgid->raw, mcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0)
n = n->rb_left;
else if (ret > 0)
n = n->rb_right;
else {
atomic_inc(&mcast->refcount);
spin_unlock_irqrestore(&mcast_lock, flags);
goto bail;
}
}
spin_unlock_irqrestore(&mcast_lock, flags);
mcast = NULL;
bail:
return mcast;
}
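/*
 * Illustrative sketch only (not part of this commit): a hypothetical
 * caller of ipath_mcast_find() following the reference-count contract
 * documented above.
 */
static int example_mcast_lookup(union ib_gid *mgid)
{
	struct ipath_mcast *mcast;

	mcast = ipath_mcast_find(mgid);
	if (!mcast)
		return 0;

	/* ... walk mcast->qp_list under RCU and deliver the packet ... */

	/*
	 * Drop the reference taken by ipath_mcast_find() and wake
	 * ipath_multicast_detach() if it is waiting on this count.
	 */
	if (atomic_dec_return(&mcast->refcount) <= 1)
		wake_up(&mcast->wait);
	return 1;
}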
/**
* ipath_mcast_add - insert mcast GID into table and attach QP struct
* @mcast: the mcast GID table
* @mqp: the QP to attach
*
* Return zero if both were added. Return EEXIST if the GID was already in
* the table but the QP was added. Return ESRCH if the QP was already
* attached and neither structure was added.
*/
static int ipath_mcast_add(struct ipath_mcast *mcast,
struct ipath_mcast_qp *mqp)
{
struct rb_node **n = &mcast_tree.rb_node;
struct rb_node *pn = NULL;
unsigned long flags;
int ret;
spin_lock_irqsave(&mcast_lock, flags);
while (*n) {
struct ipath_mcast *tmcast;
struct ipath_mcast_qp *p;
pn = *n;
tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0) {
n = &pn->rb_left;
continue;
}
if (ret > 0) {
n = &pn->rb_right;
continue;
}
/* Search the QP list to see if this is already there. */
list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
if (p->qp == mqp->qp) {
spin_unlock_irqrestore(&mcast_lock, flags);
ret = ESRCH;
goto bail;
}
}
list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
spin_unlock_irqrestore(&mcast_lock, flags);
ret = EEXIST;
goto bail;
}
list_add_tail_rcu(&mqp->list, &mcast->qp_list);
atomic_inc(&mcast->refcount);
rb_link_node(&mcast->rb_node, pn, n);
rb_insert_color(&mcast->rb_node, &mcast_tree);
spin_unlock_irqrestore(&mcast_lock, flags);
ret = 0;
bail:
return ret;
}
int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_mcast *mcast;
struct ipath_mcast_qp *mqp;
int ret;
/*
 * Allocate data structures since it's better to do this outside of
* spin locks and it will most likely be needed.
*/
mcast = ipath_mcast_alloc(gid);
if (mcast == NULL) {
ret = -ENOMEM;
goto bail;
}
mqp = ipath_mcast_qp_alloc(qp);
if (mqp == NULL) {
ipath_mcast_free(mcast);
ret = -ENOMEM;
goto bail;
}
switch (ipath_mcast_add(mcast, mqp)) {
case ESRCH:
/* Neither was used: can't attach the same QP twice. */
ipath_mcast_qp_free(mqp);
ipath_mcast_free(mcast);
ret = -EINVAL;
goto bail;
case EEXIST: /* The mcast wasn't used */
ipath_mcast_free(mcast);
break;
default:
break;
}
ret = 0;
bail:
return ret;
}
int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_mcast *mcast = NULL;
struct ipath_mcast_qp *p, *tmp;
struct rb_node *n;
unsigned long flags;
	int last = 0;
	int found = 0;
int ret;
spin_lock_irqsave(&mcast_lock, flags);
/* Find the GID in the mcast table. */
n = mcast_tree.rb_node;
while (1) {
if (n == NULL) {
spin_unlock_irqrestore(&mcast_lock, flags);
ret = 0;
goto bail;
}
mcast = rb_entry(n, struct ipath_mcast, rb_node);
ret = memcmp(gid->raw, mcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0)
n = n->rb_left;
else if (ret > 0)
n = n->rb_right;
else
break;
}
/* Search the QP list. */
list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
if (p->qp != qp)
continue;
/*
* We found it, so remove it, but don't poison the forward
* link until we are sure there are no list walkers.
*/
		list_del_rcu(&p->list);
		found = 1;
/* If this was the last attached QP, remove the GID too. */
if (list_empty(&mcast->qp_list)) {
rb_erase(&mcast->rb_node, &mcast_tree);
last = 1;
}
break;
}
spin_unlock_irqrestore(&mcast_lock, flags);
	if (found) {
/*
* Wait for any list walkers to finish before freeing the
* list element.
*/
wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
ipath_mcast_qp_free(p);
}
if (last) {
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
ipath_mcast_free(mcast);
}
ret = 0;
bail:
return ret;
}
int ipath_mcast_tree_empty(void)
{
return mcast_tree.rb_node == NULL;
}


@ -0,0 +1,157 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*
* This file is conditionally built on x86_64 only. Otherwise weak symbol
* versions of the functions exported from here are used.
*/
#include <linux/pci.h>
#include <asm/mtrr.h>
#include <asm/processor.h>
#include "ipath_kernel.h"
/**
* ipath_enable_wc - enable write combining for MMIO writes to the device
* @dd: infinipath device
*
* This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
* write combining.
*/
int ipath_enable_wc(struct ipath_devdata *dd)
{
int ret = 0;
u64 pioaddr, piolen;
unsigned bits;
const unsigned long addr = pci_resource_start(dd->pcidev, 0);
const size_t len = pci_resource_len(dd->pcidev, 0);
/*
* Set the PIO buffers to be WCCOMB, so we get HT bursts to the
* chip. Linux (possibly the hardware) requires it to be on a power
* of 2 address matching the length (which has to be a power of 2).
 * For rev1, that means the base address; for rev2, it will be just
* the PIO buffers themselves.
*/
pioaddr = addr + dd->ipath_piobufbase;
piolen = (dd->ipath_piobcnt2k +
dd->ipath_piobcnt4k) *
ALIGN(dd->ipath_piobcnt2k +
dd->ipath_piobcnt4k, dd->ipath_palign);
for (bits = 0; !(piolen & (1ULL << bits)); bits++)
/* do nothing */ ;
if (piolen != (1ULL << bits)) {
piolen >>= bits;
while (piolen >>= 1)
bits++;
piolen = 1ULL << (bits + 1);
}
if (pioaddr & (piolen - 1)) {
u64 atmp;
ipath_dbg("pioaddr %llx not on right boundary for size "
"%llx, fixing\n",
(unsigned long long) pioaddr,
(unsigned long long) piolen);
atmp = pioaddr & ~(piolen - 1);
if (atmp < addr || (atmp + piolen) > (addr + len)) {
ipath_dev_err(dd, "No way to align address/size "
"(%llx/%llx), no WC mtrr\n",
(unsigned long long) atmp,
(unsigned long long) piolen << 1);
ret = -ENODEV;
} else {
ipath_dbg("changing WC base from %llx to %llx, "
"len from %llx to %llx\n",
(unsigned long long) pioaddr,
(unsigned long long) atmp,
(unsigned long long) piolen,
(unsigned long long) piolen << 1);
pioaddr = atmp;
piolen <<= 1;
}
}
if (!ret) {
int cookie;
ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC "
"(addr %llx, len=0x%llx)\n",
(unsigned long long) pioaddr,
(unsigned long long) piolen);
cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
if (cookie < 0) {
			dev_info(&dd->pcidev->dev,
				 "mtrr_add() WC for PIO bufs "
				 "failed (%d)\n",
				 cookie);
			ret = -EINVAL;
} else {
ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
"cookie is %d\n", cookie);
dd->ipath_wc_cookie = cookie;
}
}
return ret;
}
/**
* ipath_disable_wc - disable write combining for MMIO writes to the device
* @dd: infinipath device
*/
void ipath_disable_wc(struct ipath_devdata *dd)
{
if (dd->ipath_wc_cookie) {
ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
mtrr_del(dd->ipath_wc_cookie, 0, 0);
dd->ipath_wc_cookie = 0;
}
}
/**
* ipath_unordered_wc - indicate whether write combining is ordered
*
* Because our performance depends on our ability to do write combining mmio
* writes in the most efficient way, we need to know if we are on an Intel
* or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in
* the order completed, and so no special flushing is required to get
* correct ordering. Intel processors, however, will flush write buffers
* out in "random" orders, and so explicit ordering is needed at times.
*/
int ipath_unordered_wc(void)
{
return boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
}


@ -0,0 +1,263 @@
#ifndef IPS_COMMON_H
#define IPS_COMMON_H
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "ipath_common.h"
struct ipath_header {
/*
* Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
* 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
* Port 3, TID 11, offset 14.
*/
__le32 ver_port_tid_offset;
__le16 chksum;
__le16 pkt_flags;
};
struct ips_message_header {
__be16 lrh[4];
__be32 bth[3];
/* fields below this point are in host byte order */
struct ipath_header iph;
__u8 sub_opcode;
__u8 flags;
__u16 src_rank;
/* 24 bits. The upper 8 bits are available for other use */
union {
struct {
unsigned ack_seq_num:24;
unsigned port:4;
unsigned unused:4;
};
__u32 ack_seq_num_org;
};
__u8 expected_tid_session_id;
__u8 tinylen; /* to aid MPI */
union {
__u16 tag; /* to aid MPI */
__u16 mqhdr; /* for PSM MQ */
};
union {
__u32 mpi[4]; /* to aid MPI */
__u32 data[4];
__u64 mq[2]; /* for PSM MQ */
struct {
__u16 mtu;
__u8 major_ver;
__u8 minor_ver;
__u32 not_used;	/* free */
__u32 run_id;
__u32 client_ver;
};
};
};
struct ether_header {
__be16 lrh[4];
__be32 bth[3];
struct ipath_header iph;
__u8 sub_opcode;
__u8 cmd;
__be16 lid;
__u16 mac[3];
__u8 frag_num;
__u8 seq_num;
__le32 len;
/* MUST be of word size due to PIO write requirements */
__u32 csum;
__le16 csum_offset;
__le16 flags;
__u16 first_2_bytes;
__u8 unused[2]; /* currently unused */
};
/*
* The PIO buffer used for sending infinipath messages must only be written
* in 32-bit words, all the data must be written, and no writes can occur
* after the last word is written (which transfers "ownership" of the buffer
* to the chip and triggers the message to be sent).
* Since the Linux sk_buff structure can be recursive, non-aligned, and
* any number of bytes in each segment, we use the following structure
* to keep information about the overall state of the copy operation.
* This is used to save the information needed to store the checksum
* in the right place before sending the last word to the hardware and
* to buffer the last 0-3 bytes of non-word sized segments.
*/
struct copy_data_s {
struct ether_header *hdr;
/* addr of PIO buf to write csum to */
__u32 __iomem *csum_pio;
__u32 __iomem *to; /* addr of PIO buf to write data to */
__u32 device; /* which device to allocate PIO bufs from */
__s32 error; /* set if there is an error. */
__s32 extra; /* amount of data saved in u.buf below */
__u32 len; /* total length to send in bytes */
__u32 flen; /* fragment length in words */
__u32 csum; /* partial IP checksum */
__u32 pos; /* position for partial checksum */
__u32 offset; /* offset to where data currently starts */
__s32 checksum_calc; /* set to 1 when csum has been calculated */
struct sk_buff *skb;
union {
__u32 w;
__u8 buf[4];
} u;
};
/* IB - LRH header consts */
#define IPS_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define IPS_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
#define IPS_OFFSET 0
/*
* defines the cut-off point between the header queue and eager/expected
* TID queue
*/
#define NUM_OF_EXTRA_WORDS_IN_HEADER_QUEUE \
((sizeof(struct ips_message_header) - \
offsetof(struct ips_message_header, iph)) >> 2)
/* OpCodes */
#define OPCODE_IPS 0xC0
#define OPCODE_ITH4X 0xC1
/* OpCode 30 is used by stand-alone test programs */
#define OPCODE_RAW_DATA 0xDE
/* last OpCode (31) is reserved for test */
#define OPCODE_TEST 0xDF
/* sub OpCodes - ips */
#define OPCODE_SEQ_DATA 0x01
#define OPCODE_SEQ_CTRL 0x02
#define OPCODE_SEQ_MQ_DATA 0x03
#define OPCODE_SEQ_MQ_CTRL 0x04
#define OPCODE_ACK 0x10
#define OPCODE_NAK 0x11
#define OPCODE_ERR_CHK 0x20
#define OPCODE_ERR_CHK_PLS 0x21
#define OPCODE_STARTUP 0x30
#define OPCODE_STARTUP_ACK 0x31
#define OPCODE_STARTUP_NAK 0x32
#define OPCODE_STARTUP_EXT 0x34
#define OPCODE_STARTUP_ACK_EXT 0x35
#define OPCODE_STARTUP_NAK_EXT 0x36
#define OPCODE_TIDS_RELEASE 0x40
#define OPCODE_TIDS_RELEASE_CONFIRM 0x41
#define OPCODE_CLOSE 0x50
#define OPCODE_CLOSE_ACK 0x51
/*
* like OPCODE_CLOSE, but no complaint if other side has already closed.
* Used when doing abort(), MPI_Abort(), etc.
*/
#define OPCODE_ABORT 0x52
/* sub OpCodes - ith4x */
#define OPCODE_ENCAP 0x81
#define OPCODE_LID_ARP 0x82
/* Receive Header Queue: receive type (from infinipath) */
#define RCVHQ_RCV_TYPE_EXPECTED 0
#define RCVHQ_RCV_TYPE_EAGER 1
#define RCVHQ_RCV_TYPE_NON_KD 2
#define RCVHQ_RCV_TYPE_ERROR 3
/* misc. */
#define SIZE_OF_CRC 1
#define EAGER_TID_ID INFINIPATH_I_TID_MASK
#define IPS_DEFAULT_P_KEY 0xFFFF
#define IPS_PERMISSIVE_LID 0xFFFF
#define IPS_MULTICAST_LID_BASE 0xC000
#define IPS_AETH_CREDIT_SHIFT 24
#define IPS_AETH_CREDIT_MASK 0x1F
#define IPS_AETH_CREDIT_INVAL 0x1F
#define IPS_PSN_MASK 0xFFFFFF
#define IPS_MSN_MASK 0xFFFFFF
#define IPS_QPN_MASK 0xFFFFFF
#define IPS_MULTICAST_QPN 0xFFFFFF
/* functions for extracting fields from rcvhdrq entries */
static inline __u32 ips_get_hdr_err_flags(const __le32 * rbuf)
{
return __le32_to_cpu(rbuf[1]);
}
static inline __u32 ips_get_index(const __le32 * rbuf)
{
return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
& INFINIPATH_RHF_EGRINDEX_MASK;
}
static inline __u32 ips_get_rcv_type(const __le32 * rbuf)
{
return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
& INFINIPATH_RHF_RCVTYPE_MASK;
}
static inline __u32 ips_get_length_in_bytes(const __le32 * rbuf)
{
return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
& INFINIPATH_RHF_LENGTH_MASK) << 2;
}
static inline void *ips_get_first_protocol_header(const __u32 * rbuf)
{
return (void *)&rbuf[2];
}
static inline struct ips_message_header *ips_get_ips_header(const __u32 *
rbuf)
{
return (struct ips_message_header *)&rbuf[2];
}
static inline __u32 ips_get_ipath_ver(__le32 hdrword)
{
return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
& INFINIPATH_I_VERS_MASK;
}
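/*
 * Illustrative sketch only (not part of this commit): how a receive
 * path might combine the accessors above to decide whether one rcvhdrq
 * entry describes usable eager data; the function name is hypothetical.
 */
static inline int example_rhf_is_eager_data(const __le32 * rbuf)
{
	/* reject entries the hardware flagged with header errors */
	if (ips_get_hdr_err_flags(rbuf))
		return 0;
	return ips_get_rcv_type(rbuf) == RCVHQ_RCV_TYPE_EAGER &&
		ips_get_length_in_bytes(rbuf) > 0;
}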
#endif /* IPS_COMMON_H */


@ -0,0 +1,107 @@
/*
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _VERBS_DEBUG_H
#define _VERBS_DEBUG_H
/*
* This file contains tracing code for the ib_ipath kernel module.
*/
#ifndef _VERBS_DEBUGGING /* tracing enabled or not */
#define _VERBS_DEBUGGING 1
#endif
extern unsigned ib_ipath_debug;
#define _VERBS_ERROR(fmt,...) \
do { \
printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
} while(0)
#define _VERBS_UNIT_ERROR(unit,fmt,...) \
do { \
printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
} while(0)
#if _VERBS_DEBUGGING
/*
* Mask values for debugging. The scheme allows us to compile out any
* of the debug tracing stuff, and if compiled in, to enable or
* disable dynamically.
* This can be set at modprobe time also:
 * modprobe ib_ipath ib_ipath_debug=3
*/
#define __VERBS_INFO 0x1 /* generic low verbosity stuff */
#define __VERBS_DBG 0x2 /* generic debug */
#define __VERBS_VDBG 0x4 /* verbose debug */
#define __VERBS_SMADBG 0x8000 /* sma packet debug */
#define _VERBS_INFO(fmt,...) \
do { \
if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
printk(KERN_INFO "%s: " fmt,"ib_ipath", \
##__VA_ARGS__); \
} while(0)
#define _VERBS_DBG(fmt,...) \
do { \
if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
printk(KERN_DEBUG "%s: " fmt, __func__, \
##__VA_ARGS__); \
} while(0)
#define _VERBS_VDBG(fmt,...) \
do { \
if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
printk(KERN_DEBUG "%s: " fmt, __func__, \
##__VA_ARGS__); \
} while(0)
#define _VERBS_SMADBG(fmt,...) \
do { \
if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
printk(KERN_DEBUG "%s: " fmt, __func__, \
##__VA_ARGS__); \
} while(0)
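/*
 * Illustrative usage (hypothetical call site, not part of this commit):
 *
 *	_VERBS_DBG("QP%u timeout, retry %u\n",
 *		   qp->ibqp.qp_num, qp->s_retry);
 *
 * With "modprobe ib_ipath ib_ipath_debug=3" both __VERBS_INFO and
 * __VERBS_DBG messages (mask bits 0x1 and 0x2) are printed.
 */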
#else /* ! _VERBS_DEBUGGING */
#define _VERBS_INFO(fmt,...)
#define _VERBS_DBG(fmt,...)
#define _VERBS_VDBG(fmt,...)
#define _VERBS_SMADBG(fmt,...)
#endif /* _VERBS_DEBUGGING */
#endif /* _VERBS_DEBUG_H */