From e955d82543fea76b02aa243b182e782f71bda82c Mon Sep 17 00:00:00 2001
From: Mark Maule <maule@sgi.com>
Date: Mon, 25 Apr 2005 11:26:03 -0700
Subject: [PATCH 01/34] [IA64-SGI] sn2-pci-dma-abstraction.patch

Provide an abstraction of the altix pci dma runtime layer so that multiple
pci-based bridges can be supported.

Signed-off-by: Mark Maule <maule@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/include/pci/pcibr_provider.h     |   6 +-
 .../sn/include/pci/pcibus_provider_defs.h     |  12 +-
 arch/ia64/sn/include/pci/pcidev.h             |   4 +
 arch/ia64/sn/kernel/io_init.c                 |  72 ++++++++++--
 arch/ia64/sn/pci/pci_dma.c                    |  35 +++---
 arch/ia64/sn/pci/pcibr/pcibr_dma.c            | 103 +++++++++---------
 arch/ia64/sn/pci/pcibr/pcibr_provider.c       |  20 ++++
 7 files changed, 173 insertions(+), 79 deletions(-)

diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/arch/ia64/sn/include/pci/pcibr_provider.h
index b1f05ffec70b..1cd291d8badd 100644
--- a/arch/ia64/sn/include/pci/pcibr_provider.h
+++ b/arch/ia64/sn/include/pci/pcibr_provider.h
@@ -123,9 +123,11 @@ pcibr_lock(struct pcibus_info *pcibus_info)
 }
 #define pcibr_unlock(pcibus_info, flag)  spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag)
 
+extern int  pcibr_init_provider(void);
 extern void *pcibr_bus_fixup(struct pcibus_bussoft *);
-extern uint64_t pcibr_dma_map(struct pcidev_info *, unsigned long, size_t, unsigned int);
-extern void pcibr_dma_unmap(struct pcidev_info *, dma_addr_t, int);
+extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t);
+extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t);
+extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int);
 
 /*
  * prototypes for the bridge asic register access routines in pcibr_reg.c
diff --git a/arch/ia64/sn/include/pci/pcibus_provider_defs.h b/arch/ia64/sn/include/pci/pcibus_provider_defs.h
index 07065615bbea..f546b4ece33c 100644
--- a/arch/ia64/sn/include/pci/pcibus_provider_defs.h
+++ b/arch/ia64/sn/include/pci/pcibus_provider_defs.h
@@ -18,6 +18,8 @@
 #define PCIIO_ASIC_TYPE_PIC	2
 #define PCIIO_ASIC_TYPE_TIOCP	3
 
+#define PCIIO_ASIC_MAX_TYPES	4
+
 /*
  * Common pciio bus provider data.  There should be one of these as the
  * first field in any pciio based provider soft structure (e.g. pcibr_soft
@@ -35,9 +37,15 @@ struct pcibus_bussoft {
 };
 
 /*
- * DMA mapping flags
+ * SN pci bus indirection
  */
 
-#define SN_PCIDMA_CONSISTENT    0x0001
+struct sn_pcibus_provider {
+	dma_addr_t	(*dma_map)(struct pci_dev *, unsigned long, size_t);
+	dma_addr_t	(*dma_map_consistent)(struct pci_dev *, unsigned long, size_t);
+	void		(*dma_unmap)(struct pci_dev *, dma_addr_t, int);
+	void *		(*bus_fixup)(struct pcibus_bussoft *);
+};
 
+extern struct sn_pcibus_provider *sn_pci_provider[];
 #endif				/* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */
diff --git a/arch/ia64/sn/include/pci/pcidev.h b/arch/ia64/sn/include/pci/pcidev.h
index 81eb95d3bf47..ed4031d80811 100644
--- a/arch/ia64/sn/include/pci/pcidev.h
+++ b/arch/ia64/sn/include/pci/pcidev.h
@@ -32,6 +32,9 @@ extern struct sn_irq_info **sn_irq;
 #define SN_PCIDEV_BUSSOFT(pci_dev) \
 	(SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info)
 
+#define SN_PCIDEV_BUSPROVIDER(pci_dev) \
+	(SN_PCIDEV_INFO(pci_dev)->pdi_provider)
+
 #define PCIIO_BUS_NONE	255      /* bus 255 reserved */
 #define PCIIO_SLOT_NONE 255
 #define PCIIO_FUNC_NONE 255
@@ -46,6 +49,7 @@ struct pcidev_info {
 	struct pci_dev		*pdi_linux_pcidev;	/* Kernel pci_dev */
 
 	struct sn_irq_info	*pdi_sn_irq_info;
+	struct sn_pcibus_provider *pdi_provider;	/* sn pci ops */
 };
 
 extern void sn_irq_fixup(struct pci_dev *pci_dev,
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 001880812b7c..3e5e4a901302 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -34,6 +34,37 @@ struct brick {
 
 int sn_ioif_inited = 0;		/* SN I/O infrastructure initialized? */
 
+struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];	/* indexed by asic type */
+
+/*
+ * Hooks and struct for unsupported pci providers
+ */
+
+static dma_addr_t
+sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size)
+{
+	return 0;
+}
+
+static void
+sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction)
+{
+	return;
+}
+
+static void *
+sn_default_pci_bus_fixup(struct pcibus_bussoft *soft)
+{
+	return NULL;
+}
+
+static struct sn_pcibus_provider sn_pci_default_provider = {
+	.dma_map = sn_default_pci_map,
+	.dma_map_consistent = sn_default_pci_map,
+	.dma_unmap = sn_default_pci_unmap,
+	.bus_fixup = sn_default_pci_bus_fixup,
+};
+
 /*
  * Retrieve the DMA Flush List given nasid.  This list is needed 
  * to implement the WAR - Flush DMA data on PIO Reads.
@@ -201,6 +232,7 @@ static void sn_pci_fixup_slot(struct pci_dev *dev)
 	struct sn_irq_info *sn_irq_info;
 	struct pci_dev *host_pci_dev;
 	int status = 0;
+	struct pcibus_bussoft *bs;
 
 	dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL);
 	if (SN_PCIDEV_INFO(dev) <= 0)
@@ -241,6 +273,7 @@ static void sn_pci_fixup_slot(struct pci_dev *dev)
 	}
 
 	/* set up host bus linkages */
+	bs = SN_PCIBUS_BUSSOFT(dev->bus);
 	host_pci_dev =
 	    pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32,
 			  SN_PCIDEV_INFO(dev)->
@@ -248,10 +281,16 @@ static void sn_pci_fixup_slot(struct pci_dev *dev)
 	SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info =
 	    SN_PCIDEV_INFO(host_pci_dev);
 	SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev;
-	SN_PCIDEV_INFO(dev)->pdi_pcibus_info = SN_PCIBUS_BUSSOFT(dev->bus);
+	SN_PCIDEV_INFO(dev)->pdi_pcibus_info = bs;
+
+	if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) {
+		SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type];
+	} else {
+		SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider;
+	}
 
 	/* Only set up IRQ stuff if this device has a host bus context */
-	if (SN_PCIDEV_BUSSOFT(dev) && sn_irq_info->irq_irq) {
+	if (bs && sn_irq_info->irq_irq) {
 		SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info;
 		dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq;
 		sn_irq_fixup(dev, sn_irq_info);
@@ -271,6 +310,7 @@ static void sn_pci_controller_fixup(int segment, int busnum)
 	struct pcibus_bussoft *prom_bussoft_ptr;
 	struct hubdev_info *hubdev_info;
 	void *provider_soft;
+	struct sn_pcibus_provider *provider;
 
 	status =
 	    sal_get_pcibus_info((u64) segment, (u64) busnum,
@@ -291,16 +331,22 @@ static void sn_pci_controller_fixup(int segment, int busnum)
 	/*
 	 * Per-provider fixup.  Copies the contents from prom to local
 	 * area and links SN_PCIBUS_BUSSOFT().
-	 *
-	 * Note:  Provider is responsible for ensuring that prom_bussoft_ptr
-	 * represents an asic-type that it can handle.
 	 */
 
-	if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) {
-		return;		/* no further fixup necessary */
+	if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) {
+		return;		/* unsupported asic type */
+	}
+
+	provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type];
+	if (provider == NULL) {
+		return;		/* no provider registerd for this asic */
+	}
+
+	provider_soft = NULL;
+	if (provider->bus_fixup) {
+		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr);
 	}
 
-	provider_soft = pcibr_bus_fixup(prom_bussoft_ptr);
 	if (provider_soft == NULL) {
 		return;		/* fixup failed or not applicable */
 	}
@@ -338,6 +384,16 @@ static int __init sn_pci_init(void)
 	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_SIMULATOR())
 		return 0;
 
+	/*
+	 * prime sn_pci_provider[].  Individial provider init routines will
+	 * override their respective default entries.
+	 */
+
+	for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++)
+		sn_pci_provider[i] = &sn_pci_default_provider;
+
+	pcibr_init_provider();
+
 	/*
 	 * This is needed to avoid bounce limit checks in the blk layer
 	 */
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index f680824f819d..c2b92b94c563 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -14,7 +14,6 @@
 #include <asm/sn/sn_sal.h>
 #include "pci/pcibus_provider_defs.h"
 #include "pci/pcidev.h"
-#include "pci/pcibr_provider.h"
 
 #define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
 #define SG_ENT_PHYS_ADDRESS(SG)	virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
@@ -79,7 +78,8 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size,
 {
 	void *cpuaddr;
 	unsigned long phys_addr;
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
@@ -102,8 +102,7 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size,
 	 * resources.
 	 */
 
-	*dma_handle = pcibr_dma_map(pcidev_info, phys_addr, size,
-				    SN_PCIDMA_CONSISTENT);
+	*dma_handle = provider->dma_map_consistent(pdev, phys_addr, size);
 	if (!*dma_handle) {
 		printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
 		free_pages((unsigned long)cpuaddr, get_order(size));
@@ -127,11 +126,12 @@ EXPORT_SYMBOL(sn_dma_alloc_coherent);
 void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
 			  dma_addr_t dma_handle)
 {
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
-	pcibr_dma_unmap(pcidev_info, dma_handle, 0);
+	provider->dma_unmap(pdev, dma_handle, 0);
 	free_pages((unsigned long)cpu_addr, get_order(size));
 }
 EXPORT_SYMBOL(sn_dma_free_coherent);
@@ -159,12 +159,13 @@ dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size,
 {
 	dma_addr_t dma_addr;
 	unsigned long phys_addr;
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
 	phys_addr = __pa(cpu_addr);
-	dma_addr = pcibr_dma_map(pcidev_info, phys_addr, size, 0);
+	dma_addr = provider->dma_map(pdev, phys_addr, size);
 	if (!dma_addr) {
 		printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
 		return 0;
@@ -187,10 +188,12 @@ EXPORT_SYMBOL(sn_dma_map_single);
 void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 			 int direction)
 {
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
-	pcibr_dma_unmap(pcidev_info, dma_addr, direction);
+
+	provider->dma_unmap(pdev, dma_addr, direction);
 }
 EXPORT_SYMBOL(sn_dma_unmap_single);
 
@@ -207,12 +210,13 @@ void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 		     int nhwentries, int direction)
 {
 	int i;
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
 	for (i = 0; i < nhwentries; i++, sg++) {
-		pcibr_dma_unmap(pcidev_info, sg->dma_address, direction);
+		provider->dma_unmap(pdev, sg->dma_address, direction);
 		sg->dma_address = (dma_addr_t) NULL;
 		sg->dma_length = 0;
 	}
@@ -233,7 +237,8 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
 {
 	unsigned long phys_addr;
 	struct scatterlist *saved_sg = sg;
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 	int i;
 
 	BUG_ON(dev->bus != &pci_bus_type);
@@ -243,8 +248,8 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
 	 */
 	for (i = 0; i < nhwentries; i++, sg++) {
 		phys_addr = SG_ENT_PHYS_ADDRESS(sg);
-		sg->dma_address = pcibr_dma_map(pcidev_info, phys_addr,
-						sg->length, 0);
+		sg->dma_address = provider->dma_map(pdev,
+						    phys_addr, sg->length);
 
 		if (!sg->dma_address) {
 			printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index b1d66ac065c8..3c305f46417f 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -40,7 +40,7 @@ extern int sn_ioif_inited;
  *      we do not have to allocate entries in the PMU.
  */
 
-static uint64_t
+static dma_addr_t
 pcibr_dmamap_ate32(struct pcidev_info *info,
 		   uint64_t paddr, size_t req_size, uint64_t flags)
 {
@@ -109,7 +109,7 @@ pcibr_dmamap_ate32(struct pcidev_info *info,
 	return pci_addr;
 }
 
-static uint64_t
+static dma_addr_t
 pcibr_dmatrans_direct64(struct pcidev_info * info, uint64_t paddr,
 			uint64_t dma_attributes)
 {
@@ -141,7 +141,7 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, uint64_t paddr,
 
 }
 
-static uint64_t
+static dma_addr_t
 pcibr_dmatrans_direct32(struct pcidev_info * info,
 			uint64_t paddr, size_t req_size, uint64_t flags)
 {
@@ -180,11 +180,11 @@ pcibr_dmatrans_direct32(struct pcidev_info * info,
  * DMA mappings for Direct 64 and 32 do not have any DMA maps.
  */
 void
-pcibr_dma_unmap(struct pcidev_info *pcidev_info, dma_addr_t dma_handle,
-		int direction)
+pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle, int direction)
 {
-	struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
-	    pdi_pcibus_info;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
+	struct pcibus_info *pcibus_info =
+	    (struct pcibus_info *)pcidev_info->pdi_pcibus_info;
 
 	if (IS_PCI32_MAPPED(dma_handle)) {
 		int ate_index;
@@ -316,64 +316,63 @@ void sn_dma_flush(uint64_t addr)
 }
 
 /*
- * Wrapper DMA interface.  Called from pci_dma.c routines.
+ * DMA interfaces.  Called from pci_dma.c routines.
  */
 
-uint64_t
-pcibr_dma_map(struct pcidev_info * pcidev_info, unsigned long phys_addr,
-	      size_t size, unsigned int flags)
+dma_addr_t
+pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size)
 {
 	dma_addr_t dma_handle;
-	struct pci_dev *pcidev = pcidev_info->pdi_linux_pcidev;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
 
-	if (flags & SN_PCIDMA_CONSISTENT) {
-		/* sn_pci_alloc_consistent interfaces */
-		if (pcidev->dev.coherent_dma_mask == ~0UL) {
-			dma_handle =
-			    pcibr_dmatrans_direct64(pcidev_info, phys_addr,
-						    PCI64_ATTR_BAR);
-		} else {
-			dma_handle =
-			    (dma_addr_t) pcibr_dmamap_ate32(pcidev_info,
-							    phys_addr, size,
-							    PCI32_ATE_BAR);
-		}
+	/* SN cannot support DMA addresses smaller than 32 bits. */
+	if (hwdev->dma_mask < 0x7fffffff) {
+		return 0;
+	}
+
+	if (hwdev->dma_mask == ~0UL) {
+		/*
+		 * Handle the most common case: 64 bit cards.  This
+		 * call should always succeed.
+		 */
+
+		dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
+						     PCI64_ATTR_PREF);
 	} else {
-		/* map_sg/map_single interfaces */
-
-		/* SN cannot support DMA addresses smaller than 32 bits. */
-		if (pcidev->dma_mask < 0x7fffffff) {
-			return 0;
-		}
-
-		if (pcidev->dma_mask == ~0UL) {
+		/* Handle 32-63 bit cards via direct mapping */
+		dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr,
+						     size, 0);
+		if (!dma_handle) {
 			/*
-			 * Handle the most common case: 64 bit cards.  This
-			 * call should always succeed.
+			 * It is a 32 bit card and we cannot do direct mapping,
+			 * so we use an ATE.
 			 */
 
-			dma_handle =
-			    pcibr_dmatrans_direct64(pcidev_info, phys_addr,
-						    PCI64_ATTR_PREF);
-		} else {
-			/* Handle 32-63 bit cards via direct mapping */
-			dma_handle =
-			    pcibr_dmatrans_direct32(pcidev_info, phys_addr,
-						    size, 0);
-			if (!dma_handle) {
-				/*
-				 * It is a 32 bit card and we cannot do direct mapping,
-				 * so we use an ATE.
-				 */
-
-				dma_handle =
-				    pcibr_dmamap_ate32(pcidev_info, phys_addr,
-						       size, PCI32_ATE_PREF);
-			}
+			dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr,
+							size, PCI32_ATE_PREF);
 		}
 	}
 
 	return dma_handle;
 }
 
+dma_addr_t
+pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr,
+			 size_t size)
+{
+	dma_addr_t dma_handle;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
+
+	if (hwdev->dev.coherent_dma_mask == ~0UL) {
+		dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
+					    PCI64_ATTR_BAR);
+	} else {
+		dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info,
+						    phys_addr, size,
+						    PCI32_ATE_BAR);
+	}
+
+	return dma_handle;
+}
+
 EXPORT_SYMBOL(sn_dma_flush);
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 92bd278cf7ff..539ab1fdab21 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -168,3 +168,23 @@ void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info)
 		pcibr_force_interrupt(sn_irq_info);
 	}
 }
+
+/*
+ * Provider entries for PIC/CP
+ */
+
+struct sn_pcibus_provider pcibr_provider = {
+	.dma_map = pcibr_dma_map,
+	.dma_map_consistent = pcibr_dma_map_consistent,
+	.dma_unmap = pcibr_dma_unmap,
+	.bus_fixup = pcibr_bus_fixup,
+};
+
+int
+pcibr_init_provider(void)
+{
+	sn_pci_provider[PCIIO_ASIC_TYPE_PIC] = &pcibr_provider;
+	sn_pci_provider[PCIIO_ASIC_TYPE_TIOCP] = &pcibr_provider;
+
+	return 0;
+}

From 9b08ebd167a3f3812131a54512f92c3c6738ad03 Mon Sep 17 00:00:00 2001
From: Mark Maule <maule@sgi.com>
Date: Mon, 25 Apr 2005 11:32:16 -0700
Subject: [PATCH 02/34] [IA64-SGI] sn2-move-pci-headers.patch

Move a couple of headers out of arch/ia64/sn/include/pci and into
include/asm-ia64/sn.

Signed-off-by: Mark Maule <maule@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/io_init.c                                 | 4 ++--
 arch/ia64/sn/kernel/irq.c                                     | 4 ++--
 arch/ia64/sn/pci/pci_dma.c                                    | 4 ++--
 arch/ia64/sn/pci/pcibr/pcibr_ate.c                            | 4 ++--
 arch/ia64/sn/pci/pcibr/pcibr_dma.c                            | 4 ++--
 arch/ia64/sn/pci/pcibr/pcibr_provider.c                       | 4 ++--
 arch/ia64/sn/pci/pcibr/pcibr_reg.c                            | 4 ++--
 .../pci => include/asm-ia64/sn}/pcibus_provider_defs.h        | 0
 {arch/ia64/sn/include/pci => include/asm-ia64/sn}/pcidev.h    | 0
 9 files changed, 14 insertions(+), 14 deletions(-)
 rename {arch/ia64/sn/include/pci => include/asm-ia64/sn}/pcibus_provider_defs.h (100%)
 rename {arch/ia64/sn/include/pci => include/asm-ia64/sn}/pcidev.h (100%)

diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 3e5e4a901302..9f9d0464f72c 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -11,8 +11,8 @@
 #include <asm/sn/types.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/addrs.h>
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/pcibr_provider.h"
 #include "xtalk/xwidgetdev.h"
 #include <asm/sn/geo.h>
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 3be44724f6c8..b52d32975f2b 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -13,8 +13,8 @@
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include "xtalk/xwidgetdev.h"
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/pcibr_provider.h"
 #include <asm/sn/shub_mmr.h>
 #include <asm/sn/sn_sal.h>
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index c2b92b94c563..5da9bdbde7cb 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -12,8 +12,8 @@
 #include <linux/module.h>
 #include <asm/dma.h>
 #include <asm/sn/sn_sal.h>
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 
 #define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
 #define SG_ENT_PHYS_ADDRESS(SG)	virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
index 9d6854666f9b..0e47bce85f2d 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
@@ -8,8 +8,8 @@
 
 #include <linux/types.h>
 #include <asm/sn/sn_sal.h>
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/pcibr_provider.h"
 
 int pcibr_invalidate_ate = 0;	/* by default don't invalidate ATE on free */
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index 3c305f46417f..c90685985d81 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -12,8 +12,8 @@
 #include <asm/sn/geo.h>
 #include "xtalk/xwidgetdev.h"
 #include "xtalk/hubdev.h"
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/tiocp.h"
 #include "pci/pic.h"
 #include "pci/pcibr_provider.h"
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 539ab1fdab21..3893999d23d8 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -13,8 +13,8 @@
 #include "xtalk/xwidgetdev.h"
 #include <asm/sn/geo.h>
 #include "xtalk/hubdev.h"
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/pcibr_provider.h"
 #include <asm/sn/addrs.h>
 
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
index 74a74a7d2a13..865c11c3b50a 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_reg.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
@@ -8,8 +8,8 @@
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/tiocp.h"
 #include "pci/pic.h"
 #include "pci/pcibr_provider.h"
diff --git a/arch/ia64/sn/include/pci/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h
similarity index 100%
rename from arch/ia64/sn/include/pci/pcibus_provider_defs.h
rename to include/asm-ia64/sn/pcibus_provider_defs.h
diff --git a/arch/ia64/sn/include/pci/pcidev.h b/include/asm-ia64/sn/pcidev.h
similarity index 100%
rename from arch/ia64/sn/include/pci/pcidev.h
rename to include/asm-ia64/sn/pcidev.h

From 9c90bdde77f7b7a42f7ebb900275d459ce2bac05 Mon Sep 17 00:00:00 2001
From: Mark Maule <maule@sgi.com>
Date: Mon, 25 Apr 2005 11:35:54 -0700
Subject: [PATCH 03/34] [IA64-SGI] altix: tioca chip driver (agp)

Provide a driver for the altix TIOCA AGP chipset.  An agpgart backend will
be provided as a separate patch.

Signed-off-by: Mark Maule <maule@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/io_init.c              |   2 +
 arch/ia64/sn/pci/Makefile                  |   2 +-
 arch/ia64/sn/pci/tioca_provider.c          | 668 +++++++++++++++++++++
 include/asm-ia64/sn/pcibus_provider_defs.h |   3 +-
 include/asm-ia64/sn/tioca.h                | 596 ++++++++++++++++++
 include/asm-ia64/sn/tioca_provider.h       | 206 +++++++
 6 files changed, 1475 insertions(+), 2 deletions(-)
 create mode 100644 arch/ia64/sn/pci/tioca_provider.c
 create mode 100644 include/asm-ia64/sn/tioca.h
 create mode 100644 include/asm-ia64/sn/tioca_provider.h

diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 9f9d0464f72c..18160a06a8c9 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -19,6 +19,7 @@
 #include "xtalk/hubdev.h"
 #include <asm/sn/io.h>
 #include <asm/sn/simulator.h>
+#include <asm/sn/tioca_provider.h>
 
 char master_baseio_wid;
 nasid_t master_nasid = INVALID_NASID;	/* Partition Master */
@@ -393,6 +394,7 @@ static int __init sn_pci_init(void)
 		sn_pci_provider[i] = &sn_pci_default_provider;
 
 	pcibr_init_provider();
+	tioca_init_provider();
 
 	/*
 	 * This is needed to avoid bounce limit checks in the blk layer
diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile
index b5dca0097a8e..2f915bce25f9 100644
--- a/arch/ia64/sn/pci/Makefile
+++ b/arch/ia64/sn/pci/Makefile
@@ -7,4 +7,4 @@
 #
 # Makefile for the sn pci general routines.
 
-obj-y := pci_dma.o pcibr/ 
+obj-y := pci_dma.o tioca_provider.o pcibr/ 
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
new file mode 100644
index 000000000000..2234d61cdd4b
--- /dev/null
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -0,0 +1,668 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/tioca_provider.h>
+
+uint32_t tioca_gart_found;
+EXPORT_SYMBOL(tioca_gart_found);	/* used by agp-sgi */
+
+LIST_HEAD(tioca_list);
+EXPORT_SYMBOL(tioca_list);	/* used by agp-sgi */
+
+static int tioca_gart_init(struct tioca_kernel *);
+
+/**
+ * tioca_gart_init - Initialize SGI TIOCA GART
+ * @tioca_common: ptr to common prom/kernel struct identifying the 
+ *
+ * If the indicated tioca has devices present, initialize its associated
+ * GART MMR's and kernel memory.
+ */
+static int
+tioca_gart_init(struct tioca_kernel *tioca_kern)
+{
+	uint64_t ap_reg;
+	uint64_t offset;
+	struct page *tmp;
+	struct tioca_common *tioca_common;
+	volatile struct tioca *ca_base;
+
+	tioca_common = tioca_kern->ca_common;
+	ca_base = (struct tioca *)tioca_common->ca_common.bs_base;
+
+	if (list_empty(tioca_kern->ca_devices))
+		return 0;
+
+	ap_reg = 0;
+
+	/*
+	 * Validate aperature size
+	 */
+
+	switch (CA_APERATURE_SIZE >> 20) {
+	case 4:
+		ap_reg |= (0x3ff << CA_GART_AP_SIZE_SHFT);	/* 4MB */
+		break;
+	case 8:
+		ap_reg |= (0x3fe << CA_GART_AP_SIZE_SHFT);	/* 8MB */
+		break;
+	case 16:
+		ap_reg |= (0x3fc << CA_GART_AP_SIZE_SHFT);	/* 16MB */
+		break;
+	case 32:
+		ap_reg |= (0x3f8 << CA_GART_AP_SIZE_SHFT);	/* 32 MB */
+		break;
+	case 64:
+		ap_reg |= (0x3f0 << CA_GART_AP_SIZE_SHFT);	/* 64 MB */
+		break;
+	case 128:
+		ap_reg |= (0x3e0 << CA_GART_AP_SIZE_SHFT);	/* 128 MB */
+		break;
+	case 256:
+		ap_reg |= (0x3c0 << CA_GART_AP_SIZE_SHFT);	/* 256 MB */
+		break;
+	case 512:
+		ap_reg |= (0x380 << CA_GART_AP_SIZE_SHFT);	/* 512 MB */
+		break;
+	case 1024:
+		ap_reg |= (0x300 << CA_GART_AP_SIZE_SHFT);	/* 1GB */
+		break;
+	case 2048:
+		ap_reg |= (0x200 << CA_GART_AP_SIZE_SHFT);	/* 2GB */
+		break;
+	case 4096:
+		ap_reg |= (0x000 << CA_GART_AP_SIZE_SHFT);	/* 4 GB */
+		break;
+	default:
+		printk(KERN_ERR "%s:  Invalid CA_APERATURE_SIZE "
+		       "0x%lx\n", __FUNCTION__, (ulong) CA_APERATURE_SIZE);
+		return -1;
+	}
+
+	/*
+	 * Set up other aperature parameters
+	 */
+
+	if (PAGE_SIZE >= 16384) {
+		tioca_kern->ca_ap_pagesize = 16384;
+		ap_reg |= CA_GART_PAGE_SIZE;
+	} else {
+		tioca_kern->ca_ap_pagesize = 4096;
+	}
+
+	tioca_kern->ca_ap_size = CA_APERATURE_SIZE;
+	tioca_kern->ca_ap_bus_base = CA_APERATURE_BASE;
+	tioca_kern->ca_gart_entries =
+	    tioca_kern->ca_ap_size / tioca_kern->ca_ap_pagesize;
+
+	ap_reg |= (CA_GART_AP_ENB_AGP | CA_GART_AP_ENB_PCI);
+	ap_reg |= tioca_kern->ca_ap_bus_base;
+
+	/*
+	 * Allocate and set up the GART
+	 */
+
+	tioca_kern->ca_gart_size = tioca_kern->ca_gart_entries * sizeof(u64);
+	tmp =
+	    alloc_pages_node(tioca_kern->ca_closest_node,
+			     GFP_KERNEL | __GFP_ZERO,
+			     get_order(tioca_kern->ca_gart_size));
+
+	if (!tmp) {
+		printk(KERN_ERR "%s:  Could not allocate "
+		       "%lu bytes (order %d) for GART\n",
+		       __FUNCTION__,
+		       tioca_kern->ca_gart_size,
+		       get_order(tioca_kern->ca_gart_size));
+		return -ENOMEM;
+	}
+
+	tioca_kern->ca_gart = page_address(tmp);
+	tioca_kern->ca_gart_coretalk_addr =
+	    PHYS_TO_TIODMA(virt_to_phys(tioca_kern->ca_gart));
+
+	/*
+	 * Compute PCI/AGP convenience fields 
+	 */
+
+	offset = CA_PCI32_MAPPED_BASE - CA_APERATURE_BASE;
+	tioca_kern->ca_pciap_base = CA_PCI32_MAPPED_BASE;
+	tioca_kern->ca_pciap_size = CA_PCI32_MAPPED_SIZE;
+	tioca_kern->ca_pcigart_start = offset / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_pcigart_base =
+	    tioca_kern->ca_gart_coretalk_addr + offset;
+	tioca_kern->ca_pcigart =
+	    &tioca_kern->ca_gart[tioca_kern->ca_pcigart_start];
+	tioca_kern->ca_pcigart_entries =
+	    tioca_kern->ca_pciap_size / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_pcigart_pagemap =
+	    kcalloc(1, tioca_kern->ca_pcigart_entries / 8, GFP_KERNEL);
+	if (!tioca_kern->ca_pcigart_pagemap) {
+		free_pages((unsigned long)tioca_kern->ca_gart,
+			   get_order(tioca_kern->ca_gart_size));
+		return -1;
+	}
+
+	offset = CA_AGP_MAPPED_BASE - CA_APERATURE_BASE;
+	tioca_kern->ca_gfxap_base = CA_AGP_MAPPED_BASE;
+	tioca_kern->ca_gfxap_size = CA_AGP_MAPPED_SIZE;
+	tioca_kern->ca_gfxgart_start = offset / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_gfxgart_base =
+	    tioca_kern->ca_gart_coretalk_addr + offset;
+	tioca_kern->ca_gfxgart =
+	    &tioca_kern->ca_gart[tioca_kern->ca_gfxgart_start];
+	tioca_kern->ca_gfxgart_entries =
+	    tioca_kern->ca_gfxap_size / tioca_kern->ca_ap_pagesize;
+
+	/*
+	 * various control settings:
+	 *      use agp op-combining
+	 *      use GET semantics to fetch memory
+	 *      participate in coherency domain
+	 *      prefetch TLB entries
+	 */
+
+	ca_base->ca_control1 |= CA_AGPDMA_OP_ENB_COMBDELAY;	/* PV895469 ? */
+	ca_base->ca_control2 &= ~(CA_GART_MEM_PARAM);
+	ca_base->ca_control2 |= (0x2ull << CA_GART_MEM_PARAM_SHFT);
+	tioca_kern->ca_gart_iscoherent = 1;
+	ca_base->ca_control2 |=
+	    (CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB);
+
+	/*
+	 * Unmask GART fetch error interrupts.  Clear residual errors first.
+	 */
+
+	ca_base->ca_int_status_alias = CA_GART_FETCH_ERR;
+	ca_base->ca_mult_error_alias = CA_GART_FETCH_ERR;
+	ca_base->ca_int_mask &= ~CA_GART_FETCH_ERR;
+
+	/*
+	 * Program the aperature and gart registers in TIOCA
+	 */
+
+	ca_base->ca_gart_aperature = ap_reg;
+	ca_base->ca_gart_ptr_table = tioca_kern->ca_gart_coretalk_addr | 1;
+
+	return 0;
+}
+
+/**
+ * tioca_fastwrite_enable - enable AGP FW for a tioca and its functions
+ * @tioca_kernel: structure representing the CA
+ *
+ * Given a CA, scan all attached functions making sure they all support
+ * FastWrite.  If so, enable FastWrite for all functions and the CA itself.
+ */
+
+void
+tioca_fastwrite_enable(struct tioca_kernel *tioca_kern)
+{
+	int cap_ptr;
+	uint64_t ca_control1;
+	uint32_t reg;
+	struct tioca *tioca_base;
+	struct pci_dev *pdev;
+	struct tioca_common *common;
+
+	common = tioca_kern->ca_common;
+
+	/*
+	 * Scan all vga controllers on this bus making sure they all
+	 * suport FW.  If not, return.
+	 */
+
+	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
+		if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
+			continue;
+
+		cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
+		if (!cap_ptr)
+			return;	/* no AGP CAP means no FW */
+
+		pci_read_config_dword(pdev, cap_ptr + PCI_AGP_STATUS, &reg);
+		if (!(reg & PCI_AGP_STATUS_FW))
+			return;	/* function doesn't support FW */
+	}
+
+	/*
+	 * Set fw for all vga fn's
+	 */
+
+	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
+		if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
+			continue;
+
+		cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
+		pci_read_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, &reg);
+		reg |= PCI_AGP_COMMAND_FW;
+		pci_write_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, reg);
+	}
+
+	/*
+	 * Set ca's fw to match
+	 */
+
+	tioca_base = (struct tioca *)common->ca_common.bs_base;
+	ca_control1 = tioca_base->ca_control1;
+	ca_control1 |= CA_AGP_FW_ENABLE;
+	tioca_base->ca_control1 = ca_control1;
+}
+
+EXPORT_SYMBOL(tioca_fastwrite_enable);	/* used by agp-sgi */
+
+/**
+ * tioca_dma_d64 - create a DMA mapping using 64-bit direct mode
+ * @paddr: system physical address
+ *
+ * Map @paddr into 64-bit CA bus space.  No device context is necessary.
+ * Bits 53:0 come from the coretalk address.  We just need to mask in the
+ * following optional bits of the 64-bit pci address:
+ *
+ * 63:60 - Coretalk Packet Type -  0x1 for Mem Get/Put (coherent)
+ *                                 0x2 for PIO (non-coherent)
+ *                                 We will always use 0x1
+ * 55:55 - Swap bytes		   Currently unused
+ */
+static uint64_t
+tioca_dma_d64(unsigned long paddr)
+{
+	dma_addr_t bus_addr;
+
+	bus_addr = PHYS_TO_TIODMA(paddr);
+
+	BUG_ON(!bus_addr);
+	BUG_ON(bus_addr >> 54);
+
+	/* Set upper nibble to Cache Coherent Memory op */
+	bus_addr |= (1UL << 60);
+
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_d48 - create a DMA mapping using 48-bit direct mode
+ * @pdev: linux pci_dev representing the function
+ * @paddr: system physical address
+ *
+ * Map @paddr into 64-bit bus space of the CA associated with @pcidev_info.
+ *
+ * The CA agp 48 bit direct address falls out as follows:
+ *
+ * When direct mapping AGP addresses, the 48 bit AGP address is
+ * constructed as follows:
+ *
+ * [47:40] - Low 8 bits of the page Node ID extracted from coretalk
+ *              address [47:40].  The upper 8 node bits are fixed
+ *              and come from the xxx register bits [5:0]
+ * [39:38] - Chiplet ID extracted from coretalk address [39:38]
+ * [37:00] - node offset extracted from coretalk address [37:00]
+ * 
+ * Since the node id in general will be non-zero, and the chiplet id
+ * will always be non-zero, it follows that the device must support
+ * a dma mask of at least 0xffffffffff (40 bits) to target node 0
+ * and in general should be 0xffffffffffff (48 bits) to target nodes
+ * up to 255.  Nodes above 255 need the support of the xxx register,
+ * and so a given CA can only directly target nodes in the range
+ * xxx - xxx+255.
+ */
+static uint64_t
+tioca_dma_d48(struct pci_dev *pdev, uint64_t paddr)
+{
+	struct tioca_common *tioca_common;
+	struct tioca *ca_base;
+	uint64_t ct_addr;
+	dma_addr_t bus_addr;
+	uint32_t node_upper;
+	uint64_t agp_dma_extn;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	ca_base = (struct tioca *)tioca_common->ca_common.bs_base;
+
+	ct_addr = PHYS_TO_TIODMA(paddr);
+	if (!ct_addr)
+		return 0;
+
+	bus_addr = (dma_addr_t) (ct_addr & 0xffffffffffff);
+	node_upper = ct_addr >> 48;
+
+	if (node_upper > 64) {
+		printk(KERN_ERR "%s:  coretalk addr 0x%p node id out "
+		       "of range\n", __FUNCTION__, (void *)ct_addr);
+		return 0;
+	}
+
+	agp_dma_extn = ca_base->ca_agp_dma_addr_extn;
+	if (node_upper != (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)) {
+		printk(KERN_ERR "%s:  coretalk upper node (%u) "
+		       "mismatch with ca_agp_dma_addr_extn (%lu)\n",
+		       __FUNCTION__,
+		       node_upper, (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT));
+		return 0;
+	}
+
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_mapped - create a DMA mapping using a CA GART 
+ * @pdev: linux pci_dev representing the function
+ * @paddr: host physical address to map
+ * @req_size: len (bytes) to map
+ *
+ * Map @paddr into CA address space using the GART mechanism.  The mapped
+ * dma_addr_t is guarenteed to be contiguous in CA bus space.
+ */
+static dma_addr_t
+tioca_dma_mapped(struct pci_dev *pdev, uint64_t paddr, size_t req_size)
+{
+	int i, ps, ps_shift, entry, entries, mapsize, last_entry;
+	uint64_t xio_addr, end_xio_addr;
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	dma_addr_t bus_addr = 0;
+	struct tioca_dmamap *ca_dmamap;
+	void *map;
+	unsigned long flags;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);;
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
+
+	xio_addr = PHYS_TO_TIODMA(paddr);
+	if (!xio_addr)
+		return 0;
+
+	spin_lock_irqsave(&tioca_kern->ca_lock, flags);
+
+	/*
+	 * allocate a map struct
+	 */
+
+	ca_dmamap = kcalloc(1, sizeof(struct tioca_dmamap), GFP_ATOMIC);
+	if (!ca_dmamap)
+		goto map_return;
+
+	/*
+	 * Locate free entries that can hold req_size.  Account for
+	 * unaligned start/length when allocating.
+	 */
+
+	ps = tioca_kern->ca_ap_pagesize;	/* will be power of 2 */
+	ps_shift = ffs(ps) - 1;
+	end_xio_addr = xio_addr + req_size - 1;
+
+	entries = (end_xio_addr >> ps_shift) - (xio_addr >> ps_shift) + 1;
+
+	map = tioca_kern->ca_pcigart_pagemap;
+	mapsize = tioca_kern->ca_pcigart_entries;
+
+	entry = find_first_zero_bit(map, mapsize);
+	while (entry < mapsize) {
+		last_entry = find_next_bit(map, mapsize, entry);
+
+		if (last_entry - entry >= entries)
+			break;
+
+		entry = find_next_zero_bit(map, mapsize, last_entry);
+	}
+
+	if (entry > mapsize)
+		goto map_return;
+
+	for (i = 0; i < entries; i++)
+		set_bit(entry + i, map);
+
+	bus_addr = tioca_kern->ca_pciap_base + (entry * ps);
+
+	ca_dmamap->cad_dma_addr = bus_addr;
+	ca_dmamap->cad_gart_size = entries;
+	ca_dmamap->cad_gart_entry = entry;
+	list_add(&ca_dmamap->cad_list, &tioca_kern->ca_list);
+
+	if (xio_addr % ps) {
+		tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
+		bus_addr += xio_addr & (ps - 1);
+		xio_addr &= ~(ps - 1);
+		xio_addr += ps;
+		entry++;
+	}
+
+	while (xio_addr < end_xio_addr) {
+		tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
+		xio_addr += ps;
+		entry++;
+	}
+
+	tioca_tlbflush(tioca_kern);
+
+map_return:
+	spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_unmap - release CA mapping resources
+ * @pdev: linux pci_dev representing the function
+ * @bus_addr: bus address returned by an earlier tioca_dma_map
+ * @dir: mapping direction (unused)
+ *
+ * Locate mapping resources associated with @bus_addr and release them.
+ * For mappings created using the direct modes (64 or 48) there are no
+ * resources to release.
+ */
+void
+tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
+{
+	int i, entry;
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	struct tioca_dmamap *map;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
+	unsigned long flags;
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
+
+	/* return straight away if this isn't be a mapped address */
+
+	if (bus_addr < tioca_kern->ca_pciap_base ||
+	    bus_addr >= (tioca_kern->ca_pciap_base + tioca_kern->ca_pciap_size))
+		return;
+
+	spin_lock_irqsave(&tioca_kern->ca_lock, flags);
+
+	list_for_each_entry(map, &tioca_kern->ca_dmamaps, cad_list)
+	    if (map->cad_dma_addr == bus_addr)
+		break;
+
+	BUG_ON(map == NULL);
+
+	entry = map->cad_gart_entry;
+
+	for (i = 0; i < map->cad_gart_size; i++, entry++) {
+		clear_bit(entry, tioca_kern->ca_pcigart_pagemap);
+		tioca_kern->ca_pcigart[entry] = 0;
+	}
+	tioca_tlbflush(tioca_kern);
+
+	list_del(&map->cad_list);
+	spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
+	kfree(map);
+}
+
+/**
+ * tioca_dma_map - map pages for PCI DMA
+ * @pdev: linux pci_dev representing the function
+ * @paddr: host physical address to map
+ * @byte_count: bytes to map
+ *
+ * This is the main wrapper for mapping host physical pages to CA PCI space.
+ * The mapping mode used is based on the devices dma_mask.  As a last resort
+ * use the GART mapped mode.
+ */
+uint64_t
+tioca_dma_map(struct pci_dev *pdev, uint64_t paddr, size_t byte_count)
+{
+	uint64_t mapaddr;
+
+	/*
+	 * If card is 64 or 48 bit addresable, use a direct mapping.  32
+	 * bit direct is so restrictive w.r.t. where the memory resides that
+	 * we don't use it even though CA has some support.
+	 */
+
+	if (pdev->dma_mask == ~0UL)
+		mapaddr = tioca_dma_d64(paddr);
+	else if (pdev->dma_mask == 0xffffffffffffUL)
+		mapaddr = tioca_dma_d48(pdev, paddr);
+	else
+		mapaddr = 0;
+
+	/* Last resort ... use PCI portion of CA GART */
+
+	if (mapaddr == 0)
+		mapaddr = tioca_dma_mapped(pdev, paddr, byte_count);
+
+	return mapaddr;
+}
+
+/**
+ * tioca_error_intr_handler - SGI TIO CA error interrupt handler
+ * @irq: unused
+ * @arg: pointer to tioca_common struct for the given CA
+ * @pt: unused
+ *
+ * Handle a CA error interrupt.  Simply a wrapper around a SAL call which
+ * defers processing to the SGI prom.
+ */
+static irqreturn_t
+tioca_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
+{
+	struct tioca_common *soft = arg;
+	struct ia64_sal_retval ret_stuff;
+	uint64_t segment;
+	uint64_t busnum;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	segment = 0;
+	busnum = soft->ca_common.bs_persist_busnum;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_ERROR_INTERRUPT,
+			segment, busnum, 0, 0, 0, 0, 0);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * tioca_bus_fixup - perform final PCI fixup for a TIO CA bus
+ * @prom_bussoft: Common prom/kernel struct representing the bus
+ *
+ * Replicates the tioca_common pointed to by @prom_bussoft in kernel
+ * space.  Allocates and initializes a kernel-only area for a given CA,
+ * and sets up an irq for handling CA error interrupts.
+ *
+ * On successful setup, returns the kernel version of tioca_common back to
+ * the caller.
+ */
+void *
+tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft)
+{
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	struct pci_bus *bus;
+
+	/* sanity check prom rev */
+
+	if (sn_sal_rev_major() < 4 ||
+	    (sn_sal_rev_major() == 4 && sn_sal_rev_minor() < 6)) {
+		printk
+		    (KERN_ERR "%s:  SGI prom rev 4.06 or greater required "
+		     "for tioca support\n", __FUNCTION__);
+		return NULL;
+	}
+
+	/*
+	 * Allocate kernel bus soft and copy from prom.
+	 */
+
+	tioca_common = kcalloc(1, sizeof(struct tioca_common), GFP_KERNEL);
+	if (!tioca_common)
+		return NULL;
+
+	memcpy(tioca_common, prom_bussoft, sizeof(struct tioca_common));
+	tioca_common->ca_common.bs_base |= __IA64_UNCACHED_OFFSET;
+
+	/* init kernel-private area */
+
+	tioca_kern = kcalloc(1, sizeof(struct tioca_kernel), GFP_KERNEL);
+	if (!tioca_kern) {
+		kfree(tioca_common);
+		return NULL;
+	}
+
+	tioca_kern->ca_common = tioca_common;
+	spin_lock_init(&tioca_kern->ca_lock);
+	INIT_LIST_HEAD(&tioca_kern->ca_dmamaps);
+	tioca_kern->ca_closest_node =
+	    nasid_to_cnodeid(tioca_common->ca_closest_nasid);
+	tioca_common->ca_kernel_private = (uint64_t) tioca_kern;
+
+	bus = pci_find_bus(0, tioca_common->ca_common.bs_persist_busnum);
+	BUG_ON(!bus);
+	tioca_kern->ca_devices = &bus->devices;
+
+	/* init GART */
+
+	if (tioca_gart_init(tioca_kern) < 0) {
+		kfree(tioca_kern);
+		kfree(tioca_common);
+		return NULL;
+	}
+
+	tioca_gart_found++;
+	list_add(&tioca_kern->ca_list, &tioca_list);
+
+	if (request_irq(SGI_TIOCA_ERROR,
+			tioca_error_intr_handler,
+			SA_SHIRQ, "TIOCA error", (void *)tioca_common))
+		printk(KERN_WARNING
+		       "%s:  Unable to get irq %d.  "
+		       "Error interrupts won't be routed for TIOCA bus %d\n",
+		       __FUNCTION__, SGI_TIOCA_ERROR,
+		       (int)tioca_common->ca_common.bs_persist_busnum);
+
+	return tioca_common;
+}
+
+static struct sn_pcibus_provider tioca_pci_interfaces = {
+	.dma_map = tioca_dma_map,
+	.dma_map_consistent = tioca_dma_map,
+	.dma_unmap = tioca_dma_unmap,
+	.bus_fixup = tioca_bus_fixup,
+};
+
+/**
+ * tioca_init_provider - init SN PCI provider ops for TIO CA
+ */
+int
+tioca_init_provider(void)
+{
+	sn_pci_provider[PCIIO_ASIC_TYPE_TIOCA] = &tioca_pci_interfaces;
+	return 0;
+}
diff --git a/include/asm-ia64/sn/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h
index f546b4ece33c..04e27d5b3820 100644
--- a/include/asm-ia64/sn/pcibus_provider_defs.h
+++ b/include/asm-ia64/sn/pcibus_provider_defs.h
@@ -17,8 +17,9 @@
 #define PCIIO_ASIC_TYPE_PPB	1
 #define PCIIO_ASIC_TYPE_PIC	2
 #define PCIIO_ASIC_TYPE_TIOCP	3
+#define PCIIO_ASIC_TYPE_TIOCA	4
 
-#define PCIIO_ASIC_MAX_TYPES	4
+#define PCIIO_ASIC_MAX_TYPES	5
 
 /*
  * Common pciio bus provider data.  There should be one of these as the
diff --git a/include/asm-ia64/sn/tioca.h b/include/asm-ia64/sn/tioca.h
new file mode 100644
index 000000000000..bc1aacfb9483
--- /dev/null
+++ b/include/asm-ia64/sn/tioca.h
@@ -0,0 +1,596 @@
+#ifndef _ASM_IA64_SN_TIO_TIOCA_H
+#define _ASM_IA64_SN_TIO_TIOCA_H
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+#define TIOCA_PART_NUM	0xE020
+#define TIOCA_MFGR_NUM	0x24
+#define TIOCA_REV_A	0x1
+
+/*
+ * Register layout for TIO:CA.  See below for bitmasks for each register.
+ */
+
+struct tioca {
+	uint64_t	ca_id;				/* 0x000000 */
+	uint64_t	ca_control1;			/* 0x000008 */
+	uint64_t	ca_control2;			/* 0x000010 */
+	uint64_t	ca_status1;			/* 0x000018 */
+	uint64_t	ca_status2;			/* 0x000020 */
+	uint64_t	ca_gart_aperature;		/* 0x000028 */
+	uint64_t	ca_gfx_detach;			/* 0x000030 */
+	uint64_t	ca_inta_dest_addr;		/* 0x000038 */
+	uint64_t	ca_intb_dest_addr;		/* 0x000040 */
+	uint64_t	ca_err_int_dest_addr;		/* 0x000048 */
+	uint64_t	ca_int_status;			/* 0x000050 */
+	uint64_t	ca_int_status_alias;		/* 0x000058 */
+	uint64_t	ca_mult_error;			/* 0x000060 */
+	uint64_t	ca_mult_error_alias;		/* 0x000068 */
+	uint64_t	ca_first_error;			/* 0x000070 */
+	uint64_t	ca_int_mask;			/* 0x000078 */
+	uint64_t	ca_crm_pkterr_type;		/* 0x000080 */
+	uint64_t	ca_crm_pkterr_type_alias;	/* 0x000088 */
+	uint64_t	ca_crm_ct_error_detail_1;	/* 0x000090 */
+	uint64_t	ca_crm_ct_error_detail_2;	/* 0x000098 */
+	uint64_t	ca_crm_tnumto;			/* 0x0000A0 */
+	uint64_t	ca_gart_err;			/* 0x0000A8 */
+	uint64_t	ca_pcierr_type;			/* 0x0000B0 */
+	uint64_t	ca_pcierr_addr;			/* 0x0000B8 */
+
+	uint64_t	ca_pad_0000C0[3];		/* 0x0000{C0..D0} */
+
+	uint64_t	ca_pci_rd_buf_flush;		/* 0x0000D8 */
+	uint64_t	ca_pci_dma_addr_extn;		/* 0x0000E0 */
+	uint64_t	ca_agp_dma_addr_extn;		/* 0x0000E8 */
+	uint64_t	ca_force_inta;			/* 0x0000F0 */
+	uint64_t	ca_force_intb;			/* 0x0000F8 */
+	uint64_t	ca_debug_vector_sel;		/* 0x000100 */
+	uint64_t	ca_debug_mux_core_sel;		/* 0x000108 */
+	uint64_t	ca_debug_mux_pci_sel;		/* 0x000110 */
+	uint64_t	ca_debug_domain_sel;		/* 0x000118 */
+
+	uint64_t	ca_pad_000120[28];		/* 0x0001{20..F8} */
+
+	uint64_t	ca_gart_ptr_table;		/* 0x200 */
+	uint64_t	ca_gart_tlb_addr[8];		/* 0x2{08..40} */
+};
+
+/*
+ * Mask/shift definitions for TIO:CA registers.  The convention here is
+ * to mainly use the names as they appear in the "TIO AEGIS Programmers'
+ * Reference" with a CA_ prefix added.  Some exceptions were made to fix
+ * duplicate field names or to generalize fields that are common to
+ * different registers (ca_debug_mux_core_sel and ca_debug_mux_pci_sel for
+ * example).
+ *
+ * Fields consisting of a single bit have a single #define have a single
+ * macro declaration to mask the bit.  Fields consisting of multiple bits
+ * have two declarations: one to mask the proper bits in a register, and 
+ * a second with the suffix "_SHFT" to identify how far the mask needs to
+ * be shifted right to get its base value.
+ */
+
+/* ==== ca_control1 */
+#define CA_SYS_BIG_END			(1ull << 0)
+#define CA_DMA_AGP_SWAP			(1ull << 1)
+#define CA_DMA_PCI_SWAP			(1ull << 2)
+#define CA_PIO_IO_SWAP			(1ull << 3)
+#define CA_PIO_MEM_SWAP			(1ull << 4)
+#define CA_GFX_WR_SWAP			(1ull << 5)
+#define CA_AGP_FW_ENABLE		(1ull << 6)
+#define CA_AGP_CAL_CYCLE		(0x7ull << 7)
+#define CA_AGP_CAL_CYCLE_SHFT		7
+#define CA_AGP_CAL_PRSCL_BYP		(1ull << 10)
+#define CA_AGP_INIT_CAL_ENB		(1ull << 11)
+#define CA_INJ_ADDR_PERR		(1ull << 12)
+#define CA_INJ_DATA_PERR		(1ull << 13)
+	/* bits 15:14 unused */
+#define CA_PCIM_IO_NBE_AD		(0x7ull << 16)
+#define CA_PCIM_IO_NBE_AD_SHFT		16
+#define CA_PCIM_FAST_BTB_ENB		(1ull << 19)
+	/* bits 23:20 unused */
+#define CA_PIO_ADDR_OFFSET		(0xffull << 24)
+#define CA_PIO_ADDR_OFFSET_SHFT		24
+	/* bits 35:32 unused */
+#define CA_AGPDMA_OP_COMBDELAY		(0x1full << 36)
+#define CA_AGPDMA_OP_COMBDELAY_SHFT	36
+	/* bit 41 unused */
+#define CA_AGPDMA_OP_ENB_COMBDELAY	(1ull << 42)
+#define	CA_PCI_INT_LPCNT		(0xffull << 44)
+#define CA_PCI_INT_LPCNT_SHFT		44
+	/* bits 63:52 unused */
+
+/* ==== ca_control2 */
+#define CA_AGP_LATENCY_TO		(0xffull << 0)
+#define CA_AGP_LATENCY_TO_SHFT		0
+#define CA_PCI_LATENCY_TO		(0xffull << 8)
+#define CA_PCI_LATENCY_TO_SHFT		8
+#define CA_PCI_MAX_RETRY		(0x3ffull << 16)
+#define CA_PCI_MAX_RETRY_SHFT		16
+	/* bits 27:26 unused */
+#define CA_RT_INT_EN			(0x3ull << 28)
+#define CA_RT_INT_EN_SHFT			28
+#define CA_MSI_INT_ENB			(1ull << 30)
+#define CA_PCI_ARB_ERR_ENB		(1ull << 31)
+#define CA_GART_MEM_PARAM		(0x3ull << 32)
+#define CA_GART_MEM_PARAM_SHFT		32
+#define CA_GART_RD_PREFETCH_ENB		(1ull << 34)
+#define CA_GART_WR_PREFETCH_ENB		(1ull << 35)
+#define CA_GART_FLUSH_TLB		(1ull << 36)
+	/* bits 39:37 unused */
+#define CA_CRM_TNUMTO_PERIOD		(0x1fffull << 40)
+#define CA_CRM_TNUMTO_PERIOD_SHFT	40
+	/* bits 55:53 unused */
+#define CA_CRM_TNUMTO_ENB		(1ull << 56)
+#define CA_CRM_PRESCALER_BYP		(1ull << 57)
+	/* bits 59:58 unused */
+#define CA_CRM_MAX_CREDIT		(0x7ull << 60)
+#define CA_CRM_MAX_CREDIT_SHFT		60
+	/* bit 63 unused */
+
+/* ==== ca_status1 */
+#define CA_CORELET_ID			(0x3ull << 0)
+#define CA_CORELET_ID_SHFT		0
+#define CA_INTA_N			(1ull << 2)
+#define CA_INTB_N			(1ull << 3)
+#define CA_CRM_CREDIT_AVAIL		(0x7ull << 4)
+#define CA_CRM_CREDIT_AVAIL_SHFT	4
+	/* bit 7 unused */
+#define CA_CRM_SPACE_AVAIL		(0x7full << 8)
+#define CA_CRM_SPACE_AVAIL_SHFT		8
+	/* bit 15 unused */
+#define CA_GART_TLB_VAL			(0xffull << 16)
+#define CA_GART_TLB_VAL_SHFT		16
+	/* bits 63:24 unused */
+
+/* ==== ca_status2 */
+#define CA_GFX_CREDIT_AVAIL		(0xffull << 0)
+#define CA_GFX_CREDIT_AVAIL_SHFT	0
+#define CA_GFX_OPQ_AVAIL		(0xffull << 8)
+#define CA_GFX_OPQ_AVAIL_SHFT		8
+#define CA_GFX_WRBUFF_AVAIL		(0xffull << 16)
+#define CA_GFX_WRBUFF_AVAIL_SHFT	16
+#define CA_ADMA_OPQ_AVAIL		(0xffull << 24)
+#define CA_ADMA_OPQ_AVAIL_SHFT		24
+#define CA_ADMA_WRBUFF_AVAIL		(0xffull << 32)
+#define CA_ADMA_WRBUFF_AVAIL_SHFT	32
+#define CA_ADMA_RDBUFF_AVAIL		(0x7full << 40)
+#define CA_ADMA_RDBUFF_AVAIL_SHFT	40
+#define CA_PCI_PIO_OP_STAT		(1ull << 47)
+#define CA_PDMA_OPQ_AVAIL		(0xfull << 48)
+#define CA_PDMA_OPQ_AVAIL_SHFT		48
+#define CA_PDMA_WRBUFF_AVAIL		(0xfull << 52)
+#define CA_PDMA_WRBUFF_AVAIL_SHFT	52
+#define CA_PDMA_RDBUFF_AVAIL		(0x3ull << 56)
+#define CA_PDMA_RDBUFF_AVAIL_SHFT	56
+	/* bits 63:58 unused */
+
+/* ==== ca_gart_aperature */
+#define CA_GART_AP_ENB_AGP		(1ull << 0)
+#define CA_GART_PAGE_SIZE		(1ull << 1)
+#define CA_GART_AP_ENB_PCI		(1ull << 2)
+	/* bits 11:3 unused */
+#define CA_GART_AP_SIZE			(0x3ffull << 12)
+#define CA_GART_AP_SIZE_SHFT		12
+#define CA_GART_AP_BASE			(0x3ffffffffffull << 22)
+#define CA_GART_AP_BASE_SHFT		22
+
+/* ==== ca_inta_dest_addr
+   ==== ca_intb_dest_addr 
+   ==== ca_err_int_dest_addr */
+	/* bits 2:0 unused */
+#define CA_INT_DEST_ADDR		(0x7ffffffffffffull << 3)
+#define CA_INT_DEST_ADDR_SHFT		3
+	/* bits 55:54 unused */
+#define CA_INT_DEST_VECT		(0xffull << 56)
+#define CA_INT_DEST_VECT_SHFT		56
+
+/* ==== ca_int_status */
+/* ==== ca_int_status_alias */
+/* ==== ca_mult_error */
+/* ==== ca_mult_error_alias */
+/* ==== ca_first_error */
+/* ==== ca_int_mask */
+#define CA_PCI_ERR			(1ull << 0)
+	/* bits 3:1 unused */
+#define CA_GART_FETCH_ERR		(1ull << 4)
+#define CA_GFX_WR_OVFLW			(1ull << 5)
+#define CA_PIO_REQ_OVFLW		(1ull << 6)
+#define CA_CRM_PKTERR			(1ull << 7)
+#define CA_CRM_DVERR			(1ull << 8)
+#define CA_TNUMTO			(1ull << 9)
+#define CA_CXM_RSP_CRED_OVFLW		(1ull << 10)
+#define CA_CXM_REQ_CRED_OVFLW		(1ull << 11)
+#define CA_PIO_INVALID_ADDR		(1ull << 12)
+#define CA_PCI_ARB_TO			(1ull << 13)
+#define CA_AGP_REQ_OFLOW		(1ull << 14)
+#define CA_SBA_TYPE1_ERR		(1ull << 15)
+	/* bit 16 unused */
+#define CA_INTA				(1ull << 17)
+#define CA_INTB				(1ull << 18)
+#define CA_MULT_INTA			(1ull << 19)
+#define CA_MULT_INTB			(1ull << 20)
+#define CA_GFX_CREDIT_OVFLW		(1ull << 21)
+	/* bits 63:22 unused */
+
+/* ==== ca_crm_pkterr_type */
+/* ==== ca_crm_pkterr_type_alias */
+#define CA_CRM_PKTERR_SBERR_HDR		(1ull << 0)
+#define CA_CRM_PKTERR_DIDN		(1ull << 1)
+#define CA_CRM_PKTERR_PACTYPE		(1ull << 2)
+#define CA_CRM_PKTERR_INV_TNUM		(1ull << 3)
+#define CA_CRM_PKTERR_ADDR_RNG		(1ull << 4)
+#define CA_CRM_PKTERR_ADDR_ALGN		(1ull << 5)
+#define CA_CRM_PKTERR_HDR_PARAM		(1ull << 6)
+#define CA_CRM_PKTERR_CW_ERR		(1ull << 7)
+#define CA_CRM_PKTERR_SBERR_NH		(1ull << 8)
+#define CA_CRM_PKTERR_EARLY_TERM	(1ull << 9)
+#define CA_CRM_PKTERR_EARLY_TAIL	(1ull << 10)
+#define CA_CRM_PKTERR_MSSNG_TAIL	(1ull << 11)
+#define CA_CRM_PKTERR_MSSNG_HDR		(1ull << 12)
+	/* bits 15:13 unused */
+#define CA_FIRST_CRM_PKTERR_SBERR_HDR	(1ull << 16)
+#define CA_FIRST_CRM_PKTERR_DIDN	(1ull << 17)
+#define CA_FIRST_CRM_PKTERR_PACTYPE	(1ull << 18)
+#define CA_FIRST_CRM_PKTERR_INV_TNUM	(1ull << 19)
+#define CA_FIRST_CRM_PKTERR_ADDR_RNG	(1ull << 20)
+#define CA_FIRST_CRM_PKTERR_ADDR_ALGN	(1ull << 21)
+#define CA_FIRST_CRM_PKTERR_HDR_PARAM	(1ull << 22)
+#define CA_FIRST_CRM_PKTERR_CW_ERR	(1ull << 23)
+#define CA_FIRST_CRM_PKTERR_SBERR_NH	(1ull << 24)
+#define CA_FIRST_CRM_PKTERR_EARLY_TERM	(1ull << 25)
+#define CA_FIRST_CRM_PKTERR_EARLY_TAIL	(1ull << 26)
+#define CA_FIRST_CRM_PKTERR_MSSNG_TAIL	(1ull << 27)
+#define CA_FIRST_CRM_PKTERR_MSSNG_HDR	(1ull << 28)
+	/* bits 63:29 unused */
+
+/* ==== ca_crm_ct_error_detail_1 */
+#define CA_PKT_TYPE			(0xfull << 0)
+#define CA_PKT_TYPE_SHFT		0
+#define CA_SRC_ID			(0x3ull << 4)
+#define CA_SRC_ID_SHFT			4
+#define CA_DATA_SZ			(0x3ull << 6)
+#define CA_DATA_SZ_SHFT			6
+#define CA_TNUM				(0xffull << 8)
+#define CA_TNUM_SHFT			8
+#define CA_DW_DATA_EN			(0xffull << 16)
+#define CA_DW_DATA_EN_SHFT		16
+#define CA_GFX_CRED			(0xffull << 24)
+#define CA_GFX_CRED_SHFT		24
+#define CA_MEM_RD_PARAM			(0x3ull << 32)
+#define CA_MEM_RD_PARAM_SHFT		32
+#define CA_PIO_OP			(1ull << 34)
+#define CA_CW_ERR			(1ull << 35)
+	/* bits 62:36 unused */
+#define CA_VALID			(1ull << 63)
+
+/* ==== ca_crm_ct_error_detail_2 */
+	/* bits 2:0 unused */
+#define CA_PKT_ADDR			(0x1fffffffffffffull << 3)
+#define CA_PKT_ADDR_SHFT		3
+	/* bits 63:56 unused */
+
+/* ==== ca_crm_tnumto */
+#define CA_CRM_TNUMTO_VAL		(0xffull << 0)
+#define CA_CRM_TNUMTO_VAL_SHFT		0
+#define CA_CRM_TNUMTO_WR		(1ull << 8)
+	/* bits 63:9 unused */
+
+/* ==== ca_gart_err */
+#define CA_GART_ERR_SOURCE		(0x3ull << 0)
+#define CA_GART_ERR_SOURCE_SHFT		0
+	/* bits 3:2 unused */
+#define CA_GART_ERR_ADDR		(0xfffffffffull << 4)
+#define CA_GART_ERR_ADDR_SHFT		4
+	/* bits 63:40 unused */
+
+/* ==== ca_pcierr_type */
+#define CA_PCIERR_DATA			(0xffffffffull << 0)
+#define CA_PCIERR_DATA_SHFT		0
+#define CA_PCIERR_ENB			(0xfull << 32)
+#define CA_PCIERR_ENB_SHFT		32
+#define CA_PCIERR_CMD			(0xfull << 36)
+#define CA_PCIERR_CMD_SHFT		36
+#define CA_PCIERR_A64			(1ull << 40)
+#define CA_PCIERR_SLV_SERR		(1ull << 41)
+#define CA_PCIERR_SLV_WR_PERR		(1ull << 42)
+#define CA_PCIERR_SLV_RD_PERR		(1ull << 43)
+#define CA_PCIERR_MST_SERR		(1ull << 44)
+#define CA_PCIERR_MST_WR_PERR		(1ull << 45)
+#define CA_PCIERR_MST_RD_PERR		(1ull << 46)
+#define CA_PCIERR_MST_MABT		(1ull << 47)
+#define CA_PCIERR_MST_TABT		(1ull << 48)
+#define CA_PCIERR_MST_RETRY_TOUT	(1ull << 49)
+
+#define CA_PCIERR_TYPES \
+	(CA_PCIERR_A64|CA_PCIERR_SLV_SERR| \
+	 CA_PCIERR_SLV_WR_PERR|CA_PCIERR_SLV_RD_PERR| \
+	 CA_PCIERR_MST_SERR|CA_PCIERR_MST_WR_PERR|CA_PCIERR_MST_RD_PERR| \
+	 CA_PCIERR_MST_MABT|CA_PCIERR_MST_TABT|CA_PCIERR_MST_RETRY_TOUT)
+
+	/* bits 63:50 unused */
+
+/* ==== ca_pci_dma_addr_extn */
+#define CA_UPPER_NODE_OFFSET		(0x3full << 0)
+#define CA_UPPER_NODE_OFFSET_SHFT	0
+	/* bits 7:6 unused */
+#define CA_CHIPLET_ID			(0x3ull << 8)
+#define CA_CHIPLET_ID_SHFT		8
+	/* bits 11:10 unused */
+#define CA_PCI_DMA_NODE_ID		(0xffffull << 12)
+#define CA_PCI_DMA_NODE_ID_SHFT		12
+	/* bits 27:26 unused */
+#define CA_PCI_DMA_PIO_MEM_TYPE		(1ull << 28)
+	/* bits 63:29 unused */
+
+
+/* ==== ca_agp_dma_addr_extn */
+	/* bits 19:0 unused */
+#define CA_AGP_DMA_NODE_ID		(0xffffull << 20)
+#define CA_AGP_DMA_NODE_ID_SHFT		20
+	/* bits 27:26 unused */
+#define CA_AGP_DMA_PIO_MEM_TYPE		(1ull << 28)
+	/* bits 63:29 unused */
+
+/* ==== ca_debug_vector_sel */
+#define CA_DEBUG_MN_VSEL		(0xfull << 0)
+#define CA_DEBUG_MN_VSEL_SHFT		0
+#define CA_DEBUG_PP_VSEL		(0xfull << 4)
+#define CA_DEBUG_PP_VSEL_SHFT		4
+#define CA_DEBUG_GW_VSEL		(0xfull << 8)
+#define CA_DEBUG_GW_VSEL_SHFT		8
+#define CA_DEBUG_GT_VSEL		(0xfull << 12)
+#define CA_DEBUG_GT_VSEL_SHFT		12
+#define CA_DEBUG_PD_VSEL		(0xfull << 16)
+#define CA_DEBUG_PD_VSEL_SHFT		16
+#define CA_DEBUG_AD_VSEL		(0xfull << 20)
+#define CA_DEBUG_AD_VSEL_SHFT		20
+#define CA_DEBUG_CX_VSEL		(0xfull << 24)
+#define CA_DEBUG_CX_VSEL_SHFT		24
+#define CA_DEBUG_CR_VSEL		(0xfull << 28)
+#define CA_DEBUG_CR_VSEL_SHFT		28
+#define CA_DEBUG_BA_VSEL		(0xfull << 32)
+#define CA_DEBUG_BA_VSEL_SHFT		32
+#define CA_DEBUG_PE_VSEL		(0xfull << 36)
+#define CA_DEBUG_PE_VSEL_SHFT		36
+#define CA_DEBUG_BO_VSEL		(0xfull << 40)
+#define CA_DEBUG_BO_VSEL_SHFT		40
+#define CA_DEBUG_BI_VSEL		(0xfull << 44)
+#define CA_DEBUG_BI_VSEL_SHFT		44
+#define CA_DEBUG_AS_VSEL		(0xfull << 48)
+#define CA_DEBUG_AS_VSEL_SHFT		48
+#define CA_DEBUG_PS_VSEL		(0xfull << 52)
+#define CA_DEBUG_PS_VSEL_SHFT		52
+#define CA_DEBUG_PM_VSEL		(0xfull << 56)
+#define CA_DEBUG_PM_VSEL_SHFT		56
+	/* bits 63:60 unused */
+
+/* ==== ca_debug_mux_core_sel */
+/* ==== ca_debug_mux_pci_sel */
+#define CA_DEBUG_MSEL0			(0x7ull << 0)
+#define CA_DEBUG_MSEL0_SHFT		0
+	/* bit 3 unused */
+#define CA_DEBUG_NSEL0			(0x7ull << 4)
+#define CA_DEBUG_NSEL0_SHFT		4
+	/* bit 7 unused */
+#define CA_DEBUG_MSEL1			(0x7ull << 8)
+#define CA_DEBUG_MSEL1_SHFT		8
+	/* bit 11 unused */
+#define CA_DEBUG_NSEL1			(0x7ull << 12)
+#define CA_DEBUG_NSEL1_SHFT		12
+	/* bit 15 unused */
+#define CA_DEBUG_MSEL2			(0x7ull << 16)
+#define CA_DEBUG_MSEL2_SHFT		16
+	/* bit 19 unused */
+#define CA_DEBUG_NSEL2			(0x7ull << 20)
+#define CA_DEBUG_NSEL2_SHFT		20
+	/* bit 23 unused */
+#define CA_DEBUG_MSEL3			(0x7ull << 24)
+#define CA_DEBUG_MSEL3_SHFT		24
+	/* bit 27 unused */
+#define CA_DEBUG_NSEL3			(0x7ull << 28)
+#define CA_DEBUG_NSEL3_SHFT		28
+	/* bit 31 unused */
+#define CA_DEBUG_MSEL4			(0x7ull << 32)
+#define CA_DEBUG_MSEL4_SHFT		32
+	/* bit 35 unused */
+#define CA_DEBUG_NSEL4			(0x7ull << 36)
+#define CA_DEBUG_NSEL4_SHFT		36
+	/* bit 39 unused */
+#define CA_DEBUG_MSEL5			(0x7ull << 40)
+#define CA_DEBUG_MSEL5_SHFT		40
+	/* bit 43 unused */
+#define CA_DEBUG_NSEL5			(0x7ull << 44)
+#define CA_DEBUG_NSEL5_SHFT		44
+	/* bit 47 unused */
+#define CA_DEBUG_MSEL6			(0x7ull << 48)
+#define CA_DEBUG_MSEL6_SHFT		48
+	/* bit 51 unused */
+#define CA_DEBUG_NSEL6			(0x7ull << 52)
+#define CA_DEBUG_NSEL6_SHFT		52
+	/* bit 55 unused */
+#define CA_DEBUG_MSEL7			(0x7ull << 56)
+#define CA_DEBUG_MSEL7_SHFT		56
+	/* bit 59 unused */
+#define CA_DEBUG_NSEL7			(0x7ull << 60)
+#define CA_DEBUG_NSEL7_SHFT		60
+	/* bit 63 unused */
+
+
+/* ==== ca_debug_domain_sel */
+#define CA_DEBUG_DOMAIN_L		(1ull << 0)
+#define CA_DEBUG_DOMAIN_H		(1ull << 1)
+	/* bits 63:2 unused */
+
+/* ==== ca_gart_ptr_table */
+#define CA_GART_PTR_VAL			(1ull << 0)
+	/* bits 11:1 unused */
+#define CA_GART_PTR_ADDR		(0xfffffffffffull << 12)
+#define CA_GART_PTR_ADDR_SHFT		12
+	/* bits 63:56 unused */
+
+/* ==== ca_gart_tlb_addr[0-7] */
+#define CA_GART_TLB_ADDR		(0xffffffffffffffull << 0)
+#define CA_GART_TLB_ADDR_SHFT		0
+	/* bits 62:56 unused */
+#define CA_GART_TLB_ENTRY_VAL		(1ull << 63)
+
+/*
+ * PIO address space ranges for TIO:CA
+ */
+
+/* CA internal registers */
+#define CA_PIO_ADMIN			0x00000000
+#define CA_PIO_ADMIN_LEN		0x00010000
+
+/* GFX Write Buffer - Diagnostics */
+#define CA_PIO_GFX			0x00010000
+#define CA_PIO_GFX_LEN			0x00010000
+
+/* AGP DMA Write Buffer - Diagnostics */
+#define CA_PIO_AGP_DMAWRITE		0x00020000
+#define CA_PIO_AGP_DMAWRITE_LEN		0x00010000
+
+/* AGP DMA READ Buffer - Diagnostics */
+#define CA_PIO_AGP_DMAREAD		0x00030000
+#define CA_PIO_AGP_DMAREAD_LEN		0x00010000
+
+/* PCI Config Type 0 */
+#define CA_PIO_PCI_TYPE0_CONFIG		0x01000000
+#define CA_PIO_PCI_TYPE0_CONFIG_LEN	0x01000000
+
+/* PCI Config Type 1 */
+#define CA_PIO_PCI_TYPE1_CONFIG		0x02000000
+#define CA_PIO_PCI_TYPE1_CONFIG_LEN	0x01000000
+
+/* PCI I/O Cycles - mapped to PCI Address 0x00000000-0x04ffffff */
+#define CA_PIO_PCI_IO			0x03000000
+#define CA_PIO_PCI_IO_LEN		0x05000000
+
+/* PCI MEM Cycles - mapped to PCI with CA_PIO_ADDR_OFFSET of ca_control1 */
+/*	use Fast Write if enabled and coretalk packet type is a GFX request */
+#define CA_PIO_PCI_MEM_OFFSET		0x08000000
+#define CA_PIO_PCI_MEM_OFFSET_LEN	0x08000000
+
+/* PCI MEM Cycles - mapped to PCI Address 0x00000000-0xbfffffff */
+/*	use Fast Write if enabled and coretalk packet type is a GFX request */
+#define CA_PIO_PCI_MEM			0x40000000
+#define CA_PIO_PCI_MEM_LEN		0xc0000000
+
+/*
+ * DMA space
+ *
+ * The CA aperature (ie. bus address range) mapped by the GART is segmented into
+ * two parts.  The lower portion of the aperature is used for mapping 32 bit
+ * PCI addresses which are managed by the dma interfaces in this file.  The
+ * upper poprtion of the aperature is used for mapping 48 bit AGP addresses.
+ * The AGP portion of the aperature is managed by the agpgart_be.c driver
+ * in drivers/linux/agp.  There are ca-specific hooks in that driver to
+ * manipulate the gart, but management of the AGP portion of the aperature
+ * is the responsibility of that driver.
+ *
+ * CA allows three main types of DMA mapping:
+ *
+ * PCI 64-bit	Managed by this driver
+ * PCI 32-bit 	Managed by this driver
+ * AGP 48-bit	Managed by hooks in the /dev/agpgart driver
+ *
+ * All of the above can optionally be remapped through the GART.  The following
+ * table lists the combinations of addressing types and GART remapping that
+ * is currently supported by the driver (h/w supports all, s/w limits this):
+ *
+ *		PCI64		PCI32		AGP48
+ * GART		no		yes		yes
+ * Direct	yes		yes		no
+ *
+ * GART remapping of PCI64 is not done because there is no need to.  The
+ * 64 bit PCI address holds all of the information necessary to target any
+ * memory in the system.
+ *
+ * AGP48 is always mapped through the GART.  Management of the AGP48 portion
+ * of the aperature is the responsibility of code in the agpgart_be driver.
+ *
+ * The non-64 bit bus address space will currently be partitioned like this:
+ *
+ *	0xffff_ffff_ffff	+--------
+ *				| AGP48 direct
+ *				| Space managed by this driver
+ *	CA_AGP_DIRECT_BASE	+--------
+ *				| AGP GART mapped (gfx aperature)
+ *				| Space managed by /dev/agpgart driver
+ *				| This range is exposed to the agpgart
+ * 				| driver as the "graphics aperature"
+ *	CA_AGP_MAPPED_BASE	+-----
+ *				| PCI GART mapped
+ *				| Space managed by this driver		
+ *	CA_PCI32_MAPPED_BASE	+----
+ *				| PCI32 direct
+ *				| Space managed by this driver
+ *	0xC000_0000		+--------
+ *	(CA_PCI32_DIRECT_BASE)
+ *
+ * The bus address range CA_PCI32_MAPPED_BASE through CA_AGP_DIRECT_BASE
+ * is what we call the CA aperature.  Addresses falling in this range will
+ * be remapped using the GART.
+ *
+ * The bus address range CA_AGP_MAPPED_BASE through CA_AGP_DIRECT_BASE
+ * is what we call the graphics aperature.  This is a subset of the CA
+ * aperature and is under the control of the agpgart_be driver.
+ *
+ * CA_PCI32_MAPPED_BASE, CA_AGP_MAPPED_BASE, and CA_AGP_DIRECT_BASE are
+ * somewhat arbitrary values.  The known constraints on choosing these is:
+ *
+ * 1)  CA_AGP_DIRECT_BASE-CA_PCI32_MAPPED_BASE+1 (the CA aperature size)
+ *     must be one of the values supported by the ca_gart_aperature register.
+ *     Currently valid values are: 4MB through 4096MB in powers of 2 increments
+ *
+ * 2)  CA_AGP_DIRECT_BASE-CA_AGP_MAPPED_BASE+1 (the gfx aperature size)
+ *     must be in MB units since that's what the agpgart driver assumes.
+ */
+
+/*
+ * Define Bus DMA ranges.  These are configurable (see constraints above)
+ * and will probably need tuning based on experience.
+ */
+
+
+/*
+ * 11/24/03
+ * CA has an addressing glitch w.r.t. PCI direct 32 bit DMA that makes it
+ * generally unusable.  The problem is that for PCI direct 32 
+ * DMA's, all 32 bits of the bus address are used to form the lower 32 bits
+ * of the coretalk address, and coretalk bits 38:32 come from a register.
+ * Since only PCI bus addresses 0xC0000000-0xFFFFFFFF (1GB) are available
+ * for DMA (the rest is allocated to PIO), host node addresses need to be
+ * such that their lower 32 bits fall in the 0xC0000000-0xffffffff range
+ * as well.  So there can be no PCI32 direct DMA below 3GB!!  For this
+ * reason we set the CA_PCI32_DIRECT_SIZE to 0 which essentially makes
+ * tioca_dma_direct32() a noop but preserves the code flow should this issue
+ * be fixed in a respin.
+ *
+ * For now, all PCI32 DMA's must be mapped through the GART.
+ */
+
+#define CA_PCI32_DIRECT_BASE	0xC0000000UL	/* BASE not configurable */
+#define CA_PCI32_DIRECT_SIZE	0x00000000UL	/* 0 MB */
+
+#define CA_PCI32_MAPPED_BASE	0xC0000000UL
+#define CA_PCI32_MAPPED_SIZE	0x40000000UL	/* 2GB */
+
+#define CA_AGP_MAPPED_BASE	0x80000000UL
+#define CA_AGP_MAPPED_SIZE	0x40000000UL	/* 2GB */
+
+#define CA_AGP_DIRECT_BASE	0x40000000UL	/* 2GB */
+#define CA_AGP_DIRECT_SIZE	0x40000000UL
+
+#define CA_APERATURE_BASE	(CA_AGP_MAPPED_BASE)
+#define CA_APERATURE_SIZE	(CA_AGP_MAPPED_SIZE+CA_PCI32_MAPPED_SIZE)
+
+#endif  /* _ASM_IA64_SN_TIO_TIOCA_H */
diff --git a/include/asm-ia64/sn/tioca_provider.h b/include/asm-ia64/sn/tioca_provider.h
new file mode 100644
index 000000000000..b6acc22ab239
--- /dev/null
+++ b/include/asm-ia64/sn/tioca_provider.h
@@ -0,0 +1,206 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H
+#define _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H
+
+#include <asm/sn/tioca.h>
+
+/*
+ * WAR enables
+ * Defines for individual WARs. Each is a bitmask of applicable
+ * part revision numbers. (1 << 1) == rev A, (1 << 2) == rev B,
+ * (3 << 1) == (rev A or rev B), etc
+ */
+
+#define TIOCA_WAR_ENABLED(pv, tioca_common) \
+	((1 << tioca_common->ca_rev) & pv)
+
+  /* TIO:ICE:FRZ:Freezer loses a PIO data ucred on PIO RD RSP with CW error */
+#define PV907908 (1 << 1)
+  /* ATI config space problems after BIOS execution starts */
+#define PV908234 (1 << 1)
+  /* CA:AGPDMA write request data mismatch with ABC1CL merge */
+#define PV895469 (1 << 1)
+  /* TIO:CA TLB invalidate of written GART entries possibly not occuring in CA*/
+#define PV910244 (1 << 1)
+
+struct tioca_dmamap{
+	struct list_head	cad_list;	/* headed by ca_list */
+
+	dma_addr_t		cad_dma_addr;	/* Linux dma handle */
+	uint			cad_gart_entry; /* start entry in ca_gart_pagemap */
+	uint			cad_gart_size;	/* #entries for this map */
+};
+
+/*
+ * Kernel only fields.  Prom may look at this stuff for debugging only.
+ * Access this structure through the ca_kernel_private ptr.
+ */
+
+struct tioca_common ;
+
+struct tioca_kernel {
+	struct tioca_common	*ca_common;	/* tioca this belongs to */
+	struct list_head	ca_list;	/* list of all ca's */
+	struct list_head	ca_dmamaps;
+	spinlock_t		ca_lock;	/* Kernel lock */
+	cnodeid_t		ca_closest_node;
+	struct list_head	*ca_devices;	/* bus->devices */
+
+	/*
+	 * General GART stuff
+	 */
+	uint64_t	ca_ap_size;		/* size of aperature in bytes */
+	uint32_t	ca_gart_entries;	/* # uint64_t entries in gart */
+	uint32_t	ca_ap_pagesize; 	/* aperature page size in bytes */
+	uint64_t	ca_ap_bus_base; 	/* bus address of CA aperature */
+	uint64_t	ca_gart_size;		/* gart size in bytes */
+	uint64_t	*ca_gart;		/* gart table vaddr */
+	uint64_t	ca_gart_coretalk_addr;	/* gart coretalk addr */
+	uint8_t		ca_gart_iscoherent;	/* used in tioca_tlbflush */
+
+	/* PCI GART convenience values */
+	uint64_t	ca_pciap_base;		/* pci aperature bus base address */
+	uint64_t	ca_pciap_size;		/* pci aperature size (bytes) */
+	uint64_t	ca_pcigart_base;	/* gfx GART bus base address */
+	uint64_t	*ca_pcigart;		/* gfx GART vm address */
+	uint32_t	ca_pcigart_entries;
+	uint32_t	ca_pcigart_start;	/* PCI start index in ca_gart */
+	void		*ca_pcigart_pagemap;
+
+	/* AGP GART convenience values */
+	uint64_t	ca_gfxap_base;		/* gfx aperature bus base address */
+	uint64_t	ca_gfxap_size;		/* gfx aperature size (bytes) */
+	uint64_t	ca_gfxgart_base;	/* gfx GART bus base address */
+	uint64_t	*ca_gfxgart;		/* gfx GART vm address */
+	uint32_t	ca_gfxgart_entries;
+	uint32_t	ca_gfxgart_start;	/* agpgart start index in ca_gart */
+};
+
+/*
+ * Common tioca info shared between kernel and prom
+ *
+ * DO NOT CHANGE THIS STRUCT WITHOUT MAKING CORRESPONDING CHANGES
+ * TO THE PROM VERSION.
+ */
+
+struct tioca_common {
+	struct pcibus_bussoft	ca_common;	/* common pciio header */
+
+	uint32_t		ca_rev;
+	uint32_t		ca_closest_nasid;
+
+	uint64_t		ca_prom_private;
+	uint64_t		ca_kernel_private;
+};
+
+/**
+ * tioca_paddr_to_gart - Convert an SGI coretalk address to a CA GART entry
+ * @paddr: page address to convert
+ *
+ * Convert a system [coretalk] address to a GART entry.  GART entries are
+ * formed using the following:
+ *
+ *     data = ( (1<<63) |  ( (REMAP_NODE_ID << 40) | (MD_CHIPLET_ID << 38) | 
+ * (REMAP_SYS_ADDR) ) >> 12 )
+ *
+ * DATA written to 1 GART TABLE Entry in system memory is remapped system
+ * addr for 1 page 
+ *
+ * The data is for coretalk address format right shifted 12 bits with a
+ * valid bit.
+ *
+ *	GART_TABLE_ENTRY [ 25:0 ]  -- REMAP_SYS_ADDRESS[37:12].
+ *	GART_TABLE_ENTRY [ 27:26 ] -- SHUB MD chiplet id.
+ *	GART_TABLE_ENTRY [ 41:28 ] -- REMAP_NODE_ID.
+ *	GART_TABLE_ENTRY [ 63 ]    -- Valid Bit 
+ */
+static inline u64
+tioca_paddr_to_gart(unsigned long paddr)
+{
+	/*
+	 * We are assuming right now that paddr already has the correct
+	 * format since the address from xtalk_dmaXXX should already have
+	 * NODE_ID, CHIPLET_ID, and SYS_ADDR in the correct locations.
+	 */
+
+	return ((paddr) >> 12) | (1UL << 63);
+}
+
+/**
+ * tioca_physpage_to_gart - Map a host physical page for SGI CA based DMA
+ * @page_addr: system page address to map
+ */
+
+static inline unsigned long
+tioca_physpage_to_gart(uint64_t page_addr)
+{
+	uint64_t coretalk_addr;
+
+	coretalk_addr = PHYS_TO_TIODMA(page_addr);
+	if (!coretalk_addr) {
+		return 0;
+	}
+
+	return tioca_paddr_to_gart(coretalk_addr);
+}
+
+/**
+ * tioca_tlbflush - invalidate cached SGI CA GART TLB entries
+ * @tioca_kernel: CA context 
+ *
+ * Invalidate tlb entries for a given CA GART.  Main complexity is to account
+ * for revA bug.
+ */
+static inline void
+tioca_tlbflush(struct tioca_kernel *tioca_kernel)
+{
+	volatile uint64_t tmp;
+	volatile struct tioca *ca_base;
+	struct tioca_common *tioca_common;
+
+	tioca_common = tioca_kernel->ca_common;
+	ca_base = (struct tioca *)tioca_common->ca_common.bs_base;
+
+	/*
+	 * Explicit flushes not needed if GART is in cached mode
+	 */
+	if (tioca_kernel->ca_gart_iscoherent) {
+		if (TIOCA_WAR_ENABLED(PV910244, tioca_common)) {
+			/*
+			 * PV910244:  RevA CA needs explicit flushes.
+			 * Need to put GART into uncached mode before
+			 * flushing otherwise the explicit flush is ignored.
+			 *
+			 * Alternate WAR would be to leave GART cached and
+			 * touch every CL aligned GART entry.
+			 */
+
+			ca_base->ca_control2 &= ~(CA_GART_MEM_PARAM);
+			ca_base->ca_control2 |= CA_GART_FLUSH_TLB;
+			ca_base->ca_control2 |=
+			    (0x2ull << CA_GART_MEM_PARAM_SHFT);
+			tmp = ca_base->ca_control2;
+		}
+
+		return;
+	}
+
+	/*
+	 * Gart in uncached mode ... need an explicit flush.
+	 */
+
+	ca_base->ca_control2 |= CA_GART_FLUSH_TLB;
+	tmp = ca_base->ca_control2;
+}
+
+extern uint32_t	tioca_gart_found;
+extern int tioca_init_provider(void);
+extern void tioca_fastwrite_enable(struct tioca_kernel *tioca_kern);
+#endif /* _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H */

From bf1cf98fa941fea5e630e341db4a294d531aaa3e Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Mon, 25 Apr 2005 11:42:39 -0700
Subject: [PATCH 04/34] [IA64-SGI] Change SAL call request code for SN systems

Change the value of the SAL call number for a new SAL request. The
initial implementation in the PROM did not match what the OS expected.
Since the OS can run on PROMs that do not implement the new call,
changing the call number avoids the issue. New PROMs will implement
the new call number. (This avoids problems with the 4.05 PROM).

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/sn/sn_sal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 88c31b53dc09..81a1cf1e4f5a 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -35,8 +35,8 @@
 #define  SN_SAL_PRINT_ERROR			   0x02000012
 #define  SN_SAL_SET_ERROR_HANDLING_FEATURES	   0x0200001a	// reentrant
 #define  SN_SAL_GET_FIT_COMPT			   0x0200001b	// reentrant
-#define  SN_SAL_GET_SN_INFO                        0x0200001c
 #define  SN_SAL_GET_SAPIC_INFO                     0x0200001d
+#define  SN_SAL_GET_SN_INFO                        0x0200001e
 #define  SN_SAL_CONSOLE_PUTC                       0x02000021
 #define  SN_SAL_CONSOLE_GETC                       0x02000022
 #define  SN_SAL_CONSOLE_PUTS                       0x02000023

From 8297511530b0d2a281c796e738683951a59a020c Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@hpl.hp.com>
Date: Mon, 25 Apr 2005 11:44:02 -0700
Subject: [PATCH 05/34] [IA64] add missing cpu_relax() in ITC syncing code

Call cpu_relax() in busy-waiting loops of the ITC-syncing code.

Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/smpboot.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index ca1536db3394..dbc6b610cc64 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -1,7 +1,7 @@
 /*
  * SMP boot-related support
  *
- * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  *
  * 01/05/16 Rohit Seth <rohit.seth@intel.com>	Moved SMP booting functions from smp.c to here.
@@ -156,7 +156,8 @@ sync_master (void *arg)
 	local_irq_save(flags);
 	{
 		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
-			while (!go[MASTER]);
+			while (!go[MASTER])
+				cpu_relax();
 			go[MASTER] = 0;
 			go[SLAVE] = ia64_get_itc();
 		}
@@ -179,7 +180,8 @@ get_delta (long *rt, long *master)
 	for (i = 0; i < NUM_ITERS; ++i) {
 		t0 = ia64_get_itc();
 		go[MASTER] = 1;
-		while (!(tm = go[SLAVE]));
+		while (!(tm = go[SLAVE]))
+			cpu_relax();
 		go[SLAVE] = 0;
 		t1 = ia64_get_itc();
 
@@ -258,7 +260,8 @@ ia64_sync_itc (unsigned int master)
 		return;
 	}
 
-	while (go[MASTER]);	/* wait for master to be ready */
+	while (go[MASTER])
+		cpu_relax();	/* wait for master to be ready */
 
 	spin_lock_irqsave(&itc_sync_lock, flags);
 	{

From e8d1cb2f280aa53e1c75c8b5fcbf80b3481d0caa Mon Sep 17 00:00:00 2001
From: Keith Owens <kaos@sgi.com>
Date: Mon, 25 Apr 2005 11:45:26 -0700
Subject: [PATCH 06/34] [IA64] Tighten up unw_unwind_to_user check

Detect user space by the unwind frame with predicate PRED_USER_STACK
set, instead of a user space IP.  Tighten up the last ditch check for
running off the top of the kernel stack.

Based on a suggestion by David Mosberger, reworked to fit the current
tree.  This survives my stress test which used to break 2.6.9 kernels.
Unlike 2.6.11, the stress test now unwinds to the correct point, so
gdb can get the user space registers.

Signed-off-by: Keith Owens <kaos@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/unwind.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index d494ff647cac..2776a074c6f1 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -1943,23 +1943,30 @@ EXPORT_SYMBOL(unw_unwind);
 int
 unw_unwind_to_user (struct unw_frame_info *info)
 {
-	unsigned long ip, sp;
+	unsigned long ip, sp, pr = 0;
 
 	while (unw_unwind(info) >= 0) {
-		if (unw_get_rp(info, &ip) < 0) {
-			unw_get_ip(info, &ip);
-			UNW_DPRINT(0, "unwind.%s: failed to read return pointer (ip=0x%lx)\n",
-				   __FUNCTION__, ip);
+		unw_get_sp(info, &sp);
+		if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
+		    < IA64_PT_REGS_SIZE) {
+			UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n",
+				   __FUNCTION__);
+			break;
+		}
+		if (unw_is_intr_frame(info) &&
+		    (pr & (1UL << PRED_USER_STACK)))
+			return 0;
+		if (unw_get_pr (info, &pr) < 0) {
+			unw_get_rp(info, &ip);
+			UNW_DPRINT(0, "unwind.%s: failed to read "
+				   "predicate register (ip=0x%lx)\n",
+				__FUNCTION__, ip);
 			return -1;
 		}
-		unw_get_sp(info, &sp);
-		if (sp >= (unsigned long)info->task + IA64_STK_OFFSET)
-			break;
-		if (ip < FIXADDR_USER_END)
-			return 0;
 	}
 	unw_get_ip(info, &ip);
-	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip);
+	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
+		   __FUNCTION__, ip);
 	return -1;
 }
 EXPORT_SYMBOL(unw_unwind_to_user);

From 30325d17715302a60c9afdaacaafaeb056b7e880 Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@hpl.hp.com>
Date: Mon, 25 Apr 2005 13:03:16 -0700
Subject: [PATCH 07/34] [IA64] speed up syscall path a bit more

Recently I noticed that clearing ar.ssd/ar.csd right before srlz.d is
causing significant stalling in the syscall path.  The patch below
fixes that by moving the register-writes after srlz.d.  On a Madison,
this drops break-based getpid() from 241 to 226 cycles (-15 cycles).

Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/entry.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 0272c010a3ba..73e23dafe8e9 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -728,12 +728,8 @@ ENTRY(ia64_leave_syscall)
 	mov f8=f0		// clear f8
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
-	mov.m ar.ssd=r0		// M2 clear ar.ssd
-	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
-	;;
 	ld8 r25=[r3],16		// M0|1 load ar.unat
-	mov.m ar.csd=r0		// M2 clear ar.csd
-	mov r22=r0		// clear r22
+	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
 	;;
 	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
 (pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
@@ -756,11 +752,15 @@ ENTRY(ia64_leave_syscall)
 	mov f7=f0		// clear f7
 	;;
 	ld8.fill r12=[r2]	// restore r12 (sp)
+	mov.m ar.ssd=r0		// M2 clear ar.ssd
+	mov r22=r0		// clear r22
+
 	ld8.fill r15=[r3]	// restore r15
+(pUStk) st1 [r14]=r17
 	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
 	;;
 (pUStk)	ld4 r3=[r3]		// r3 = cpu_data->phys_stacked_size_p8
-(pUStk) st1 [r14]=r17
+	mov.m ar.csd=r0		// M2 clear ar.csd
 	mov b6=r18		// I0  restore b6
 	;;
 	mov r14=r0		// clear r14

From 4a5c13c7eb0d55bfd2cf3100c55f1e3d8df37576 Mon Sep 17 00:00:00 2001
From: Mark Goodwin <markgw@sgi.com>
Date: Mon, 25 Apr 2005 13:04:22 -0700
Subject: [PATCH 08/34] [IA64-SGI] Altix SN topology support for new chipsets
 and pci topology

please accept this patch to the Altix SN platform topology export
interface to support new chipsets and to export PCI topology.

This follows on top of Jack Steiner's patch dated March 1st
("New chipset support for SN platform").

Signed-off-by: Mark Goodwin <markgw@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/sn2/sn_hwperf.c | 133 ++++++++++++++++++++++++++--
 include/asm-ia64/sn/sn_sal.h        |  11 +++
 2 files changed, 135 insertions(+), 9 deletions(-)

diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 197356460ee1..3bff99130d5e 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -28,6 +28,7 @@
 #include <linux/vmalloc.h>
 #include <linux/seq_file.h>
 #include <linux/miscdevice.h>
+#include <linux/utsname.h>
 #include <linux/cpumask.h>
 #include <linux/smp_lock.h>
 #include <linux/nodemask.h>
@@ -43,6 +44,7 @@
 #include <asm/sn/module.h>
 #include <asm/sn/geo.h>
 #include <asm/sn/sn2/sn_hwperf.h>
+#include <asm/sn/addrs.h>
 
 static void *sn_hwperf_salheap = NULL;
 static int sn_hwperf_obj_cnt = 0;
@@ -81,26 +83,45 @@ out:
 	return e;
 }
 
+static int sn_hwperf_location_to_bpos(char *location,
+	int *rack, int *bay, int *slot, int *slab)
+{
+	char type;
+
+	/* first scan for an old style geoid string */
+	if (sscanf(location, "%03d%c%02d#%d",
+		rack, &type, bay, slab) == 4)
+		*slot = 0; 
+	else /* scan for a new bladed geoid string */
+	if (sscanf(location, "%03d%c%02d^%02d#%d",
+		rack, &type, bay, slot, slab) != 5)
+		return -1; 
+	/* success */
+	return 0;
+}
+
 static int sn_hwperf_geoid_to_cnode(char *location)
 {
 	int cnode;
 	geoid_t geoid;
 	moduleid_t module_id;
-	char type;
-	int rack, slot, slab;
-	int this_rack, this_slot, this_slab;
+	int rack, bay, slot, slab;
+	int this_rack, this_bay, this_slot, this_slab;
 
-	if (sscanf(location, "%03d%c%02d#%d", &rack, &type, &slot, &slab) != 4)
+	if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab))
 		return -1;
 
 	for (cnode = 0; cnode < numionodes; cnode++) {
 		geoid = cnodeid_get_geoid(cnode);
 		module_id = geo_module(geoid);
 		this_rack = MODULE_GET_RACK(module_id);
-		this_slot = MODULE_GET_BPOS(module_id);
+		this_bay = MODULE_GET_BPOS(module_id);
+		this_slot = 0; /* XXX */
 		this_slab = geo_slab(geoid);
-		if (rack == this_rack && slot == this_slot && slab == this_slab)
+		if (rack == this_rack && bay == this_bay &&
+			slot == this_slot && slab == this_slab) {
 			break;
+		}
 	}
 
 	return cnode < numionodes ? cnode : -1;
@@ -153,11 +174,29 @@ static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj,
 	return slabname;
 }
 
+static void print_pci_topology(struct seq_file *s,
+	struct sn_hwperf_object_info *obj, int *ordinal,
+	char *pci_topo_buf, int len)
+{
+	char *p1;
+	char *p2;
+
+	for (p1=pci_topo_buf; *p1 && p1 < pci_topo_buf + len;) {
+		if (!(p2 = strchr(p1, '\n')))
+			break;
+		*p2 = '\0';
+		seq_printf(s, "pcibus %d %s-%s\n",
+			*ordinal, obj->location, p1);
+		(*ordinal)++;
+		p1 = p2 + 1;
+	}
+}
+
 static int sn_topology_show(struct seq_file *s, void *d)
 {
 	int sz;
 	int pt;
-	int e;
+	int e = 0;
 	int i;
 	int j;
 	const char *slabname;
@@ -169,11 +208,46 @@ static int sn_topology_show(struct seq_file *s, void *d)
 	struct sn_hwperf_object_info *p;
 	struct sn_hwperf_object_info *obj = d;	/* this object */
 	struct sn_hwperf_object_info *objs = s->private; /* all objects */
+	int rack, bay, slot, slab;
+	u8 shubtype;
+	u8 system_size;
+	u8 sharing_size;
+	u8 partid;
+	u8 coher;
+	u8 nasid_shift;
+	u8 region_size;
+	u16 nasid_mask;
+	int nasid_msb;
+	char *pci_topo_buf;
+	int pci_bus_ordinal = 0;
+	static int pci_topo_buf_len = 256;
 
 	if (obj == objs) {
-		seq_printf(s, "# sn_topology version 1\n");
+		seq_printf(s, "# sn_topology version 2\n");
 		seq_printf(s, "# objtype ordinal location partition"
 			" [attribute value [, ...]]\n");
+
+		if (ia64_sn_get_sn_info(0,
+			&shubtype, &nasid_mask, &nasid_shift, &system_size,
+			&sharing_size, &partid, &coher, &region_size))
+			BUG();
+		for (nasid_msb=63; nasid_msb > 0; nasid_msb--) {
+			if (((u64)nasid_mask << nasid_shift) & (1ULL << nasid_msb))
+				break;
+		}
+		seq_printf(s, "partition %u %s local "
+			"shubtype %s, "
+			"nasid_mask 0x%016lx, "
+			"nasid_bits %d:%d, "
+			"system_size %d, "
+			"sharing_size %d, "
+			"coherency_domain %d, "
+			"region_size %d\n",
+
+			partid, system_utsname.nodename,
+			shubtype ? "shub2" : "shub1", 
+			(u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
+			system_size, sharing_size, coher, region_size);
 	}
 
 	if (SN_HWPERF_FOREIGN(obj)) {
@@ -181,7 +255,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
 		return 0;
 	}
 
-	for (i = 0; obj->name[i]; i++) {
+	for (i = 0; i < SN_HWPERF_MAXSTRING && obj->name[i]; i++) {
 		if (obj->name[i] == ' ')
 			obj->name[i] = '_';
 	}
@@ -221,6 +295,43 @@ static int sn_topology_show(struct seq_file *s, void *d)
 				seq_putc(s, '\n');
 			}
 		}
+
+		/*
+		 * PCI busses attached to this node, if any
+		 */
+		do {
+			if (!(pci_topo_buf = vmalloc(pci_topo_buf_len))) {
+				printk("sn_topology_show: kmalloc failed\n");
+				break;
+			}
+
+			if (sn_hwperf_location_to_bpos(obj->location,
+				&rack, &bay, &slot, &slab) != 0)
+				continue;
+
+			e = ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab,
+			    	pci_topo_buf, pci_topo_buf_len);
+
+			switch (e) {
+			case SALRET_NOT_IMPLEMENTED:
+			case SALRET_INVALID_ARG:
+				/* ignore, don't print anything */
+				e = SN_HWPERF_OP_OK;
+				break;
+
+			case SALRET_ERROR:
+				/* retry with a bigger buffer */ 
+				pci_topo_buf_len += 256;
+				break;
+
+			case SN_HWPERF_OP_OK:
+				/* export pci bus info */
+				print_pci_topology(s, obj, &pci_bus_ordinal,
+					pci_topo_buf, pci_topo_buf_len);
+				break;
+			}
+			vfree(pci_topo_buf);
+		} while (e != SN_HWPERF_OP_OK && pci_topo_buf_len < 0x200000);
 	}
 
 	if (obj->ports) {
@@ -397,6 +508,9 @@ static int sn_hwperf_map_err(int hwperf_err)
 		break;
 
 	case SN_HWPERF_OP_BUSY:
+		e = -EBUSY;
+		break;
+
 	case SN_HWPERF_OP_RECONFIGURE:
 		e = -EAGAIN;
 		break;
@@ -549,6 +663,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
 		r = sn_hwperf_op_cpu(&op_info);
 		if (r) {
 			r = sn_hwperf_map_err(r);
+			a.v0 = v0;
 			goto error;
 		}
 		break;
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 81a1cf1e4f5a..410d356b40da 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -74,6 +74,7 @@
 #define  SN_SAL_IOIF_GET_PCIBUS_INFO		   0x02000056
 #define  SN_SAL_IOIF_GET_PCIDEV_INFO		   0x02000057
 #define  SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST	   0x02000058
+#define  SN_SAL_IOIF_GET_PCI_TOPOLOGY	           0x02000059
 
 #define SN_SAL_HUB_ERROR_INTERRUPT		   0x02000060
 
@@ -1012,4 +1013,14 @@ ia64_sn_hwperf_op(nasid_t nasid, u64 opcode, u64 a0, u64 a1, u64 a2,
 	return (int) rv.status;
 }
 
+static inline int
+ia64_sn_ioif_get_pci_topology(u64 rack, u64 bay, u64 slot, u64 slab,
+			      char *buf, u64 len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_IOIF_GET_PCI_TOPOLOGY,
+		rack, bay, slot, slab, buf, len, 0);
+	return (int) rv.status;
+}
+
 #endif /* _ASM_IA64_SN_SN_SAL_H */

From f1e2a1c8a1fe16db5f4a7c0c1551d6e1b97dcbb2 Mon Sep 17 00:00:00 2001
From: Mark Goodwin <markgw@sgi.com>
Date: Mon, 25 Apr 2005 13:05:08 -0700
Subject: [PATCH 09/34] [IA64-SGI] Altix SN topology fix potential infinite
 loop

Fix infinite loop if sn_hwperf_location_to_bpos() fails.

Signed-off-by: Mark Goodwin <markgw@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/sn2/sn_hwperf.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 3bff99130d5e..e731fcb95f90 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -300,14 +300,15 @@ static int sn_topology_show(struct seq_file *s, void *d)
 		 * PCI busses attached to this node, if any
 		 */
 		do {
-			if (!(pci_topo_buf = vmalloc(pci_topo_buf_len))) {
-				printk("sn_topology_show: kmalloc failed\n");
+			if (sn_hwperf_location_to_bpos(obj->location,
+				&rack, &bay, &slot, &slab)) {
 				break;
 			}
 
-			if (sn_hwperf_location_to_bpos(obj->location,
-				&rack, &bay, &slot, &slab) != 0)
-				continue;
+			if (!(pci_topo_buf = vmalloc(pci_topo_buf_len))) {
+				printk("sn_topology_show: vmalloc failed\n");
+				break;
+			}
 
 			e = ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab,
 			    	pci_topo_buf, pci_topo_buf_len);
@@ -325,6 +326,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
 				break;
 
 			case SN_HWPERF_OP_OK:
+			default:
 				/* export pci bus info */
 				print_pci_topology(s, obj, &pci_bus_ordinal,
 					pci_topo_buf, pci_topo_buf_len);

From be539c73b54dcc9f54fb2c2b70e204c93b616c9b Mon Sep 17 00:00:00 2001
From: Colin Ngam <cngam@sgi.com>
Date: Mon, 25 Apr 2005 13:06:28 -0700
Subject: [PATCH 10/34] [IA64-SGI] Shub2 provides an addition of 2 External
 Interrupt events.

Signed-off-by: Colin Ngam <cngam@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/irq.c      | 15 ++-------------
 include/asm-ia64/sn/shub_mmr.h | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index b52d32975f2b..0f4e8138658f 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -82,20 +82,9 @@ static void sn_ack_irq(unsigned int irq)
 	nasid = get_nasid();
 	event_occurred =
 	    HUB_L((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED));
-	if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) {
-		mask |= (1 << SH_EVENT_OCCURRED_UART_INT_SHFT);
-	}
-	if (event_occurred & SH_EVENT_OCCURRED_IPI_INT_MASK) {
-		mask |= (1 << SH_EVENT_OCCURRED_IPI_INT_SHFT);
-	}
-	if (event_occurred & SH_EVENT_OCCURRED_II_INT0_MASK) {
-		mask |= (1 << SH_EVENT_OCCURRED_II_INT0_SHFT);
-	}
-	if (event_occurred & SH_EVENT_OCCURRED_II_INT1_MASK) {
-		mask |= (1 << SH_EVENT_OCCURRED_II_INT1_SHFT);
-	}
+	mask = event_occurred & SH_ALL_INT_MASK;
 	HUB_S((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED_ALIAS),
-	      mask);
+		 mask);
 	__set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
 
 	move_irq(irq);
diff --git a/include/asm-ia64/sn/shub_mmr.h b/include/asm-ia64/sn/shub_mmr.h
index 5c2fcf13d5ce..6ec37e816a9e 100644
--- a/include/asm-ia64/sn/shub_mmr.h
+++ b/include/asm-ia64/sn/shub_mmr.h
@@ -129,6 +129,23 @@
 #define SH_EVENT_OCCURRED_II_INT1_SHFT           30
 #define SH_EVENT_OCCURRED_II_INT1_MASK           0x0000000040000000
 
+/*   SH2_EVENT_OCCURRED_EXTIO_INT2                                      */
+/*   Description:  Pending SHUB 2 EXT IO INT2                           */
+#define SH2_EVENT_OCCURRED_EXTIO_INT2_SHFT       33
+#define SH2_EVENT_OCCURRED_EXTIO_INT2_MASK       0x0000000200000000
+
+/*   SH2_EVENT_OCCURRED_EXTIO_INT3                                      */
+/*   Description:  Pending SHUB 2 EXT IO INT3                           */
+#define SH2_EVENT_OCCURRED_EXTIO_INT3_SHFT       34
+#define SH2_EVENT_OCCURRED_EXTIO_INT3_MASK       0x0000000400000000
+
+#define SH_ALL_INT_MASK \
+	(SH_EVENT_OCCURRED_UART_INT_MASK | SH_EVENT_OCCURRED_IPI_INT_MASK | \
+	 SH_EVENT_OCCURRED_II_INT0_MASK | SH_EVENT_OCCURRED_II_INT1_MASK | \
+	 SH_EVENT_OCCURRED_II_INT1_MASK | SH2_EVENT_OCCURRED_EXTIO_INT2_MASK | \
+	 SH2_EVENT_OCCURRED_EXTIO_INT3_MASK)
+
+
 /* ==================================================================== */
 /*                         LEDS                                         */
 /* ==================================================================== */

From 658b32cad9ae087bd34f35a925fd75b76d663d4e Mon Sep 17 00:00:00 2001
From: Colin Ngam <cngam@sgi.com>
Date: Mon, 25 Apr 2005 13:07:00 -0700
Subject: [PATCH 11/34] [IA64-SGI] support variable length nasids in shub2

This patch enables our TIO IO chipset to support variable length nasids in
Shub2 chipset.

Signed-off-by: Colin Ngam <cngam@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/sn/addrs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h
index c916bd22767a..ae0bc99d573e 100644
--- a/include/asm-ia64/sn/addrs.h
+++ b/include/asm-ia64/sn/addrs.h
@@ -154,8 +154,9 @@
  *           the chiplet id is zero.  If we implement TIO-TIO dma, we might need
  *           to insert a chiplet id into this macro.  However, it is our belief
  *           right now that this chiplet id will be ICE, which is also zero.
+ *           Nasid starts on bit 40.
  */
-#define PHYS_TO_TIODMA(x)	( (((u64)(x) & NASID_MASK) << 2) | NODE_OFFSET(x))
+#define PHYS_TO_TIODMA(x)	( (((u64)(NASID_GET(x))) << 40) | NODE_OFFSET(x))
 #define PHYS_TO_DMA(x)          ( (((u64)(x) & NASID_MASK) >> 2) | NODE_OFFSET(x))
 
 

From 4944930ab748942e41ea4dc313fcb0946aee3f17 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@hpl.hp.com>
Date: Mon, 25 Apr 2005 13:08:30 -0700
Subject: [PATCH 12/34] [IA64] perfmon: make pfm_sysctl a global, and other
 cleanup

- make pfm_sysctl a global such that it is possible
  to enable/disable debug printk in sampling formats
  using PFM_DEBUG.

- remove unused pfm_debug_var variable

- fix a bug in pfm_handle_work where an BUG_ON() could
  be triggered. There is a path where pfm_handle_work()
  can be called with interrupts enabled, i.e., when
  TIF_NEED_RESCHED is set. The fix correct the masking
  and unmasking of interrupts in pfm_handle_work() such
  that we restore the interrupt mask as it was upon entry.

signed-off-by: stephane eranian <eranian@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/perfmon.c              | 59 ++++++++++++-------------
 arch/ia64/kernel/perfmon_default_smpl.c | 13 +-----
 include/asm-ia64/perfmon.h              | 12 +++++
 3 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 71147be3279c..376fcbc3f8da 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -479,14 +479,6 @@ typedef struct {
 
 #define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
 
-typedef struct {
-	int	debug;		/* turn on/off debugging via syslog */
-	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
-	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
-	int	expert_mode;	/* turn on/off value checking */
-	int 	debug_pfm_read;
-} pfm_sysctl_t;
-
 typedef struct {
 	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
 	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
@@ -514,8 +506,8 @@ static LIST_HEAD(pfm_buffer_fmt_list);
 static pmu_config_t		*pmu_conf;
 
 /* sysctl() controls */
-static pfm_sysctl_t pfm_sysctl;
-int pfm_debug_var;
+pfm_sysctl_t pfm_sysctl;
+EXPORT_SYMBOL(pfm_sysctl);
 
 static ctl_table pfm_ctl_table[]={
 	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
@@ -1576,7 +1568,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 		goto abort_locked;
 	}
 
-	DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
+	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
 
 	ret = -EFAULT;
   	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
@@ -3695,8 +3687,6 @@ pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 	pfm_sysctl.debug = m == 0 ? 0 : 1;
 
-	pfm_debug_var = pfm_sysctl.debug;
-
 	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
 
 	if (m == 0) {
@@ -4996,13 +4986,21 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
 }
 
 static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
-
+ /*
+  * pfm_handle_work() can be called with interrupts enabled
+  * (TIF_NEED_RESCHED) or disabled. The down_interruptible
+  * call may sleep, therefore we must re-enable interrupts
+  * to avoid deadlocks. It is safe to do so because this function
+  * is called ONLY when returning to user level (PUStk=1), in which case
+  * there is no risk of kernel stack overflow due to deep
+  * interrupt nesting.
+  */
 void
 pfm_handle_work(void)
 {
 	pfm_context_t *ctx;
 	struct pt_regs *regs;
-	unsigned long flags;
+	unsigned long flags, dummy_flags;
 	unsigned long ovfl_regs;
 	unsigned int reason;
 	int ret;
@@ -5039,18 +5037,15 @@ pfm_handle_work(void)
 	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
 	if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
 
+	/*
+	 * restore interrupt mask to what it was on entry.
+	 * Could be enabled/diasbled.
+	 */
 	UNPROTECT_CTX(ctx, flags);
 
-	 /*
-	  * pfm_handle_work() is currently called with interrupts disabled.
-	  * The down_interruptible call may sleep, therefore we
-	  * must re-enable interrupts to avoid deadlocks. It is
-	  * safe to do so because this function is called ONLY
-	  * when returning to user level (PUStk=1), in which case
-	  * there is no risk of kernel stack overflow due to deep
-	  * interrupt nesting.
-	  */
-	BUG_ON(flags & IA64_PSR_I);
+	/*
+	 * force interrupt enable because of down_interruptible()
+	 */
 	local_irq_enable();
 
 	DPRINT(("before block sleeping\n"));
@@ -5064,12 +5059,12 @@ pfm_handle_work(void)
 	DPRINT(("after block sleeping ret=%d\n", ret));
 
 	/*
-	 * disable interrupts to restore state we had upon entering
-	 * this function
+	 * lock context and mask interrupts again
+	 * We save flags into a dummy because we may have
+	 * altered interrupts mask compared to entry in this
+	 * function.
 	 */
-	local_irq_disable();
-
-	PROTECT_CTX(ctx, flags);
+	PROTECT_CTX(ctx, dummy_flags);
 
 	/*
 	 * we need to read the ovfl_regs only after wake-up
@@ -5095,7 +5090,9 @@ skip_blocking:
 	ctx->ctx_ovfl_regs[0] = 0UL;
 
 nothing_to_do:
-
+	/*
+	 * restore flags as they were upon entry
+	 */
 	UNPROTECT_CTX(ctx, flags);
 }
 
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
index 965d29004555..344941db0a9e 100644
--- a/arch/ia64/kernel/perfmon_default_smpl.c
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@@ -20,24 +20,17 @@ MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
 MODULE_DESCRIPTION("perfmon default sampling format");
 MODULE_LICENSE("GPL");
 
-MODULE_PARM(debug, "i");
-MODULE_PARM_DESC(debug, "debug");
-
-MODULE_PARM(debug_ovfl, "i");
-MODULE_PARM_DESC(debug_ovfl, "debug ovfl");
-
-
 #define DEFAULT_DEBUG 1
 
 #ifdef DEFAULT_DEBUG
 #define DPRINT(a) \
 	do { \
-		if (unlikely(debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
 	} while (0)
 
 #define DPRINT_ovfl(a) \
 	do { \
-		if (unlikely(debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
 	} while (0)
 
 #else
@@ -45,8 +38,6 @@ MODULE_PARM_DESC(debug_ovfl, "debug ovfl");
 #define DPRINT_ovfl(a)
 #endif
 
-static int debug, debug_ovfl;
-
 static int
 default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
 {
diff --git a/include/asm-ia64/perfmon.h b/include/asm-ia64/perfmon.h
index 136c60e6bfcc..ed5416c5b1ac 100644
--- a/include/asm-ia64/perfmon.h
+++ b/include/asm-ia64/perfmon.h
@@ -254,6 +254,18 @@ extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int
 #define PFM_CPUINFO_DCR_PP	0x2	/* if set the system wide session has started */
 #define PFM_CPUINFO_EXCL_IDLE	0x4	/* the system wide session excludes the idle task */
 
+/*
+ * sysctl control structure. visible to sampling formats
+ */
+typedef struct {
+	int	debug;		/* turn on/off debugging via syslog */
+	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
+	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
+	int	expert_mode;	/* turn on/off value checking */
+} pfm_sysctl_t;
+extern pfm_sysctl_t pfm_sysctl;
+
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_IA64_PERFMON_H */

From e1e19747ecce9117610b8f3b57a3e95734230319 Mon Sep 17 00:00:00 2001
From: Bruce Losure <blosure@sgi.com>
Date: Mon, 25 Apr 2005 13:09:41 -0700
Subject: [PATCH 13/34] [IA64-SGI] Bus driver for the CX port of SGI's TIO
 chip.

This patch is to provide CX port infrastructure for SGI TIO-based
h/w.   Also a 'core services' driver for SGI FPGA-based h/w.

Signed-off-by: Bruce Losure <blosure@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/configs/sn2_defconfig |   2 +
 arch/ia64/sn/kernel/Makefile    |   1 +
 drivers/char/Kconfig            |  14 +
 drivers/char/Makefile           |   1 +
 drivers/char/mbcs.c             | 849 ++++++++++++++++++++++++++++++++
 drivers/char/mbcs.h             | 553 +++++++++++++++++++++
 include/asm-ia64/sn/addrs.h     |   3 +
 include/asm-ia64/sn/tiocx.h     |  71 +++
 8 files changed, 1494 insertions(+)
 create mode 100644 drivers/char/mbcs.c
 create mode 100644 drivers/char/mbcs.h
 create mode 100644 include/asm-ia64/sn/tiocx.h

diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
index bfeb952fe8e2..6ff7107fee4d 100644
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -574,6 +574,8 @@ CONFIG_SERIAL_NONSTANDARD=y
 # CONFIG_N_HDLC is not set
 # CONFIG_STALDRV is not set
 CONFIG_SGI_SNSC=y
+CONFIG_SGI_TIOCX=y
+CONFIG_SGI_MBCS=m
 
 #
 # Serial drivers
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 6c7f4d9e8ea0..4f381fb25049 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -10,3 +10,4 @@
 obj-y				+= setup.o bte.o bte_error.o irq.o mca.o idle.o \
 				   huberror.o io_init.o iomv.o klconflib.o sn2/
 obj-$(CONFIG_IA64_GENERIC)      += machvec.o
+obj-$(CONFIG_SGI_TIOCX)		+= tiocx.o
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 096a1202ea07..97ac4edf4655 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -399,6 +399,20 @@ config SGI_SNSC
 	  controller communication from user space (you want this!),
 	  say Y.  Otherwise, say N.
 
+config SGI_TIOCX
+       bool "SGI TIO CX driver support"
+       depends on (IA64_SGI_SN2 || IA64_GENERIC)
+       help
+         If you have an SGI Altix and you have fpga devices attached
+         to your TIO, say Y here, otherwise say N.
+
+config SGI_MBCS
+       tristate "SGI FPGA Core Services driver support"
+       depends on (IA64_SGI_SN2 || IA64_GENERIC)
+       help
+         If you have an SGI Altix with an attached SABrick
+         say Y or M here, otherwise say N.
+
 source "drivers/serial/Kconfig"
 
 config UNIX98_PTYS
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 54ed76af1a47..3ea8cc80ea3d 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_MMTIMER)		+= mmtimer.o
 obj-$(CONFIG_VIOCONS) += viocons.o
 obj-$(CONFIG_VIOTAPE)		+= viotape.o
 obj-$(CONFIG_HVCS)		+= hvcs.o
+obj-$(CONFIG_SGI_MBCS)		+= mbcs.o
 
 obj-$(CONFIG_PRINTER) += lp.o
 obj-$(CONFIG_TIPAR) += tipar.o
diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c
new file mode 100644
index 000000000000..ec7100556c50
--- /dev/null
+++ b/drivers/char/mbcs.c
@@ -0,0 +1,849 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+/*
+ *	MOATB Core Services driver.
+ */
+
+#include <linux/config.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/uio.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/tiocx.h>
+#include "mbcs.h"
+
+#define MBCS_DEBUG 0
+#if MBCS_DEBUG
+#define DBG(fmt...)    printk(KERN_ALERT fmt)
+#else
+#define DBG(fmt...)
+#endif
+int mbcs_major;
+
+LIST_HEAD(soft_list);
+
+/*
+ * file operations
+ */
+struct file_operations mbcs_ops = {
+	.open = mbcs_open,
+	.llseek = mbcs_sram_llseek,
+	.read = mbcs_sram_read,
+	.write = mbcs_sram_write,
+	.mmap = mbcs_gscr_mmap,
+};
+
+struct mbcs_callback_arg {
+	int minor;
+	struct cx_dev *cx_dev;
+};
+
+static inline void mbcs_getdma_init(struct getdma *gdma)
+{
+	memset(gdma, 0, sizeof(struct getdma));
+	gdma->DoneIntEnable = 1;
+}
+
+static inline void mbcs_putdma_init(struct putdma *pdma)
+{
+	memset(pdma, 0, sizeof(struct putdma));
+	pdma->DoneIntEnable = 1;
+}
+
+static inline void mbcs_algo_init(struct algoblock *algo_soft)
+{
+	memset(algo_soft, 0, sizeof(struct algoblock));
+}
+
+static inline void mbcs_getdma_set(void *mmr,
+		       uint64_t hostAddr,
+		       uint64_t localAddr,
+		       uint64_t localRamSel,
+		       uint64_t numPkts,
+		       uint64_t amoEnable,
+		       uint64_t intrEnable,
+		       uint64_t peerIO,
+		       uint64_t amoHostDest,
+		       uint64_t amoModType, uint64_t intrHostDest,
+		       uint64_t intrVector)
+{
+	union dma_control rdma_control;
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union dma_localaddr local_addr;
+	union dma_hostaddr host_addr;
+
+	rdma_control.dma_control_reg = 0;
+	amo_dest.dma_amo_dest_reg = 0;
+	intr_dest.intr_dest_reg = 0;
+	local_addr.dma_localaddr_reg = 0;
+	host_addr.dma_hostaddr_reg = 0;
+
+	host_addr.dma_sys_addr = hostAddr;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_SYS_ADDR, host_addr.dma_hostaddr_reg);
+
+	local_addr.dma_ram_addr = localAddr;
+	local_addr.dma_ram_sel = localRamSel;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_LOC_ADDR, local_addr.dma_localaddr_reg);
+
+	rdma_control.dma_op_length = numPkts;
+	rdma_control.done_amo_en = amoEnable;
+	rdma_control.done_int_en = intrEnable;
+	rdma_control.pio_mem_n = peerIO;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_CTRL, rdma_control.dma_control_reg);
+
+	amo_dest.dma_amo_sys_addr = amoHostDest;
+	amo_dest.dma_amo_mod_type = amoModType;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_AMO_DEST, amo_dest.dma_amo_dest_reg);
+
+	intr_dest.address = intrHostDest;
+	intr_dest.int_vector = intrVector;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_INT_DEST, intr_dest.intr_dest_reg);
+
+}
+
+static inline void mbcs_putdma_set(void *mmr,
+		       uint64_t hostAddr,
+		       uint64_t localAddr,
+		       uint64_t localRamSel,
+		       uint64_t numPkts,
+		       uint64_t amoEnable,
+		       uint64_t intrEnable,
+		       uint64_t peerIO,
+		       uint64_t amoHostDest,
+		       uint64_t amoModType,
+		       uint64_t intrHostDest, uint64_t intrVector)
+{
+	union dma_control wdma_control;
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union dma_localaddr local_addr;
+	union dma_hostaddr host_addr;
+
+	wdma_control.dma_control_reg = 0;
+	amo_dest.dma_amo_dest_reg = 0;
+	intr_dest.intr_dest_reg = 0;
+	local_addr.dma_localaddr_reg = 0;
+	host_addr.dma_hostaddr_reg = 0;
+
+	host_addr.dma_sys_addr = hostAddr;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_SYS_ADDR, host_addr.dma_hostaddr_reg);
+
+	local_addr.dma_ram_addr = localAddr;
+	local_addr.dma_ram_sel = localRamSel;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_LOC_ADDR, local_addr.dma_localaddr_reg);
+
+	wdma_control.dma_op_length = numPkts;
+	wdma_control.done_amo_en = amoEnable;
+	wdma_control.done_int_en = intrEnable;
+	wdma_control.pio_mem_n = peerIO;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_CTRL, wdma_control.dma_control_reg);
+
+	amo_dest.dma_amo_sys_addr = amoHostDest;
+	amo_dest.dma_amo_mod_type = amoModType;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_AMO_DEST, amo_dest.dma_amo_dest_reg);
+
+	intr_dest.address = intrHostDest;
+	intr_dest.int_vector = intrVector;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_INT_DEST, intr_dest.intr_dest_reg);
+
+}
+
+static inline void mbcs_algo_set(void *mmr,
+		     uint64_t amoHostDest,
+		     uint64_t amoModType,
+		     uint64_t intrHostDest,
+		     uint64_t intrVector, uint64_t algoStepCount)
+{
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union algo_step step;
+
+	step.algo_step_reg = 0;
+	intr_dest.intr_dest_reg = 0;
+	amo_dest.dma_amo_dest_reg = 0;
+
+	amo_dest.dma_amo_sys_addr = amoHostDest;
+	amo_dest.dma_amo_mod_type = amoModType;
+	MBCS_MMR_SET(mmr, MBCS_ALG_AMO_DEST, amo_dest.dma_amo_dest_reg);
+
+	intr_dest.address = intrHostDest;
+	intr_dest.int_vector = intrVector;
+	MBCS_MMR_SET(mmr, MBCS_ALG_INT_DEST, intr_dest.intr_dest_reg);
+
+	step.alg_step_cnt = algoStepCount;
+	MBCS_MMR_SET(mmr, MBCS_ALG_STEP, step.algo_step_reg);
+}
+
+static inline int mbcs_getdma_start(struct mbcs_soft *soft)
+{
+	void *mmr_base;
+	struct getdma *gdma;
+	uint64_t numPkts;
+	union cm_control cm_control;
+
+	mmr_base = soft->mmr_base;
+	gdma = &soft->getdma;
+
+	/* check that host address got setup */
+	if (!gdma->hostAddr)
+		return -1;
+
+	numPkts =
+	    (gdma->bytes + (MBCS_CACHELINE_SIZE - 1)) / MBCS_CACHELINE_SIZE;
+
+	/* program engine */
+	mbcs_getdma_set(mmr_base, tiocx_dma_addr(gdma->hostAddr),
+		   gdma->localAddr,
+		   (gdma->localAddr < MB2) ? 0 :
+		   (gdma->localAddr < MB4) ? 1 :
+		   (gdma->localAddr < MB6) ? 2 : 3,
+		   numPkts,
+		   gdma->DoneAmoEnable,
+		   gdma->DoneIntEnable,
+		   gdma->peerIO,
+		   gdma->amoHostDest,
+		   gdma->amoModType,
+		   gdma->intrHostDest, gdma->intrVector);
+
+	/* start engine */
+	cm_control.cm_control_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+	cm_control.rd_dma_go = 1;
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL, cm_control.cm_control_reg);
+
+	return 0;
+
+}
+
+static inline int mbcs_putdma_start(struct mbcs_soft *soft)
+{
+	void *mmr_base;
+	struct putdma *pdma;
+	uint64_t numPkts;
+	union cm_control cm_control;
+
+	mmr_base = soft->mmr_base;
+	pdma = &soft->putdma;
+
+	/* check that host address got setup */
+	if (!pdma->hostAddr)
+		return -1;
+
+	numPkts =
+	    (pdma->bytes + (MBCS_CACHELINE_SIZE - 1)) / MBCS_CACHELINE_SIZE;
+
+	/* program engine */
+	mbcs_putdma_set(mmr_base, tiocx_dma_addr(pdma->hostAddr),
+		   pdma->localAddr,
+		   (pdma->localAddr < MB2) ? 0 :
+		   (pdma->localAddr < MB4) ? 1 :
+		   (pdma->localAddr < MB6) ? 2 : 3,
+		   numPkts,
+		   pdma->DoneAmoEnable,
+		   pdma->DoneIntEnable,
+		   pdma->peerIO,
+		   pdma->amoHostDest,
+		   pdma->amoModType,
+		   pdma->intrHostDest, pdma->intrVector);
+
+	/* start engine */
+	cm_control.cm_control_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+	cm_control.wr_dma_go = 1;
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL, cm_control.cm_control_reg);
+
+	return 0;
+
+}
+
+static inline int mbcs_algo_start(struct mbcs_soft *soft)
+{
+	struct algoblock *algo_soft = &soft->algo;
+	void *mmr_base = soft->mmr_base;
+	union cm_control cm_control;
+
+	if (down_interruptible(&soft->algolock))
+		return -ERESTARTSYS;
+
+	atomic_set(&soft->algo_done, 0);
+
+	mbcs_algo_set(mmr_base,
+		 algo_soft->amoHostDest,
+		 algo_soft->amoModType,
+		 algo_soft->intrHostDest,
+		 algo_soft->intrVector, algo_soft->algoStepCount);
+
+	/* start algorithm */
+	cm_control.cm_control_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+	cm_control.alg_done_int_en = 1;
+	cm_control.alg_go = 1;
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL, cm_control.cm_control_reg);
+
+	up(&soft->algolock);
+
+	return 0;
+}
+
+static inline ssize_t
+do_mbcs_sram_dmawrite(struct mbcs_soft *soft, uint64_t hostAddr,
+		      size_t len, loff_t * off)
+{
+	int rv = 0;
+
+	if (down_interruptible(&soft->dmawritelock))
+		return -ERESTARTSYS;
+
+	atomic_set(&soft->dmawrite_done, 0);
+
+	soft->putdma.hostAddr = hostAddr;
+	soft->putdma.localAddr = *off;
+	soft->putdma.bytes = len;
+
+	if (mbcs_putdma_start(soft) < 0) {
+		DBG(KERN_ALERT "do_mbcs_sram_dmawrite: "
+					"mbcs_putdma_start failed\n");
+		rv = -EAGAIN;
+		goto dmawrite_exit;
+	}
+
+	if (wait_event_interruptible(soft->dmawrite_queue,
+					atomic_read(&soft->dmawrite_done))) {
+		rv = -ERESTARTSYS;
+		goto dmawrite_exit;
+	}
+
+	rv = len;
+	*off += len;
+
+dmawrite_exit:
+	up(&soft->dmawritelock);
+
+	return rv;
+}
+
+static inline ssize_t
+do_mbcs_sram_dmaread(struct mbcs_soft *soft, uint64_t hostAddr,
+		     size_t len, loff_t * off)
+{
+	int rv = 0;
+
+	if (down_interruptible(&soft->dmareadlock))
+		return -ERESTARTSYS;
+
+	atomic_set(&soft->dmawrite_done, 0);
+
+	soft->getdma.hostAddr = hostAddr;
+	soft->getdma.localAddr = *off;
+	soft->getdma.bytes = len;
+
+	if (mbcs_getdma_start(soft) < 0) {
+		DBG(KERN_ALERT "mbcs_strategy: mbcs_getdma_start failed\n");
+		rv = -EAGAIN;
+		goto dmaread_exit;
+	}
+
+	if (wait_event_interruptible(soft->dmaread_queue,
+					atomic_read(&soft->dmaread_done))) {
+		rv = -ERESTARTSYS;
+		goto dmaread_exit;
+	}
+
+	rv = len;
+	*off += len;
+
+dmaread_exit:
+	up(&soft->dmareadlock);
+
+	return rv;
+}
+
+int mbcs_open(struct inode *ip, struct file *fp)
+{
+	struct mbcs_soft *soft;
+	int minor;
+
+	minor = iminor(ip);
+
+	list_for_each_entry(soft, &soft_list, list) {
+		if (soft->nasid == minor) {
+			fp->private_data = soft->cxdev;
+			return 0;
+		}
+	}
+
+	return -ENODEV;
+}
+
+ssize_t mbcs_sram_read(struct file * fp, char *buf, size_t len, loff_t * off)
+{
+	struct cx_dev *cx_dev = fp->private_data;
+	struct mbcs_soft *soft = cx_dev->soft;
+	uint64_t hostAddr;
+	int rv = 0;
+
+	hostAddr = __get_dma_pages(GFP_KERNEL, get_order(len));
+	if (hostAddr == 0)
+		return -ENOMEM;
+
+	rv = do_mbcs_sram_dmawrite(soft, hostAddr, len, off);
+	if (rv < 0)
+		goto exit;
+
+	if (copy_to_user(buf, (void *)hostAddr, len))
+		rv = -EFAULT;
+
+      exit:
+	free_pages(hostAddr, get_order(len));
+
+	return rv;
+}
+
+ssize_t
+mbcs_sram_write(struct file * fp, const char *buf, size_t len, loff_t * off)
+{
+	struct cx_dev *cx_dev = fp->private_data;
+	struct mbcs_soft *soft = cx_dev->soft;
+	uint64_t hostAddr;
+	int rv = 0;
+
+	hostAddr = __get_dma_pages(GFP_KERNEL, get_order(len));
+	if (hostAddr == 0)
+		return -ENOMEM;
+
+	if (copy_from_user((void *)hostAddr, buf, len)) {
+		rv = -EFAULT;
+		goto exit;
+	}
+
+	rv = do_mbcs_sram_dmaread(soft, hostAddr, len, off);
+
+      exit:
+	free_pages(hostAddr, get_order(len));
+
+	return rv;
+}
+
+loff_t mbcs_sram_llseek(struct file * filp, loff_t off, int whence)
+{
+	loff_t newpos;
+
+	switch (whence) {
+	case 0:		/* SEEK_SET */
+		newpos = off;
+		break;
+
+	case 1:		/* SEEK_CUR */
+		newpos = filp->f_pos + off;
+		break;
+
+	case 2:		/* SEEK_END */
+		newpos = MBCS_SRAM_SIZE + off;
+		break;
+
+	default:		/* can't happen */
+		return -EINVAL;
+	}
+
+	if (newpos < 0)
+		return -EINVAL;
+
+	filp->f_pos = newpos;
+
+	return newpos;
+}
+
+static uint64_t mbcs_pioaddr(struct mbcs_soft *soft, uint64_t offset)
+{
+	uint64_t mmr_base;
+
+	mmr_base = (uint64_t) (soft->mmr_base + offset);
+
+	return mmr_base;
+}
+
+static void mbcs_debug_pioaddr_set(struct mbcs_soft *soft)
+{
+	soft->debug_addr = mbcs_pioaddr(soft, MBCS_DEBUG_START);
+}
+
+static void mbcs_gscr_pioaddr_set(struct mbcs_soft *soft)
+{
+	soft->gscr_addr = mbcs_pioaddr(soft, MBCS_GSCR_START);
+}
+
+int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+	struct cx_dev *cx_dev = fp->private_data;
+	struct mbcs_soft *soft = cx_dev->soft;
+
+	if (vma->vm_pgoff != 0)
+		return -EINVAL;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
+	if (remap_pfn_range(vma,
+			    vma->vm_start,
+			    __pa(soft->gscr_addr) >> PAGE_SHIFT,
+			    PAGE_SIZE,
+			    vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * mbcs_completion_intr_handler - Primary completion handler.
+ * @irq: irq
+ * @arg: soft struct for device
+ * @ep: regs
+ *
+ */
+static irqreturn_t
+mbcs_completion_intr_handler(int irq, void *arg, struct pt_regs *ep)
+{
+	struct mbcs_soft *soft = (struct mbcs_soft *)arg;
+	void *mmr_base;
+	union cm_status cm_status;
+	union cm_control cm_control;
+
+	mmr_base = soft->mmr_base;
+	cm_status.cm_status_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_STATUS);
+
+	if (cm_status.rd_dma_done) {
+		/* stop dma-read engine, clear status */
+		cm_control.cm_control_reg =
+		    MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+		cm_control.rd_dma_clr = 1;
+		MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL,
+			     cm_control.cm_control_reg);
+		atomic_set(&soft->dmaread_done, 1);
+		wake_up(&soft->dmaread_queue);
+	}
+	if (cm_status.wr_dma_done) {
+		/* stop dma-write engine, clear status */
+		cm_control.cm_control_reg =
+		    MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+		cm_control.wr_dma_clr = 1;
+		MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL,
+			     cm_control.cm_control_reg);
+		atomic_set(&soft->dmawrite_done, 1);
+		wake_up(&soft->dmawrite_queue);
+	}
+	if (cm_status.alg_done) {
+		/* clear status */
+		cm_control.cm_control_reg =
+		    MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+		cm_control.alg_done_clr = 1;
+		MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL,
+			     cm_control.cm_control_reg);
+		atomic_set(&soft->algo_done, 1);
+		wake_up(&soft->algo_queue);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * mbcs_intr_alloc - Allocate interrupts.
+ * @dev: device pointer
+ *
+ */
+static int mbcs_intr_alloc(struct cx_dev *dev)
+{
+	struct sn_irq_info *sn_irq;
+	struct mbcs_soft *soft;
+	struct getdma *getdma;
+	struct putdma *putdma;
+	struct algoblock *algo;
+
+	soft = dev->soft;
+	getdma = &soft->getdma;
+	putdma = &soft->putdma;
+	algo = &soft->algo;
+
+	soft->get_sn_irq = NULL;
+	soft->put_sn_irq = NULL;
+	soft->algo_sn_irq = NULL;
+
+	sn_irq = tiocx_irq_alloc(dev->cx_id.nasid, TIOCX_CORELET, -1, -1, -1);
+	if (sn_irq == NULL)
+		return -EAGAIN;
+	soft->get_sn_irq = sn_irq;
+	getdma->intrHostDest = sn_irq->irq_xtalkaddr;
+	getdma->intrVector = sn_irq->irq_irq;
+	if (request_irq(sn_irq->irq_irq,
+			(void *)mbcs_completion_intr_handler, SA_SHIRQ,
+			"MBCS get intr", (void *)soft)) {
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+
+	sn_irq = tiocx_irq_alloc(dev->cx_id.nasid, TIOCX_CORELET, -1, -1, -1);
+	if (sn_irq == NULL) {
+		free_irq(soft->get_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+	soft->put_sn_irq = sn_irq;
+	putdma->intrHostDest = sn_irq->irq_xtalkaddr;
+	putdma->intrVector = sn_irq->irq_irq;
+	if (request_irq(sn_irq->irq_irq,
+			(void *)mbcs_completion_intr_handler, SA_SHIRQ,
+			"MBCS put intr", (void *)soft)) {
+		tiocx_irq_free(soft->put_sn_irq);
+		free_irq(soft->get_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+
+	sn_irq = tiocx_irq_alloc(dev->cx_id.nasid, TIOCX_CORELET, -1, -1, -1);
+	if (sn_irq == NULL) {
+		free_irq(soft->put_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->put_sn_irq);
+		free_irq(soft->get_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+	soft->algo_sn_irq = sn_irq;
+	algo->intrHostDest = sn_irq->irq_xtalkaddr;
+	algo->intrVector = sn_irq->irq_irq;
+	if (request_irq(sn_irq->irq_irq,
+			(void *)mbcs_completion_intr_handler, SA_SHIRQ,
+			"MBCS algo intr", (void *)soft)) {
+		tiocx_irq_free(soft->algo_sn_irq);
+		free_irq(soft->put_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->put_sn_irq);
+		free_irq(soft->get_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/**
+ * mbcs_intr_dealloc - Remove interrupts.
+ * @dev: device pointer
+ *
+ */
+static void mbcs_intr_dealloc(struct cx_dev *dev)
+{
+	struct mbcs_soft *soft;
+
+	soft = dev->soft;
+
+	free_irq(soft->get_sn_irq->irq_irq, soft);
+	tiocx_irq_free(soft->get_sn_irq);
+	free_irq(soft->put_sn_irq->irq_irq, soft);
+	tiocx_irq_free(soft->put_sn_irq);
+	free_irq(soft->algo_sn_irq->irq_irq, soft);
+	tiocx_irq_free(soft->algo_sn_irq);
+}
+
+static inline int mbcs_hw_init(struct mbcs_soft *soft)
+{
+	void *mmr_base = soft->mmr_base;
+	union cm_control cm_control;
+	union cm_req_timeout cm_req_timeout;
+	uint64_t err_stat;
+
+	cm_req_timeout.cm_req_timeout_reg =
+	    MBCS_MMR_GET(mmr_base, MBCS_CM_REQ_TOUT);
+
+	cm_req_timeout.time_out = MBCS_CM_CONTROL_REQ_TOUT_MASK;
+	MBCS_MMR_SET(mmr_base, MBCS_CM_REQ_TOUT,
+		     cm_req_timeout.cm_req_timeout_reg);
+
+	mbcs_gscr_pioaddr_set(soft);
+	mbcs_debug_pioaddr_set(soft);
+
+	/* clear errors */
+	err_stat = MBCS_MMR_GET(mmr_base, MBCS_CM_ERR_STAT);
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CLR_ERR_STAT, err_stat);
+	MBCS_MMR_ZERO(mmr_base, MBCS_CM_ERROR_DETAIL1);
+
+	/* enable interrupts */
+	/* turn off 2^23 (INT_EN_PIO_REQ_ADDR_INV) */
+	MBCS_MMR_SET(mmr_base, MBCS_CM_ERR_INT_EN, 0x3ffffff7e00ffUL);
+
+	/* arm status regs and clear engines */
+	cm_control.cm_control_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+	cm_control.rearm_stat_regs = 1;
+	cm_control.alg_clr = 1;
+	cm_control.wr_dma_clr = 1;
+	cm_control.rd_dma_clr = 1;
+
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL, cm_control.cm_control_reg);
+
+	return 0;
+}
+
+static ssize_t show_algo(struct device *dev, char *buf)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct mbcs_soft *soft = cx_dev->soft;
+	uint64_t debug0;
+
+	/*
+	 * By convention, the first debug register contains the
+	 * algorithm number and revision.
+	 */
+	debug0 = *(uint64_t *) soft->debug_addr;
+
+	return sprintf(buf, "0x%lx 0x%lx\n",
+		       (debug0 >> 32), (debug0 & 0xffffffff));
+}
+
+static ssize_t store_algo(struct device *dev, const char *buf, size_t count)
+{
+	int n;
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct mbcs_soft *soft = cx_dev->soft;
+
+	if (count <= 0)
+		return 0;
+
+	n = simple_strtoul(buf, NULL, 0);
+
+	if (n == 1) {
+		mbcs_algo_start(soft);
+		if (wait_event_interruptible(soft->algo_queue,
+					atomic_read(&soft->algo_done)))
+			return -ERESTARTSYS;
+	}
+
+	return count;
+}
+
+DEVICE_ATTR(algo, 0644, show_algo, store_algo);
+
+/**
+ * mbcs_probe - Initialize for device
+ * @dev: device pointer
+ * @device_id: id table pointer
+ *
+ */
+static int mbcs_probe(struct cx_dev *dev, const struct cx_device_id *id)
+{
+	struct mbcs_soft *soft;
+
+	dev->soft = NULL;
+
+	soft = kcalloc(1, sizeof(struct mbcs_soft), GFP_KERNEL);
+	if (soft == NULL)
+		return -ENOMEM;
+
+	soft->nasid = dev->cx_id.nasid;
+	list_add(&soft->list, &soft_list);
+	soft->mmr_base = (void *)tiocx_swin_base(dev->cx_id.nasid);
+	dev->soft = soft;
+	soft->cxdev = dev;
+
+	init_waitqueue_head(&soft->dmawrite_queue);
+	init_waitqueue_head(&soft->dmaread_queue);
+	init_waitqueue_head(&soft->algo_queue);
+
+	init_MUTEX(&soft->dmawritelock);
+	init_MUTEX(&soft->dmareadlock);
+	init_MUTEX(&soft->algolock);
+
+	mbcs_getdma_init(&soft->getdma);
+	mbcs_putdma_init(&soft->putdma);
+	mbcs_algo_init(&soft->algo);
+
+	mbcs_hw_init(soft);
+
+	/* Allocate interrupts */
+	mbcs_intr_alloc(dev);
+
+	device_create_file(&dev->dev, &dev_attr_algo);
+
+	return 0;
+}
+
+static int mbcs_remove(struct cx_dev *dev)
+{
+	if (dev->soft) {
+		mbcs_intr_dealloc(dev);
+		kfree(dev->soft);
+	}
+
+	device_remove_file(&dev->dev, &dev_attr_algo);
+
+	return 0;
+}
+
+const struct cx_device_id __devinitdata mbcs_id_table[] = {
+	{
+	 .part_num = MBCS_PART_NUM,
+	 .mfg_num = MBCS_MFG_NUM,
+	 },
+	{
+	 .part_num = MBCS_PART_NUM_ALG0,
+	 .mfg_num = MBCS_MFG_NUM,
+	 },
+	{0, 0}
+};
+
+MODULE_DEVICE_TABLE(cx, mbcs_id_table);
+
+struct cx_drv mbcs_driver = {
+	.name = DEVICE_NAME,
+	.id_table = mbcs_id_table,
+	.probe = mbcs_probe,
+	.remove = mbcs_remove,
+};
+
+static void __exit mbcs_exit(void)
+{
+	int rv;
+
+	rv = unregister_chrdev(mbcs_major, DEVICE_NAME);
+	if (rv < 0)
+		DBG(KERN_ALERT "Error in unregister_chrdev: %d\n", rv);
+
+	cx_driver_unregister(&mbcs_driver);
+}
+
+static int __init mbcs_init(void)
+{
+	int rv;
+
+	// Put driver into chrdevs[].  Get major number.
+	rv = register_chrdev(mbcs_major, DEVICE_NAME, &mbcs_ops);
+	if (rv < 0) {
+		DBG(KERN_ALERT "mbcs_init: can't get major number. %d\n", rv);
+		return rv;
+	}
+	mbcs_major = rv;
+
+	return cx_driver_register(&mbcs_driver);
+}
+
+module_init(mbcs_init);
+module_exit(mbcs_exit);
+
+MODULE_AUTHOR("Bruce Losure <blosure@sgi.com>");
+MODULE_DESCRIPTION("Driver for MOATB Core Services");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/mbcs.h b/drivers/char/mbcs.h
new file mode 100644
index 000000000000..844644d201c5
--- /dev/null
+++ b/drivers/char/mbcs.h
@@ -0,0 +1,553 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#ifndef __MBCS_H__
+#define __MBCS_H__
+
+/*
+ * General macros
+ */
+#define MB	(1024*1024)
+#define MB2	(2*MB)
+#define MB4	(4*MB)
+#define MB6	(6*MB)
+
+/*
+ * Offsets and masks
+ */
+#define MBCS_CM_ID		0x0000	/* Identification */
+#define MBCS_CM_STATUS		0x0008	/* Status */
+#define MBCS_CM_ERROR_DETAIL1	0x0010	/* Error Detail1 */
+#define MBCS_CM_ERROR_DETAIL2	0x0018	/* Error Detail2 */
+#define MBCS_CM_CONTROL		0x0020	/* Control */
+#define MBCS_CM_REQ_TOUT	0x0028	/* Request Time-out */
+#define MBCS_CM_ERR_INT_DEST	0x0038	/* Error Interrupt Destination */
+#define MBCS_CM_TARG_FL		0x0050	/* Target Flush */
+#define MBCS_CM_ERR_STAT	0x0060	/* Error Status */
+#define MBCS_CM_CLR_ERR_STAT	0x0068	/* Clear Error Status */
+#define MBCS_CM_ERR_INT_EN	0x0070	/* Error Interrupt Enable */
+#define MBCS_RD_DMA_SYS_ADDR	0x0100	/* Read DMA System Address */
+#define MBCS_RD_DMA_LOC_ADDR	0x0108	/* Read DMA Local Address */
+#define MBCS_RD_DMA_CTRL	0x0110	/* Read DMA Control */
+#define MBCS_RD_DMA_AMO_DEST	0x0118	/* Read DMA AMO Destination */
+#define MBCS_RD_DMA_INT_DEST	0x0120	/* Read DMA Interrupt Destination */
+#define MBCS_RD_DMA_AUX_STAT	0x0130	/* Read DMA Auxillary Status */
+#define MBCS_WR_DMA_SYS_ADDR	0x0200	/* Write DMA System Address */
+#define MBCS_WR_DMA_LOC_ADDR	0x0208	/* Write DMA Local Address */
+#define MBCS_WR_DMA_CTRL	0x0210	/* Write DMA Control */
+#define MBCS_WR_DMA_AMO_DEST	0x0218	/* Write DMA AMO Destination */
+#define MBCS_WR_DMA_INT_DEST	0x0220	/* Write DMA Interrupt Destination */
+#define MBCS_WR_DMA_AUX_STAT	0x0230	/* Write DMA Auxillary Status */
+#define MBCS_ALG_AMO_DEST	0x0300	/* Algorithm AMO Destination */
+#define MBCS_ALG_INT_DEST	0x0308	/* Algorithm Interrupt Destination */
+#define MBCS_ALG_OFFSETS	0x0310
+#define MBCS_ALG_STEP		0x0318	/* Algorithm Step */
+
+#define MBCS_GSCR_START		0x0000000
+#define MBCS_DEBUG_START	0x0100000
+#define MBCS_RAM0_START		0x0200000
+#define MBCS_RAM1_START		0x0400000
+#define MBCS_RAM2_START		0x0600000
+
+#define MBCS_CM_CONTROL_REQ_TOUT_MASK 0x0000000000ffffffUL
+//#define PIO_BASE_ADDR_BASE_OFFSET_MASK 0x00fffffffff00000UL
+
+#define MBCS_SRAM_SIZE		(1024*1024)
+#define MBCS_CACHELINE_SIZE	128
+
+/*
+ * MMR get's and put's
+ */
+#define MBCS_MMR_ADDR(mmr_base, offset)((uint64_t *)(mmr_base + offset))
+#define MBCS_MMR_SET(mmr_base, offset, value) {			\
+	uint64_t *mbcs_mmr_set_u64p, readback;				\
+	mbcs_mmr_set_u64p = (uint64_t *)(mmr_base + offset);	\
+	*mbcs_mmr_set_u64p = value;					\
+	readback = *mbcs_mmr_set_u64p; \
+}
+#define MBCS_MMR_GET(mmr_base, offset) *(uint64_t *)(mmr_base + offset)
+#define MBCS_MMR_ZERO(mmr_base, offset) MBCS_MMR_SET(mmr_base, offset, 0)
+
+/*
+ * MBCS mmr structures
+ */
+union cm_id {
+	uint64_t cm_id_reg;
+	struct {
+		uint64_t always_one:1,	// 0
+		 mfg_id:11,	// 11:1
+		 part_num:16,	// 27:12
+		 bitstream_rev:8,	// 35:28
+		:28;		// 63:36
+	};
+};
+
+union cm_status {
+	uint64_t cm_status_reg;
+	struct {
+		uint64_t pending_reads:8,	// 7:0
+		 pending_writes:8,	// 15:8
+		 ice_rsp_credits:8,	// 23:16
+		 ice_req_credits:8,	// 31:24
+		 cm_req_credits:8,	// 39:32
+		:1,		// 40
+		 rd_dma_in_progress:1,	// 41
+		 rd_dma_done:1,	// 42
+		:1,		// 43
+		 wr_dma_in_progress:1,	// 44
+		 wr_dma_done:1,	// 45
+		 alg_waiting:1,	// 46
+		 alg_pipe_running:1,	// 47
+		 alg_done:1,	// 48
+		:3,		// 51:49
+		 pending_int_reqs:8,	// 59:52
+		:3,		// 62:60
+		 alg_half_speed_sel:1;	// 63
+	};
+};
+
+union cm_error_detail1 {
+	uint64_t cm_error_detail1_reg;
+	struct {
+		uint64_t packet_type:4,	// 3:0
+		 source_id:2,	// 5:4
+		 data_size:2,	// 7:6
+		 tnum:8,	// 15:8
+		 byte_enable:8,	// 23:16
+		 gfx_cred:8,	// 31:24
+		 read_type:2,	// 33:32
+		 pio_or_memory:1,	// 34
+		 head_cw_error:1,	// 35
+		:12,		// 47:36
+		 head_error_bit:1,	// 48
+		 data_error_bit:1,	// 49
+		:13,		// 62:50
+		 valid:1;	// 63
+	};
+};
+
+union cm_error_detail2 {
+	uint64_t cm_error_detail2_reg;
+	struct {
+		uint64_t address:56,	// 55:0
+		:8;		// 63:56
+	};
+};
+
+union cm_control {
+	uint64_t cm_control_reg;
+	struct {
+		uint64_t cm_id:2,	// 1:0
+		:2,		// 3:2
+		 max_trans:5,	// 8:4
+		:3,		// 11:9
+		 address_mode:1,	// 12
+		:7,		// 19:13
+		 credit_limit:8,	// 27:20
+		:5,		// 32:28
+		 rearm_stat_regs:1,	// 33
+		 prescalar_byp:1,	// 34
+		 force_gap_war:1,	// 35
+		 rd_dma_go:1,	// 36
+		 wr_dma_go:1,	// 37
+		 alg_go:1,	// 38
+		 rd_dma_clr:1,	// 39
+		 wr_dma_clr:1,	// 40
+		 alg_clr:1,	// 41
+		:2,		// 43:42
+		 alg_wait_step:1,	// 44
+		 alg_done_amo_en:1,	// 45
+		 alg_done_int_en:1,	// 46
+		:1,		// 47
+		 alg_sram0_locked:1,	// 48
+		 alg_sram1_locked:1,	// 49
+		 alg_sram2_locked:1,	// 50
+		 alg_done_clr:1,	// 51
+		:12;		// 63:52
+	};
+};
+
+union cm_req_timeout {
+	uint64_t cm_req_timeout_reg;
+	struct {
+		uint64_t time_out:24,	// 23:0
+		:40;		// 63:24
+	};
+};
+
+union intr_dest {
+	uint64_t intr_dest_reg;
+	struct {
+		uint64_t address:56,	// 55:0
+		 int_vector:8;	// 63:56
+	};
+};
+
+union cm_error_status {
+	uint64_t cm_error_status_reg;
+	struct {
+		uint64_t ecc_sbe:1,	// 0
+		 ecc_mbe:1,	// 1
+		 unsupported_req:1,	// 2
+		 unexpected_rsp:1,	// 3
+		 bad_length:1,	// 4
+		 bad_datavalid:1,	// 5
+		 buffer_overflow:1,	// 6
+		 request_timeout:1,	// 7
+		:8,		// 15:8
+		 head_inv_data_size:1,	// 16
+		 rsp_pactype_inv:1,	// 17
+		 head_sb_err:1,	// 18
+		 missing_head:1,	// 19
+		 head_inv_rd_type:1,	// 20
+		 head_cmd_err_bit:1,	// 21
+		 req_addr_align_inv:1,	// 22
+		 pio_req_addr_inv:1,	// 23
+		 req_range_dsize_inv:1,	// 24
+		 early_term:1,	// 25
+		 early_tail:1,	// 26
+		 missing_tail:1,	// 27
+		 data_flit_sb_err:1,	// 28
+		 cm2hcm_req_cred_of:1,	// 29
+		 cm2hcm_rsp_cred_of:1,	// 30
+		 rx_bad_didn:1,	// 31
+		 rd_dma_err_rsp:1,	// 32
+		 rd_dma_tnum_tout:1,	// 33
+		 rd_dma_multi_tnum_tou:1,	// 34
+		 wr_dma_err_rsp:1,	// 35
+		 wr_dma_tnum_tout:1,	// 36
+		 wr_dma_multi_tnum_tou:1,	// 37
+		 alg_data_overflow:1,	// 38
+		 alg_data_underflow:1,	// 39
+		 ram0_access_conflict:1,	// 40
+		 ram1_access_conflict:1,	// 41
+		 ram2_access_conflict:1,	// 42
+		 ram0_perr:1,	// 43
+		 ram1_perr:1,	// 44
+		 ram2_perr:1,	// 45
+		 int_gen_rsp_err:1,	// 46
+		 int_gen_tnum_tout:1,	// 47
+		 rd_dma_prog_err:1,	// 48
+		 wr_dma_prog_err:1,	// 49
+		:14;		// 63:50
+	};
+};
+
+union cm_clr_error_status {
+	uint64_t cm_clr_error_status_reg;
+	struct {
+		uint64_t clr_ecc_sbe:1,	// 0
+		 clr_ecc_mbe:1,	// 1
+		 clr_unsupported_req:1,	// 2
+		 clr_unexpected_rsp:1,	// 3
+		 clr_bad_length:1,	// 4
+		 clr_bad_datavalid:1,	// 5
+		 clr_buffer_overflow:1,	// 6
+		 clr_request_timeout:1,	// 7
+		:8,		// 15:8
+		 clr_head_inv_data_siz:1,	// 16
+		 clr_rsp_pactype_inv:1,	// 17
+		 clr_head_sb_err:1,	// 18
+		 clr_missing_head:1,	// 19
+		 clr_head_inv_rd_type:1,	// 20
+		 clr_head_cmd_err_bit:1,	// 21
+		 clr_req_addr_align_in:1,	// 22
+		 clr_pio_req_addr_inv:1,	// 23
+		 clr_req_range_dsize_i:1,	// 24
+		 clr_early_term:1,	// 25
+		 clr_early_tail:1,	// 26
+		 clr_missing_tail:1,	// 27
+		 clr_data_flit_sb_err:1,	// 28
+		 clr_cm2hcm_req_cred_o:1,	// 29
+		 clr_cm2hcm_rsp_cred_o:1,	// 30
+		 clr_rx_bad_didn:1,	// 31
+		 clr_rd_dma_err_rsp:1,	// 32
+		 clr_rd_dma_tnum_tout:1,	// 33
+		 clr_rd_dma_multi_tnum:1,	// 34
+		 clr_wr_dma_err_rsp:1,	// 35
+		 clr_wr_dma_tnum_tout:1,	// 36
+		 clr_wr_dma_multi_tnum:1,	// 37
+		 clr_alg_data_overflow:1,	// 38
+		 clr_alg_data_underflo:1,	// 39
+		 clr_ram0_access_confl:1,	// 40
+		 clr_ram1_access_confl:1,	// 41
+		 clr_ram2_access_confl:1,	// 42
+		 clr_ram0_perr:1,	// 43
+		 clr_ram1_perr:1,	// 44
+		 clr_ram2_perr:1,	// 45
+		 clr_int_gen_rsp_err:1,	// 46
+		 clr_int_gen_tnum_tout:1,	// 47
+		 clr_rd_dma_prog_err:1,	// 48
+		 clr_wr_dma_prog_err:1,	// 49
+		:14;		// 63:50
+	};
+};
+
+union cm_error_intr_enable {
+	uint64_t cm_error_intr_enable_reg;
+	struct {
+		uint64_t int_en_ecc_sbe:1,	// 0
+		 int_en_ecc_mbe:1,	// 1
+		 int_en_unsupported_re:1,	// 2
+		 int_en_unexpected_rsp:1,	// 3
+		 int_en_bad_length:1,	// 4
+		 int_en_bad_datavalid:1,	// 5
+		 int_en_buffer_overflo:1,	// 6
+		 int_en_request_timeou:1,	// 7
+		:8,		// 15:8
+		 int_en_head_inv_data_:1,	// 16
+		 int_en_rsp_pactype_in:1,	// 17
+		 int_en_head_sb_err:1,	// 18
+		 int_en_missing_head:1,	// 19
+		 int_en_head_inv_rd_ty:1,	// 20
+		 int_en_head_cmd_err_b:1,	// 21
+		 int_en_req_addr_align:1,	// 22
+		 int_en_pio_req_addr_i:1,	// 23
+		 int_en_req_range_dsiz:1,	// 24
+		 int_en_early_term:1,	// 25
+		 int_en_early_tail:1,	// 26
+		 int_en_missing_tail:1,	// 27
+		 int_en_data_flit_sb_e:1,	// 28
+		 int_en_cm2hcm_req_cre:1,	// 29
+		 int_en_cm2hcm_rsp_cre:1,	// 30
+		 int_en_rx_bad_didn:1,	// 31
+		 int_en_rd_dma_err_rsp:1,	// 32
+		 int_en_rd_dma_tnum_to:1,	// 33
+		 int_en_rd_dma_multi_t:1,	// 34
+		 int_en_wr_dma_err_rsp:1,	// 35
+		 int_en_wr_dma_tnum_to:1,	// 36
+		 int_en_wr_dma_multi_t:1,	// 37
+		 int_en_alg_data_overf:1,	// 38
+		 int_en_alg_data_under:1,	// 39
+		 int_en_ram0_access_co:1,	// 40
+		 int_en_ram1_access_co:1,	// 41
+		 int_en_ram2_access_co:1,	// 42
+		 int_en_ram0_perr:1,	// 43
+		 int_en_ram1_perr:1,	// 44
+		 int_en_ram2_perr:1,	// 45
+		 int_en_int_gen_rsp_er:1,	// 46
+		 int_en_int_gen_tnum_t:1,	// 47
+		 int_en_rd_dma_prog_er:1,	// 48
+		 int_en_wr_dma_prog_er:1,	// 49
+		:14;		// 63:50
+	};
+};
+
+struct cm_mmr {
+	union cm_id id;
+	union cm_status status;
+	union cm_error_detail1 err_detail1;
+	union cm_error_detail2 err_detail2;
+	union cm_control control;
+	union cm_req_timeout req_timeout;
+	uint64_t reserved1[1];
+	union intr_dest int_dest;
+	uint64_t reserved2[2];
+	uint64_t targ_flush;
+	uint64_t reserved3[1];
+	union cm_error_status err_status;
+	union cm_clr_error_status clr_err_status;
+	union cm_error_intr_enable int_enable;
+};
+
+union dma_hostaddr {
+	uint64_t dma_hostaddr_reg;
+	struct {
+		uint64_t dma_sys_addr:56,	// 55:0
+		:8;		// 63:56
+	};
+};
+
+union dma_localaddr {
+	uint64_t dma_localaddr_reg;
+	struct {
+		uint64_t dma_ram_addr:21,	// 20:0
+		 dma_ram_sel:2,	// 22:21
+		:41;		// 63:23
+	};
+};
+
+union dma_control {
+	uint64_t dma_control_reg;
+	struct {
+		uint64_t dma_op_length:16,	// 15:0
+		:18,		// 33:16
+		 done_amo_en:1,	// 34
+		 done_int_en:1,	// 35
+		:1,		// 36
+		 pio_mem_n:1,	// 37
+		:26;		// 63:38
+	};
+};
+
+union dma_amo_dest {
+	uint64_t dma_amo_dest_reg;
+	struct {
+		uint64_t dma_amo_sys_addr:56,	// 55:0
+		 dma_amo_mod_type:3,	// 58:56
+		:5;		// 63:59
+	};
+};
+
+union rdma_aux_status {
+	uint64_t rdma_aux_status_reg;
+	struct {
+		uint64_t op_num_pacs_left:17,	// 16:0
+		:5,		// 21:17
+		 lrsp_buff_empty:1,	// 22
+		:17,		// 39:23
+		 pending_reqs_left:6,	// 45:40
+		:18;		// 63:46
+	};
+};
+
+struct rdma_mmr {
+	union dma_hostaddr host_addr;
+	union dma_localaddr local_addr;
+	union dma_control control;
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union rdma_aux_status aux_status;
+};
+
+union wdma_aux_status {
+	uint64_t wdma_aux_status_reg;
+	struct {
+		uint64_t op_num_pacs_left:17,	// 16:0
+		:4,		// 20:17
+		 lreq_buff_empty:1,	// 21
+		:18,		// 39:22
+		 pending_reqs_left:6,	// 45:40
+		:18;		// 63:46
+	};
+};
+
+struct wdma_mmr {
+	union dma_hostaddr host_addr;
+	union dma_localaddr local_addr;
+	union dma_control control;
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union wdma_aux_status aux_status;
+};
+
+union algo_step {
+	uint64_t algo_step_reg;
+	struct {
+		uint64_t alg_step_cnt:16,	// 15:0
+		:48;		// 63:16
+	};
+};
+
+struct algo_mmr {
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union {
+		uint64_t algo_offset_reg;
+		struct {
+			uint64_t sram0_offset:7,	// 6:0
+			reserved0:1,	// 7
+			sram1_offset:7,	// 14:8
+			reserved1:1,	// 15
+			sram2_offset:7,	// 22:16
+			reserved2:14;	// 63:23
+		};
+	} sram_offset;
+	union algo_step step;
+};
+
+struct mbcs_mmr {
+	struct cm_mmr cm;
+	uint64_t reserved1[17];
+	struct rdma_mmr rdDma;
+	uint64_t reserved2[25];
+	struct wdma_mmr wrDma;
+	uint64_t reserved3[25];
+	struct algo_mmr algo;
+	uint64_t reserved4[156];
+};
+
+/*
+ * defines
+ */
+#define DEVICE_NAME "mbcs"
+#define MBCS_PART_NUM 0xfff0
+#define MBCS_PART_NUM_ALG0 0xf001
+#define MBCS_MFG_NUM  0x1
+
+struct algoblock {
+	uint64_t amoHostDest;
+	uint64_t amoModType;
+	uint64_t intrHostDest;
+	uint64_t intrVector;
+	uint64_t algoStepCount;
+};
+
+struct getdma {
+	uint64_t hostAddr;
+	uint64_t localAddr;
+	uint64_t bytes;
+	uint64_t DoneAmoEnable;
+	uint64_t DoneIntEnable;
+	uint64_t peerIO;
+	uint64_t amoHostDest;
+	uint64_t amoModType;
+	uint64_t intrHostDest;
+	uint64_t intrVector;
+};
+
+struct putdma {
+	uint64_t hostAddr;
+	uint64_t localAddr;
+	uint64_t bytes;
+	uint64_t DoneAmoEnable;
+	uint64_t DoneIntEnable;
+	uint64_t peerIO;
+	uint64_t amoHostDest;
+	uint64_t amoModType;
+	uint64_t intrHostDest;
+	uint64_t intrVector;
+};
+
+struct mbcs_soft {
+	struct list_head list;
+	struct cx_dev *cxdev;
+	int major;
+	int nasid;
+	void *mmr_base;
+	wait_queue_head_t dmawrite_queue;
+	wait_queue_head_t dmaread_queue;
+	wait_queue_head_t algo_queue;
+	struct sn_irq_info *get_sn_irq;
+	struct sn_irq_info *put_sn_irq;
+	struct sn_irq_info *algo_sn_irq;
+	struct getdma getdma;
+	struct putdma putdma;
+	struct algoblock algo;
+	uint64_t gscr_addr;	// pio addr
+	uint64_t ram0_addr;	// pio addr
+	uint64_t ram1_addr;	// pio addr
+	uint64_t ram2_addr;	// pio addr
+	uint64_t debug_addr;	// pio addr
+	atomic_t dmawrite_done;
+	atomic_t dmaread_done;
+	atomic_t algo_done;
+	struct semaphore dmawritelock;
+	struct semaphore dmareadlock;
+	struct semaphore algolock;
+};
+
+extern int mbcs_open(struct inode *ip, struct file *fp);
+extern ssize_t mbcs_sram_read(struct file *fp, char *buf, size_t len,
+			      loff_t * off);
+extern ssize_t mbcs_sram_write(struct file *fp, const char *buf, size_t len,
+			       loff_t * off);
+extern loff_t mbcs_sram_llseek(struct file *filp, loff_t off, int whence);
+extern int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma);
+
+#endif				// __MBCS_H__
diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h
index ae0bc99d573e..960d626ee589 100644
--- a/include/asm-ia64/sn/addrs.h
+++ b/include/asm-ia64/sn/addrs.h
@@ -169,7 +169,10 @@
 #define TIO_BWIN_SIZE_BITS		30	/* big window size: 1G */
 #define NODE_SWIN_BASE(n, w)		((w == 0) ? NODE_BWIN_BASE((n), SWIN0_BIGWIN) \
 		: RAW_NODE_SWIN_BASE(n, w))
+#define TIO_SWIN_BASE(n, w) 		(TIO_IO_BASE(n) + \
+					    ((u64) (w) << TIO_SWIN_SIZE_BITS))
 #define NODE_IO_BASE(n)			(GLOBAL_MMR_SPACE | NASID_SPACE(n))
+#define TIO_IO_BASE(n)                  (UNCACHED | NASID_SPACE(n))
 #define BWIN_SIZE			(1UL << BWIN_SIZE_BITS)
 #define NODE_BWIN_BASE0(n)		(NODE_IO_BASE(n) + BWIN_SIZE)
 #define NODE_BWIN_BASE(n, w)		(NODE_BWIN_BASE0(n) + ((u64) (w) << BWIN_SIZE_BITS))
diff --git a/include/asm-ia64/sn/tiocx.h b/include/asm-ia64/sn/tiocx.h
new file mode 100644
index 000000000000..c5447a504509
--- /dev/null
+++ b/include/asm-ia64/sn/tiocx.h
@@ -0,0 +1,71 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_TIOCX_H
+#define _ASM_IA64_SN_TIO_TIOCX_H
+
+#ifdef __KERNEL__
+
+struct cx_id_s {
+	unsigned int part_num;
+	unsigned int mfg_num;
+	int nasid;
+};
+
+struct cx_dev {
+	struct cx_id_s cx_id;
+	void *soft;			/* driver specific */
+	struct hubdev_info *hubdev;
+	struct device dev;
+	struct cx_drv *driver;
+};
+
+struct cx_device_id {
+	unsigned int part_num;
+	unsigned int mfg_num;
+};
+
+struct cx_drv {
+	char *name;
+	const struct cx_device_id *id_table;
+	struct device_driver driver;
+	int (*probe) (struct cx_dev * dev, const struct cx_device_id * id);
+	int (*remove) (struct cx_dev * dev);
+};
+
+/* create DMA address by stripping AS bits */
+#define TIOCX_DMA_ADDR(a) (uint64_t)((uint64_t)(a) & 0xffffcfffffffffUL)
+
+#define TIOCX_TO_TIOCX_DMA_ADDR(a) (uint64_t)(((uint64_t)(a) & 0xfffffffff) |  \
+                                  ((((uint64_t)(a)) & 0xffffc000000000UL) <<2))
+
+#define TIO_CE_ASIC_PARTNUM 0xce00
+#define TIOCX_CORELET 3
+
+/* These are taken from tio_mmr_as.h */
+#define TIO_ICE_FRZ_CFG               TIO_MMR_ADDR_MOD(0x00000000b0008100UL)
+#define TIO_ICE_PMI_TX_CFG            TIO_MMR_ADDR_MOD(0x00000000b000b100UL)
+#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3 TIO_MMR_ADDR_MOD(0x00000000b000be18UL)
+#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK 0x000000000000000fUL
+
+#define to_cx_dev(n) container_of(n, struct cx_dev, dev)
+#define to_cx_driver(drv) container_of(drv, struct cx_drv, driver)
+
+extern struct sn_irq_info *tiocx_irq_alloc(nasid_t, int, int, nasid_t, int);
+extern void tiocx_irq_free(struct sn_irq_info *);
+extern int cx_device_unregister(struct cx_dev *);
+extern int cx_device_register(nasid_t, int, int, struct hubdev_info *);
+extern int cx_driver_unregister(struct cx_drv *);
+extern int cx_driver_register(struct cx_drv *);
+extern uint64_t tiocx_dma_addr(uint64_t addr);
+extern uint64_t tiocx_swin_base(int nasid);
+extern void tiocx_mmr_store(int nasid, uint64_t offset, uint64_t value);
+extern uint64_t tiocx_mmr_load(int nasid, uint64_t offset);
+
+#endif				//  __KERNEL__
+#endif				// _ASM_IA64_SN_TIO_TIOCX__

From c1298c5c7e5763a3b2fd4a9535d474ff6e54cd53 Mon Sep 17 00:00:00 2001
From: Aaron J Young <ayoung@sgi.com>
Date: Mon, 25 Apr 2005 13:11:14 -0700
Subject: [PATCH 14/34] [IA64-SGI] Altix: enable poweroff

This patch adds the necessary "hook" to allow SGI/SN
machines to perform a system power off upon a
'init 0', 'halt -p', 'poweroff' or 'shutdown -h'.

The "hook" is to set the pm_power_off callback
to ia64_sn_power_down(). pm_power_off is checked
in machine_power_off()/do_poweroff() and, if set, is executed.
ia64_sn_power_down() is a function already present (but not
used currently) in the sn kernel.
ia64_sn_power_down() makes a SAL call to execute the
power off.

Signed-off-by: Aaron J Young <ayoung@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/setup.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index f0306b516afb..d35f2a6f9c94 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -29,6 +29,7 @@
 #include <linux/sched.h>
 #include <linux/root_dev.h>
 #include <linux/nodemask.h>
+#include <linux/pm.h>
 
 #include <asm/io.h>
 #include <asm/sal.h>
@@ -353,6 +354,14 @@ void __init sn_setup(char **cmdline_p)
 	screen_info = sn_screen_info;
 
 	sn_timer_init();
+
+	/*
+	 * set pm_power_off to a SAL call to allow
+	 * sn machines to power off. The SAL call can be replaced
+	 * by an ACPI interface call when ACPI is fully implemented
+	 * for sn.
+	 */
+	pm_power_off = ia64_sn_power_down;
 }
 
 /**

From ff3eb55ed97db3f12964beeffe3d34602d295367 Mon Sep 17 00:00:00 2001
From: Bruce Losure <blosure@sgi.com>
Date: Mon, 25 Apr 2005 13:12:02 -0700
Subject: [PATCH 15/34] [IA64-SGI]

Missed the "bk new" for this file in the last commit.

Signed-off-by: Bruce Losure <blosure@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/tiocx.c | 548 ++++++++++++++++++++++++++++++++++++
 1 file changed, 548 insertions(+)
 create mode 100644 arch/ia64/sn/kernel/tiocx.c

diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
new file mode 100644
index 000000000000..66190d7e492d
--- /dev/null
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -0,0 +1,548 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <asm/uaccess.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/io.h>
+#include <asm/sn/types.h>
+#include <asm/sn/shubio.h>
+#include <asm/sn/tiocx.h>
+#include "tio.h"
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+
+#define CX_DEV_NONE 0
+#define DEVICE_NAME "tiocx"
+#define WIDGET_ID 0
+#define TIOCX_DEBUG 0
+
+#if TIOCX_DEBUG
+#define DBG(fmt...)    printk(KERN_ALERT fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+struct device_attribute dev_attr_cxdev_control;
+
+/**
+ * tiocx_match - Try to match driver id list with device.
+ * @dev: device pointer
+ * @drv: driver pointer
+ *
+ * Returns 1 if match, 0 otherwise.
+ */
+static int tiocx_match(struct device *dev, struct device_driver *drv)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct cx_drv *cx_drv = to_cx_driver(drv);
+	const struct cx_device_id *ids = cx_drv->id_table;
+
+	if (!ids)
+		return 0;
+
+	while (ids->part_num) {
+		if (ids->part_num == cx_dev->cx_id.part_num)
+			return 1;
+		ids++;
+	}
+	return 0;
+
+}
+
+static int tiocx_hotplug(struct device *dev, char **envp, int num_envp,
+			 char *buffer, int buffer_size)
+{
+	return -ENODEV;
+}
+
+static void tiocx_bus_release(struct device *dev)
+{
+	kfree(to_cx_dev(dev));
+}
+
+struct bus_type tiocx_bus_type = {
+	.name = "tiocx",
+	.match = tiocx_match,
+	.hotplug = tiocx_hotplug,
+};
+
+/**
+ * cx_device_match - Find cx_device in the id table.
+ * @ids: id table from driver
+ * @cx_device: part/mfg id for the device
+ *
+ */
+static const struct cx_device_id *cx_device_match(const struct cx_device_id
+						  *ids,
+						  struct cx_dev *cx_device)
+{
+	/*
+	 * NOTES: We may want to check for CX_ANY_ID too.
+	 *        Do we want to match against nasid too?
+	 *        CX_DEV_NONE == 0, if the driver tries to register for
+	 *        part/mfg == 0 we should return no-match (NULL) here.
+	 */
+	while (ids->part_num && ids->mfg_num) {
+		if (ids->part_num == cx_device->cx_id.part_num &&
+		    ids->mfg_num == cx_device->cx_id.mfg_num)
+			return ids;
+		ids++;
+	}
+
+	return NULL;
+}
+
+/**
+ * cx_device_probe - Look for matching device.
+ *			Call driver probe routine if found.
+ * @cx_driver: driver table (cx_drv struct) from driver
+ * @cx_device: part/mfg id for the device
+ */
+static int cx_device_probe(struct device *dev)
+{
+	const struct cx_device_id *id;
+	struct cx_drv *cx_drv = to_cx_driver(dev->driver);
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	int error = 0;
+
+	if (!cx_dev->driver && cx_drv->probe) {
+		id = cx_device_match(cx_drv->id_table, cx_dev);
+		if (id) {
+			if ((error = cx_drv->probe(cx_dev, id)) < 0)
+				return error;
+			else
+				cx_dev->driver = cx_drv;
+		}
+	}
+
+	return error;
+}
+
+/**
+ * cx_driver_remove - Remove driver from device struct.
+ * @dev: device
+ */
+static int cx_driver_remove(struct device *dev)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct cx_drv *cx_drv = cx_dev->driver;
+	if (cx_drv->remove)
+		cx_drv->remove(cx_dev);
+	cx_dev->driver = NULL;
+	return 0;
+}
+
+/**
+ * cx_driver_register - Register the driver.
+ * @cx_driver: driver table (cx_drv struct) from driver
+ * 
+ * Called from the driver init routine to register a driver.
+ * The cx_drv struct contains the driver name, a pointer to
+ * a table of part/mfg numbers and a pointer to the driver's
+ * probe/attach routine.
+ */
+int cx_driver_register(struct cx_drv *cx_driver)
+{
+	cx_driver->driver.name = cx_driver->name;
+	cx_driver->driver.bus = &tiocx_bus_type;
+	cx_driver->driver.probe = cx_device_probe;
+	cx_driver->driver.remove = cx_driver_remove;
+
+	return driver_register(&cx_driver->driver);
+}
+
+/**
+ * cx_driver_unregister - Unregister the driver.
+ * @cx_driver: driver table (cx_drv struct) from driver
+ */
+int cx_driver_unregister(struct cx_drv *cx_driver)
+{
+	driver_unregister(&cx_driver->driver);
+	return 0;
+}
+
+/**
+ * cx_device_register - Register a device.
+ * @nasid: device's nasid
+ * @part_num: device's part number
+ * @mfg_num: device's manufacturer number
+ * @hubdev: hub info associated with this device
+ *
+ */
+int
+cx_device_register(nasid_t nasid, int part_num, int mfg_num,
+		   struct hubdev_info *hubdev)
+{
+	struct cx_dev *cx_dev;
+
+	cx_dev = kcalloc(1, sizeof(struct cx_dev), GFP_KERNEL);
+	DBG("cx_dev= 0x%p\n", cx_dev);
+	if (cx_dev == NULL)
+		return -ENOMEM;
+
+	cx_dev->cx_id.part_num = part_num;
+	cx_dev->cx_id.mfg_num = mfg_num;
+	cx_dev->cx_id.nasid = nasid;
+	cx_dev->hubdev = hubdev;
+
+	cx_dev->dev.parent = NULL;
+	cx_dev->dev.bus = &tiocx_bus_type;
+	cx_dev->dev.release = tiocx_bus_release;
+	snprintf(cx_dev->dev.bus_id, BUS_ID_SIZE, "%d.0x%x",
+		 cx_dev->cx_id.nasid, cx_dev->cx_id.part_num);
+	device_register(&cx_dev->dev);
+	get_device(&cx_dev->dev);
+
+	device_create_file(&cx_dev->dev, &dev_attr_cxdev_control);
+
+	return 0;
+}
+
+/**
+ * cx_device_unregister - Unregister a device.
+ * @cx_dev: part/mfg id for the device
+ */
+int cx_device_unregister(struct cx_dev *cx_dev)
+{
+	put_device(&cx_dev->dev);
+	device_unregister(&cx_dev->dev);
+	return 0;
+}
+
+/**
+ * cx_device_reload - Reload the device.
+ * @nasid: device's nasid
+ * @part_num: device's part number
+ * @mfg_num: device's manufacturer number
+ *
+ * Remove the device associated with 'nasid' from device list and then
+ * call device-register with the given part/mfg numbers.
+ */
+static int cx_device_reload(struct cx_dev *cx_dev)
+{
+	device_remove_file(&cx_dev->dev, &dev_attr_cxdev_control);
+	cx_device_unregister(cx_dev);
+	return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num,
+				  cx_dev->cx_id.mfg_num, cx_dev->hubdev);
+}
+
+static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget,
+					u64 sn_irq_info,
+					int req_irq, nasid_t req_nasid,
+					int req_slice)
+{
+	struct ia64_sal_retval rv;
+	rv.status = 0;
+	rv.v0 = 0;
+
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT,
+				SAL_INTR_ALLOC, nasid,
+				widget, sn_irq_info, req_irq,
+				req_nasid, req_slice);
+	return rv.status;
+}
+
+static inline void tiocx_intr_free(nasid_t nasid, int widget,
+				   struct sn_irq_info *sn_irq_info)
+{
+	struct ia64_sal_retval rv;
+	rv.status = 0;
+	rv.v0 = 0;
+
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT,
+				SAL_INTR_FREE, nasid,
+				widget, sn_irq_info->irq_irq,
+				sn_irq_info->irq_cookie, 0, 0);
+}
+
+struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq,
+				    nasid_t req_nasid, int slice)
+{
+	struct sn_irq_info *sn_irq_info;
+	int status;
+	int sn_irq_size = sizeof(struct sn_irq_info);
+
+	if ((nasid & 1) == 0)
+		return NULL;
+
+	sn_irq_info = kmalloc(sn_irq_size, GFP_KERNEL);
+	if (sn_irq_info == NULL)
+		return NULL;
+
+	memset(sn_irq_info, 0x0, sn_irq_size);
+
+	status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq,
+				  req_nasid, slice);
+	if (status) {
+		kfree(sn_irq_info);
+		return NULL;
+	} else {
+		return sn_irq_info;
+	}
+}
+
+void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
+{
+	uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
+	nasid_t nasid = NASID_GET(bridge);
+	int widget;
+
+	if (nasid & 1) {
+		widget = TIO_SWIN_WIDGETNUM(bridge);
+		tiocx_intr_free(nasid, widget, sn_irq_info);
+		kfree(sn_irq_info);
+	}
+}
+
+uint64_t
+tiocx_dma_addr(uint64_t addr)
+{
+	return PHYS_TO_TIODMA(addr);
+}
+
+uint64_t
+tiocx_swin_base(int nasid)
+{
+	return TIO_SWIN_BASE(nasid, TIOCX_CORELET);
+}
+
+EXPORT_SYMBOL(cx_driver_register);
+EXPORT_SYMBOL(cx_driver_unregister);
+EXPORT_SYMBOL(cx_device_register);
+EXPORT_SYMBOL(cx_device_unregister);
+EXPORT_SYMBOL(tiocx_irq_alloc);
+EXPORT_SYMBOL(tiocx_irq_free);
+EXPORT_SYMBOL(tiocx_bus_type);
+EXPORT_SYMBOL(tiocx_dma_addr);
+EXPORT_SYMBOL(tiocx_swin_base);
+
+static uint64_t tiocx_get_hubdev_info(u64 handle, u64 address)
+{
+
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	ia64_sal_oemcall_nolock(&ret_stuff,
+				SN_SAL_IOIF_GET_HUBDEV_INFO,
+				handle, address, 0, 0, 0, 0, 0);
+	return ret_stuff.v0;
+}
+
+static void tio_conveyor_set(nasid_t nasid, int enable_flag)
+{
+	uint64_t ice_frz;
+	uint64_t disable_cb = (1ull << 61);
+
+	if (!(nasid & 1))
+		return;
+
+	ice_frz = REMOTE_HUB_L(nasid, TIO_ICE_FRZ_CFG);
+	if (enable_flag) {
+		if (!(ice_frz & disable_cb))	/* already enabled */
+			return;
+		ice_frz &= ~disable_cb;
+	} else {
+		if (ice_frz & disable_cb)	/* already disabled */
+			return;
+		ice_frz |= disable_cb;
+	}
+	DBG(KERN_ALERT "TIO_ICE_FRZ_CFG= 0x%lx\n", ice_frz);
+	REMOTE_HUB_S(nasid, TIO_ICE_FRZ_CFG, ice_frz);
+}
+
+#define tio_conveyor_enable(nasid) tio_conveyor_set(nasid, 1)
+#define tio_conveyor_disable(nasid) tio_conveyor_set(nasid, 0)
+
+static void tio_corelet_reset(nasid_t nasid, int corelet)
+{
+	if (!(nasid & 1))
+		return;
+
+	REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 1 << corelet);
+	udelay(2000);
+	REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 0);
+	udelay(2000);
+}
+
+static int fpga_attached(nasid_t nasid)
+{
+	uint64_t cx_credits;
+
+	cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3);
+	cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK;
+	DBG("cx_credits= 0x%lx\n", cx_credits);
+
+	return (cx_credits == 0xf) ? 1 : 0;
+}
+
+static int tiocx_reload(struct cx_dev *cx_dev)
+{
+	int part_num = CX_DEV_NONE;
+	int mfg_num = CX_DEV_NONE;
+	nasid_t nasid = cx_dev->cx_id.nasid;
+
+	if (fpga_attached(nasid)) {
+		uint64_t cx_id;
+
+		cx_id =
+		    *(volatile int32_t *)(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
+					  WIDGET_ID);
+		part_num = XWIDGET_PART_NUM(cx_id);
+		mfg_num = XWIDGET_MFG_NUM(cx_id);
+		DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num);
+		/* just ignore it if it's a CE */
+		if (part_num == TIO_CE_ASIC_PARTNUM)
+			return 0;
+	}
+
+	cx_dev->cx_id.part_num = part_num;
+	cx_dev->cx_id.mfg_num = mfg_num;
+
+	/*
+	 * Delete old device and register the new one.  It's ok if
+	 * part_num/mfg_num == CX_DEV_NONE.  We want to register
+	 * devices in the table even if a bitstream isn't loaded.
+	 * That allows use to see that a bitstream isn't loaded via
+	 * TIOCX_IOCTL_DEV_LIST.
+	 */
+	return cx_device_reload(cx_dev);
+}
+
+static ssize_t show_cxdev_control(struct device *dev, char *buf)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+
+	return sprintf(buf, "0x%x 0x%x 0x%x\n",
+		       cx_dev->cx_id.nasid,
+		       cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num);
+}
+
+static ssize_t store_cxdev_control(struct device *dev, const char *buf,
+				   size_t count)
+{
+	int n;
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (count <= 0)
+		return 0;
+
+	n = simple_strtoul(buf, NULL, 0);
+
+	switch (n) {
+	case 1:
+		tiocx_reload(cx_dev);
+		break;
+	case 3:
+		tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET);
+		break;
+	default:
+		break;
+	}
+
+	return count;
+}
+
+DEVICE_ATTR(cxdev_control, 0644, show_cxdev_control, store_cxdev_control);
+
+static int __init tiocx_init(void)
+{
+	cnodeid_t cnodeid;
+	int found_tiocx_device = 0;
+
+	bus_register(&tiocx_bus_type);
+
+	for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) {
+		nasid_t nasid;
+
+		if ((nasid = cnodeid_to_nasid(cnodeid)) < 0)
+			break;	/* No more nasids .. bail out of loop */
+
+		if (nasid & 0x1) {	/* TIO's are always odd */
+			struct hubdev_info *hubdev;
+			uint64_t status;
+			struct xwidget_info *widgetp;
+
+			DBG("Found TIO at nasid 0x%x\n", nasid);
+
+			hubdev =
+			    (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo);
+			status =
+			    tiocx_get_hubdev_info(nasid,
+						  (uint64_t) __pa(hubdev));
+			if (status)
+				continue;
+
+			widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET];
+
+			/* The CE hangs off of the CX port but is not an FPGA */
+			if (widgetp->xwi_hwid.part_num == TIO_CE_ASIC_PARTNUM)
+				continue;
+
+			tio_corelet_reset(nasid, TIOCX_CORELET);
+			tio_conveyor_enable(nasid);
+
+			if (cx_device_register
+			    (nasid, widgetp->xwi_hwid.part_num,
+			     widgetp->xwi_hwid.mfg_num, hubdev) < 0)
+				return -ENXIO;
+			else
+				found_tiocx_device++;
+		}
+	}
+
+	/* It's ok if we find zero devices. */
+	DBG("found_tiocx_device= %d\n", found_tiocx_device);
+
+	return 0;
+}
+
+static void __exit tiocx_exit(void)
+{
+	struct device *dev;
+	struct device *tdev;
+
+	DBG("tiocx_exit\n");
+
+	/*
+	 * Unregister devices.
+	 */
+	list_for_each_entry_safe(dev, tdev, &tiocx_bus_type.devices.list,
+				 bus_list) {
+		if (dev) {
+			struct cx_dev *cx_dev = to_cx_dev(dev);
+			device_remove_file(dev, &dev_attr_cxdev_control);
+			cx_device_unregister(cx_dev);
+		}
+	}
+
+	bus_unregister(&tiocx_bus_type);
+}
+
+module_init(tiocx_init);
+module_exit(tiocx_exit);
+
+/************************************************************************
+ * Module licensing and description
+ ************************************************************************/
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bruce Losure <blosure@sgi.com>");
+MODULE_DESCRIPTION("TIOCX module");
+MODULE_SUPPORTED_DEVICE(DEVICE_NAME);

From fde740e4dd4a05ca8957490d468fa9b2770f5bd6 Mon Sep 17 00:00:00 2001
From: Robin Holt <holt@sgi.com>
Date: Mon, 25 Apr 2005 13:13:16 -0700
Subject: [PATCH 16/34] [IA64] Percpu quicklist for combined allocator for
 pgd/pmd/pte.

This patch introduces using the quicklists for pgd, pmd, and pte levels
by combining the alloc and free functions into a common set of routines.
This greatly simplifies the reading of this header file.

This patch is simple but necessary for large numa configurations.
It simply ensures that only pages from the local node are added to a
cpus quicklist.  This prevents the trapping of pages on a remote nodes
quicklist by starting a process, touching a large number of pages to
fill pmd and pte entries, migrating to another node, and then unmapping
or exiting.  With those conditions, the pages get trapped and if the
machine has more than 100 nodes of the same size, the calculation of
the pgtable high water mark will be larger than any single node so page
table cache flushing will never occur.

I ran lmbench lat_proc fork and lat_proc exec on a zx1 with and without
this patch and did not notice any change.

On an sn2 machine, there was a slight improvement which is possibly
due to pages from other nodes trapped on the test node before starting
the run.  I did not investigate further.

This patch shrinks the quicklist based upon free memory on the node
instead of the high/low water marks.  I have written it to enable
preemption periodically and recalculate the amount to shrink every time
we have freed enough pages that the quicklist size should have grown.
I rescan the nodes zones each pass because other processess may be
draining node memory at the same time as we are adding.

Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/mm/contig.c        |   3 +-
 arch/ia64/mm/discontig.c     |   3 +-
 arch/ia64/mm/init.c          |  78 ++++++++++-------
 include/asm-ia64/pgalloc.h   | 158 +++++++++++++++--------------------
 include/asm-ia64/processor.h |   3 -
 5 files changed, 121 insertions(+), 124 deletions(-)

diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 6daf15ac8940..91a055f5731f 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -61,7 +61,8 @@ show_mem (void)
 	printk("%d reserved pages\n", reserved);
 	printk("%d pages shared\n", shared);
 	printk("%d pages swap cached\n", cached);
-	printk("%ld pages in page table cache\n", pgtable_cache_size);
+	printk("%ld pages in page table cache\n",
+		pgtable_quicklist_total_size());
 }
 
 /* physical address where the bootmem map is located */
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 3456a9b6971e..c00710929390 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -582,7 +582,8 @@ void show_mem(void)
 	printk("%d reserved pages\n", total_reserved);
 	printk("%d pages shared\n", total_shared);
 	printk("%d pages swap cached\n", total_cached);
-	printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
+	printk("Total of %ld pages in page table cache\n",
+		pgtable_quicklist_total_size());
 	printk("%d free buffer pages\n", nr_free_buffer_pages());
 }
 
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 65cf839573ea..4892be53e227 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -39,6 +39,9 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
+DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
+DEFINE_PER_CPU(long, __pgtable_quicklist_size);
+
 extern void ia64_tlb_init (void);
 
 unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
@@ -50,27 +53,53 @@ struct page *vmem_map;
 EXPORT_SYMBOL(vmem_map);
 #endif
 
-static int pgt_cache_water[2] = { 25, 50 };
-
-struct page *zero_page_memmap_ptr;		/* map entry for zero page */
+struct page *zero_page_memmap_ptr;	/* map entry for zero page */
 EXPORT_SYMBOL(zero_page_memmap_ptr);
 
-void
-check_pgt_cache (void)
-{
-	int low, high;
+#define MIN_PGT_PAGES			25UL
+#define MAX_PGT_FREES_PER_PASS		16
+#define PGT_FRACTION_OF_NODE_MEM	16
 
-	low = pgt_cache_water[0];
-	high = pgt_cache_water[1];
+static inline long
+max_pgt_pages(void)
+{
+	u64 node_free_pages, max_pgt_pages;
+
+#ifndef	CONFIG_NUMA
+	node_free_pages = nr_free_pages();
+#else
+	node_free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
+#endif
+	max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM;
+	max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
+	return max_pgt_pages;
+}
+
+static inline long
+min_pages_to_free(void)
+{
+	long pages_to_free;
+
+	pages_to_free = pgtable_quicklist_size - max_pgt_pages();
+	pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS);
+	return pages_to_free;
+}
+
+void
+check_pgt_cache(void)
+{
+	long pages_to_free;
+
+	if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
+		return;
 
 	preempt_disable();
-	if (pgtable_cache_size > (u64) high) {
-		do {
-			if (pgd_quicklist)
-				free_page((unsigned long)pgd_alloc_one_fast(NULL));
-			if (pmd_quicklist)
-				free_page((unsigned long)pmd_alloc_one_fast(NULL, 0));
-		} while (pgtable_cache_size > (u64) low);
+	while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
+		while (pages_to_free--) {
+			free_page((unsigned long)pgtable_quicklist_alloc());
+		}
+		preempt_enable();
+		preempt_disable();
 	}
 	preempt_enable();
 }
@@ -523,11 +552,14 @@ void
 mem_init (void)
 {
 	long reserved_pages, codesize, datasize, initsize;
-	unsigned long num_pgt_pages;
 	pg_data_t *pgdat;
 	int i;
 	static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
 
+	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
+	BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
+	BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE);
+
 #ifdef CONFIG_PCI
 	/*
 	 * This needs to be called _after_ the command line has been parsed but _before_
@@ -564,18 +596,6 @@ mem_init (void)
 	       num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
 	       reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
 
-	/*
-	 * Allow for enough (cached) page table pages so that we can map the entire memory
-	 * at least once.  Each task also needs a couple of page tables pages, so add in a
-	 * fudge factor for that (don't use "threads-max" here; that would be wrong!).
-	 * Don't allow the cache to be more than 10% of total memory, though.
-	 */
-#	define NUM_TASKS	500	/* typical number of tasks */
-	num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
-	if (num_pgt_pages > nr_free_pages() / 10)
-		num_pgt_pages = nr_free_pages() / 10;
-	if (num_pgt_pages > (u64) pgt_cache_water[1])
-		pgt_cache_water[1] = num_pgt_pages;
 
 	/*
 	 * For fsyscall entrpoints with no light-weight handler, use the ordinary
diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h
index 0f05dc8bd460..e86a8c331ee6 100644
--- a/include/asm-ia64/pgalloc.h
+++ b/include/asm-ia64/pgalloc.h
@@ -22,146 +22,124 @@
 
 #include <asm/mmu_context.h>
 
-/*
- * Very stupidly, we used to get new pgd's and pmd's, init their contents
- * to point to the NULL versions of the next level page table, later on
- * completely re-init them the same way, then free them up.  This wasted
- * a lot of work and caused unnecessary memory traffic.  How broken...
- * We fix this by caching them.
- */
-#define pgd_quicklist		(local_cpu_data->pgd_quick)
-#define pmd_quicklist		(local_cpu_data->pmd_quick)
-#define pgtable_cache_size	(local_cpu_data->pgtable_cache_sz)
+DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist);
+#define pgtable_quicklist __ia64_per_cpu_var(__pgtable_quicklist)
+DECLARE_PER_CPU(long, __pgtable_quicklist_size);
+#define pgtable_quicklist_size __ia64_per_cpu_var(__pgtable_quicklist_size)
 
-static inline pgd_t*
-pgd_alloc_one_fast (struct mm_struct *mm)
+static inline long pgtable_quicklist_total_size(void)
+{
+	long ql_size;
+	int cpuid;
+
+	for_each_online_cpu(cpuid) {
+		ql_size += per_cpu(__pgtable_quicklist_size, cpuid);
+	}
+	return ql_size;
+}
+
+static inline void *pgtable_quicklist_alloc(void)
 {
 	unsigned long *ret = NULL;
 
 	preempt_disable();
 
-	ret = pgd_quicklist;
+	ret = pgtable_quicklist;
 	if (likely(ret != NULL)) {
-		pgd_quicklist = (unsigned long *)(*ret);
+		pgtable_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
-		--pgtable_cache_size;
-	} else
-		ret = NULL;
-
-	preempt_enable();
-
-	return (pgd_t *) ret;
-}
-
-static inline pgd_t*
-pgd_alloc (struct mm_struct *mm)
-{
-	/* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
-	pgd_t *pgd = pgd_alloc_one_fast(mm);
-
-	if (unlikely(pgd == NULL)) {
-		pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+		--pgtable_quicklist_size;
+	} else {
+		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 	}
-	return pgd;
+
+	preempt_enable();
+
+	return ret;
 }
 
-static inline void
-pgd_free (pgd_t *pgd)
+static inline void pgtable_quicklist_free(void *pgtable_entry)
 {
+#ifdef CONFIG_NUMA
+	unsigned long nid = page_to_nid(virt_to_page(pgtable_entry));
+
+	if (unlikely(nid != numa_node_id())) {
+		free_page((unsigned long)pgtable_entry);
+		return;
+	}
+#endif
+
 	preempt_disable();
-	*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
-	pgd_quicklist = (unsigned long *) pgd;
-	++pgtable_cache_size;
+	*(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist;
+	pgtable_quicklist = (unsigned long *)pgtable_entry;
+	++pgtable_quicklist_size;
 	preempt_enable();
 }
 
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return pgtable_quicklist_alloc();
+}
+
+static inline void pgd_free(pgd_t * pgd)
+{
+	pgtable_quicklist_free(pgd);
+}
+
 static inline void
-pud_populate (struct mm_struct *mm, pud_t *pud_entry, pmd_t *pmd)
+pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
 {
 	pud_val(*pud_entry) = __pa(pmd);
 }
 
-static inline pmd_t*
-pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	unsigned long *ret = NULL;
-
-	preempt_disable();
-
-	ret = (unsigned long *)pmd_quicklist;
-	if (likely(ret != NULL)) {
-		pmd_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		--pgtable_cache_size;
-	}
-
-	preempt_enable();
-
-	return (pmd_t *)ret;
+	return pgtable_quicklist_alloc();
 }
 
-static inline pmd_t*
-pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
+static inline void pmd_free(pmd_t * pmd)
 {
-	pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-
-	return pmd;
-}
-
-static inline void
-pmd_free (pmd_t *pmd)
-{
-	preempt_disable();
-	*(unsigned long *)pmd = (unsigned long) pmd_quicklist;
-	pmd_quicklist = (unsigned long *) pmd;
-	++pgtable_cache_size;
-	preempt_enable();
+	pgtable_quicklist_free(pmd);
 }
 
 #define __pmd_free_tlb(tlb, pmd)	pmd_free(pmd)
 
 static inline void
-pmd_populate (struct mm_struct *mm, pmd_t *pmd_entry, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte)
 {
 	pmd_val(*pmd_entry) = page_to_phys(pte);
 }
 
 static inline void
-pmd_populate_kernel (struct mm_struct *mm, pmd_t *pmd_entry, pte_t *pte)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 {
 	pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline struct page *
-pte_alloc_one (struct mm_struct *mm, unsigned long addr)
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+					 unsigned long addr)
 {
-	struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-
-	return pte;
+	return virt_to_page(pgtable_quicklist_alloc());
 }
 
-static inline pte_t *
-pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long addr)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-
-	return pte;
+	return pgtable_quicklist_alloc();
 }
 
-static inline void
-pte_free (struct page *pte)
+static inline void pte_free(struct page *pte)
 {
-	__free_page(pte);
+	pgtable_quicklist_free(page_address(pte));
 }
 
-static inline void
-pte_free_kernel (pte_t *pte)
+static inline void pte_free_kernel(pte_t * pte)
 {
-	free_page((unsigned long) pte);
+	pgtable_quicklist_free(pte);
 }
 
-#define __pte_free_tlb(tlb, pte)	tlb_remove_page((tlb), (pte))
+#define __pte_free_tlb(tlb, pte)	pte_free(pte)
 
-extern void check_pgt_cache (void);
+extern void check_pgt_cache(void);
 
-#endif /* _ASM_IA64_PGALLOC_H */
+#endif				/* _ASM_IA64_PGALLOC_H */
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 2807f8d766d4..983798ec1791 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -137,9 +137,6 @@ struct cpuinfo_ia64 {
 	__u64 nsec_per_cyc;	/* (1000000000<<IA64_NSEC_PER_CYC_SHIFT)/itc_freq */
 	__u64 unimpl_va_mask;	/* mask of unimplemented virtual address bits (from PAL) */
 	__u64 unimpl_pa_mask;	/* mask of unimplemented physical address bits (from PAL) */
-	__u64 *pgd_quick;
-	__u64 *pmd_quick;
-	__u64 pgtable_cache_sz;
 	__u64 itc_freq;		/* frequency of ITC counter */
 	__u64 proc_freq;	/* frequency of processor */
 	__u64 cyc_per_usec;	/* itc_freq/1000000 */

From 5f6602a101993592b437b801c401443bec65d0cf Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@hp.com>
Date: Mon, 25 Apr 2005 13:14:36 -0700
Subject: [PATCH 17/34] [IA64] sba_iommu bug fixes

   This fixes a couple of bugs in the zx1/sx1000 sba_iommu.  These are
all pretty low likelihood of hitting.  The first problem is a simple off
by one, deep in the sba_alloc_range() error path.  Surrounding that was
a lock ordering problem that could have potentially deadlocked with the
order the locks are grabbed in sba_unmap_single().  I moved the resource
locking into sba_search_bitmap() to prevent this.  Finally, there's a
potential race between unmapping pdir entries and marking incoming DMA
pages clean.  If you see any oddities, please let me know, but I've
tested it pretty thoroughly here.  Tony, please apply.  Thanks,

BTW, many of the options in this driver not on by default are becoming
more and more broken.  I'll be working on some patches to clean them
out, but I wanted to get this bug fix out first.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/hp/common/sba_iommu.c | 96 +++++++++++++++++++--------------
 1 file changed, 56 insertions(+), 40 deletions(-)

diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 017c9ab5fc1b..6a8fcba7a853 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1,9 +1,9 @@
 /*
 **  IA64 System Bus Adapter (SBA) I/O MMU manager
 **
-**	(c) Copyright 2002-2004 Alex Williamson
+**	(c) Copyright 2002-2005 Alex Williamson
 **	(c) Copyright 2002-2003 Grant Grundler
-**	(c) Copyright 2002-2004 Hewlett-Packard Company
+**	(c) Copyright 2002-2005 Hewlett-Packard Company
 **
 **	Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
 **	Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
@@ -459,21 +459,32 @@ get_iovp_order (unsigned long size)
  * sba_search_bitmap - find free space in IO PDIR resource bitmap
  * @ioc: IO MMU structure which owns the pdir we are interested in.
  * @bits_wanted: number of entries we need.
+ * @use_hint: use res_hint to indicate where to start looking
  *
  * Find consecutive free bits in resource bitmap.
  * Each bit represents one entry in the IO Pdir.
  * Cool perf optimization: search for log2(size) bits at a time.
  */
 static SBA_INLINE unsigned long
-sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted)
+sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 {
-	unsigned long *res_ptr = ioc->res_hint;
+	unsigned long *res_ptr;
 	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
-	unsigned long pide = ~0UL;
+	unsigned long flags, pide = ~0UL;
 
 	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
 	ASSERT(res_ptr < res_end);
 
+	spin_lock_irqsave(&ioc->res_lock, flags);
+
+	/* Allow caller to force a search through the entire resource space */
+	if (likely(use_hint)) {
+		res_ptr = ioc->res_hint;
+	} else {
+		res_ptr = (ulong *)ioc->res_map;
+		ioc->res_bitshift = 0;
+	}
+
 	/*
 	 * N.B.  REO/Grande defect AR2305 can cause TLB fetch timeouts
 	 * if a TLB entry is purged while in use.  sba_mark_invalid()
@@ -570,10 +581,12 @@ not_found:
 	prefetch(ioc->res_map);
 	ioc->res_hint = (unsigned long *) ioc->res_map;
 	ioc->res_bitshift = 0;
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
 	return (pide);
 
 found_it:
 	ioc->res_hint = res_ptr;
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
 	return (pide);
 }
 
@@ -594,36 +607,36 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 	unsigned long itc_start;
 #endif
 	unsigned long pide;
-	unsigned long flags;
 
 	ASSERT(pages_needed);
 	ASSERT(0 == (size & ~iovp_mask));
 
-	spin_lock_irqsave(&ioc->res_lock, flags);
-
 #ifdef PDIR_SEARCH_TIMING
 	itc_start = ia64_get_itc();
 #endif
 	/*
 	** "seek and ye shall find"...praying never hurts either...
 	*/
-	pide = sba_search_bitmap(ioc, pages_needed);
+	pide = sba_search_bitmap(ioc, pages_needed, 1);
 	if (unlikely(pide >= (ioc->res_size << 3))) {
-		pide = sba_search_bitmap(ioc, pages_needed);
+		pide = sba_search_bitmap(ioc, pages_needed, 0);
 		if (unlikely(pide >= (ioc->res_size << 3))) {
 #if DELAYED_RESOURCE_CNT > 0
+			unsigned long flags;
+
 			/*
 			** With delayed resource freeing, we can give this one more shot.  We're
 			** getting close to being in trouble here, so do what we can to make this
 			** one count.
 			*/
-			spin_lock(&ioc->saved_lock);
+			spin_lock_irqsave(&ioc->saved_lock, flags);
 			if (ioc->saved_cnt > 0) {
 				struct sba_dma_pair *d;
 				int cnt = ioc->saved_cnt;
 
-				d = &(ioc->saved[ioc->saved_cnt]);
+				d = &(ioc->saved[ioc->saved_cnt - 1]);
 
+				spin_lock(&ioc->res_lock);
 				while (cnt--) {
 					sba_mark_invalid(ioc, d->iova, d->size);
 					sba_free_range(ioc, d->iova, d->size);
@@ -631,10 +644,11 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 				}
 				ioc->saved_cnt = 0;
 				READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+				spin_unlock(&ioc->res_lock);
 			}
-			spin_unlock(&ioc->saved_lock);
+			spin_unlock_irqrestore(&ioc->saved_lock, flags);
 
-			pide = sba_search_bitmap(ioc, pages_needed);
+			pide = sba_search_bitmap(ioc, pages_needed, 0);
 			if (unlikely(pide >= (ioc->res_size << 3)))
 				panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
 				      ioc->ioc_hpa);
@@ -664,8 +678,6 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
 		ioc->res_bitshift );
 
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-
 	return (pide);
 }
 
@@ -950,6 +962,30 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
 	return SBA_IOVA(ioc, iovp, offset);
 }
 
+#ifdef ENABLE_MARK_CLEAN
+static SBA_INLINE void
+sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
+{
+	u32	iovp = (u32) SBA_IOVP(ioc,iova);
+	int	off = PDIR_INDEX(iovp);
+	void	*addr;
+
+	if (size <= iovp_size) {
+		addr = phys_to_virt(ioc->pdir_base[off] &
+		                    ~0xE000000000000FFFULL);
+		mark_clean(addr, size);
+	} else {
+		do {
+			addr = phys_to_virt(ioc->pdir_base[off] &
+			                    ~0xE000000000000FFFULL);
+			mark_clean(addr, min(size, iovp_size));
+			off++;
+			size -= iovp_size;
+		} while (size > 0);
+	}
+}
+#endif
+
 /**
  * sba_unmap_single - unmap one IOVA and free resources
  * @dev: instance of PCI owned by the driver that's asking.
@@ -995,6 +1031,10 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
 	size += offset;
 	size = ROUNDUP(size, iovp_size);
 
+#ifdef ENABLE_MARK_CLEAN
+	if (dir == DMA_FROM_DEVICE)
+		sba_mark_clean(ioc, iova, size);
+#endif
 
 #if DELAYED_RESOURCE_CNT > 0
 	spin_lock_irqsave(&ioc->saved_lock, flags);
@@ -1021,30 +1061,6 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
 	READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif /* DELAYED_RESOURCE_CNT == 0 */
-#ifdef ENABLE_MARK_CLEAN
-	if (dir == DMA_FROM_DEVICE) {
-		u32 iovp = (u32) SBA_IOVP(ioc,iova);
-		int off = PDIR_INDEX(iovp);
-		void *addr;
-
-		if (size <= iovp_size) {
-			addr = phys_to_virt(ioc->pdir_base[off] &
-					    ~0xE000000000000FFFULL);
-			mark_clean(addr, size);
-		} else {
-			size_t byte_cnt = size;
-
-			do {
-				addr = phys_to_virt(ioc->pdir_base[off] &
-				                    ~0xE000000000000FFFULL);
-				mark_clean(addr, min(byte_cnt, iovp_size));
-				off++;
-				byte_cnt -= iovp_size;
-
-			   } while (byte_cnt > 0);
-		}
-	}
-#endif
 }
 
 

From c411cb56586915350e4cdb6f228e9da2adba3285 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 25 Apr 2005 13:16:16 -0700
Subject: [PATCH 18/34] [IA64] fix: warning: `ql_size' might be used
 uninitialized

Oops.  Should have caught this before I checked it in.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/pgalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h
index e86a8c331ee6..2b7127330ae1 100644
--- a/include/asm-ia64/pgalloc.h
+++ b/include/asm-ia64/pgalloc.h
@@ -29,7 +29,7 @@ DECLARE_PER_CPU(long, __pgtable_quicklist_size);
 
 static inline long pgtable_quicklist_total_size(void)
 {
-	long ql_size;
+	long ql_size = 0;
 	int cpuid;
 
 	for_each_online_cpu(cpuid) {

From e96c9b4779e651a7469bea677be3a08f70be399e Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 25 Apr 2005 13:16:59 -0700
Subject: [PATCH 19/34] [IA64] MAX_PGT_FREES_PER_PASS must be 'L' to avoid
 warning

'min' is very picky about types of arguments, make it happy

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 4892be53e227..547785e3cba2 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -57,7 +57,7 @@ struct page *zero_page_memmap_ptr;	/* map entry for zero page */
 EXPORT_SYMBOL(zero_page_memmap_ptr);
 
 #define MIN_PGT_PAGES			25UL
-#define MAX_PGT_FREES_PER_PASS		16
+#define MAX_PGT_FREES_PER_PASS		16L
 #define PGT_FRACTION_OF_NODE_MEM	16
 
 static inline long

From 4628d7cada7a19166ba8fe57f5ef0f0009694e1e Mon Sep 17 00:00:00 2001
From: Mark Maule <maule@sgi.com>
Date: Mon, 25 Apr 2005 13:18:02 -0700
Subject: [PATCH 20/34] [IA64-SGI] disable TIOCA GART TLB prefetching

Patch to disable SGI TIOCA GART TLB prefetching due to hw bug.

Signed-off-by: Mark Maule <maule@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/pci/tioca_provider.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 2234d61cdd4b..54a0dd447e76 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -171,15 +171,15 @@ tioca_gart_init(struct tioca_kernel *tioca_kern)
 	 *      use agp op-combining
 	 *      use GET semantics to fetch memory
 	 *      participate in coherency domain
-	 *      prefetch TLB entries
+	 * 	DISABLE GART PREFETCHING due to hw bug tracked in SGI PV930029
 	 */
 
 	ca_base->ca_control1 |= CA_AGPDMA_OP_ENB_COMBDELAY;	/* PV895469 ? */
 	ca_base->ca_control2 &= ~(CA_GART_MEM_PARAM);
 	ca_base->ca_control2 |= (0x2ull << CA_GART_MEM_PARAM_SHFT);
 	tioca_kern->ca_gart_iscoherent = 1;
-	ca_base->ca_control2 |=
-	    (CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB);
+	ca_base->ca_control2 &=
+	    ~(CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB);
 
 	/*
 	 * Unmask GART fetch error interrupts.  Clear residual errors first.

From 95ff439a517835aa2bdf725fafbb025a63984289 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Mon, 25 Apr 2005 13:19:11 -0700
Subject: [PATCH 21/34] [IA64-SGI] Add new MMR definitions/Modify BTE
 initialiation&copy.

patch 1:
	Add new MMR definitions.
	Modify BTE initialiation.
	Modify BTE copy.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/bte.c      | 20 +++++++------
 include/asm-ia64/sn/bte.h      | 53 ++++++++++++++++++++++++++++++----
 include/asm-ia64/sn/nodepda.h  |  4 +--
 include/asm-ia64/sn/pda.h      |  3 +-
 include/asm-ia64/sn/shub_mmr.h | 20 ++++++++++++-
 5 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index ce0bc4085eae..647deae9bfcd 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/config.h>
@@ -170,10 +170,6 @@ retry_bteop:
 	/* Initialize the notification to a known value. */
 	*bte->most_rcnt_na = BTE_WORD_BUSY;
 
-	/* Set the status reg busy bit and transfer length */
-	BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size));
-	BTE_LNSTAT_STORE(bte, IBLS_BUSY | transfer_size);
-
 	/* Set the source and destination registers */
 	BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
 	BTE_SRC_STORE(bte, TO_PHYS(src));
@@ -188,7 +184,7 @@ retry_bteop:
 
 	/* Initiate the transfer */
 	BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
-	BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode));
+	BTE_START_TRANSFER(bte, transfer_size, BTE_VALID_MODE(mode));
 
 	itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);
 
@@ -429,10 +425,16 @@ void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
 	mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;
 
 	for (i = 0; i < BTES_PER_NODE; i++) {
+		u64 *base_addr;
+
 		/* Which link status register should we use? */
-		unsigned long link_status = (i == 0 ? IIO_IBLS0 : IIO_IBLS1);
-		mynodepda->bte_if[i].bte_base_addr = (u64 *)
-		    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), link_status);
+		base_addr = (u64 *)
+		    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i));
+		mynodepda->bte_if[i].bte_base_addr = base_addr;
+		mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr);
+		mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr);
+		mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr);
+		mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr);
 
 		/*
 		 * Initialize the notification and spinlock
diff --git a/include/asm-ia64/sn/bte.h b/include/asm-ia64/sn/bte.h
index 0ec27f99c181..f50da3d91d07 100644
--- a/include/asm-ia64/sn/bte.h
+++ b/include/asm-ia64/sn/bte.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
@@ -13,8 +13,12 @@
 #include <linux/timer.h>
 #include <linux/spinlock.h>
 #include <linux/cache.h>
+#include <asm/sn/pda.h>
 #include <asm/sn/types.h>
+#include <asm/sn/shub_mmr.h>
 
+#define IBCT_NOTIFY             (0x1UL << 4)
+#define IBCT_ZFIL_MODE          (0x1UL << 0)
 
 /* #define BTE_DEBUG */
 /* #define BTE_DEBUG_VERBOSE */
@@ -39,8 +43,36 @@
 
 
 /* Define hardware */
-#define BTES_PER_NODE 2
+#define BTES_PER_NODE (is_shub2() ? 4 : 2)
+#define MAX_BTES_PER_NODE 4
 
+#define BTE2OFF_CTRL	(0)
+#define BTE2OFF_SRC	(SH2_BT_ENG_SRC_ADDR_0 - SH2_BT_ENG_CSR_0)
+#define BTE2OFF_DEST	(SH2_BT_ENG_DEST_ADDR_0 - SH2_BT_ENG_CSR_0)
+#define BTE2OFF_NOTIFY	(SH2_BT_ENG_NOTIF_ADDR_0 - SH2_BT_ENG_CSR_0)
+
+#define BTE_BASE_ADDR(interface) 				\
+    (is_shub2() ? (interface == 0) ? SH2_BT_ENG_CSR_0 :		\
+		  (interface == 1) ? SH2_BT_ENG_CSR_1 :		\
+		  (interface == 2) ? SH2_BT_ENG_CSR_2 :		\
+		  		     SH2_BT_ENG_CSR_3 		\
+		: (interface == 0) ? IIO_IBLS0 : IIO_IBLS1)
+
+#define BTE_SOURCE_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_SRC/8) 			\
+		: base + (BTEOFF_SRC/8))
+
+#define BTE_DEST_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_DEST/8) 			\
+		: base + (BTEOFF_DEST/8))
+
+#define BTE_CTRL_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_CTRL/8) 			\
+		: base + (BTEOFF_CTRL/8))
+
+#define BTE_NOTIF_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_NOTIFY/8) 			\
+		: base + (BTEOFF_NOTIFY/8))
 
 /* Define hardware modes */
 #define BTE_NOTIFY (IBCT_NOTIFY)
@@ -68,14 +100,18 @@
 #define BTE_LNSTAT_STORE(_bte, _x)					\
 			HUB_S(_bte->bte_base_addr, (_x))
 #define BTE_SRC_STORE(_bte, _x)						\
-			HUB_S(_bte->bte_base_addr + (BTEOFF_SRC/8), (_x))
+			HUB_S(_bte->bte_source_addr, (_x))
 #define BTE_DEST_STORE(_bte, _x)					\
-			HUB_S(_bte->bte_base_addr + (BTEOFF_DEST/8), (_x))
+			HUB_S(_bte->bte_destination_addr, (_x))
 #define BTE_CTRL_STORE(_bte, _x)					\
-			HUB_S(_bte->bte_base_addr + (BTEOFF_CTRL/8), (_x))
+			HUB_S(_bte->bte_control_addr, (_x))
 #define BTE_NOTIF_STORE(_bte, _x)					\
-			HUB_S(_bte->bte_base_addr + (BTEOFF_NOTIFY/8), (_x))
+			HUB_S(_bte->bte_notify_addr, (_x))
 
+#define BTE_START_TRANSFER(_bte, _len, _mode)				\
+	is_shub2() ? BTE_CTRL_STORE(_bte, IBLS_BUSY | (_mode << 24) | _len) \
+		: BTE_LNSTAT_STORE(_bte, _len);				\
+		  BTE_CTRL_STORE(_bte, _mode)
 
 /* Possible results from bte_copy and bte_unaligned_copy */
 /* The following error codes map into the BTE hardware codes
@@ -110,6 +146,10 @@ typedef enum {
 struct bteinfo_s {
 	volatile u64 notify ____cacheline_aligned;
 	u64 *bte_base_addr ____cacheline_aligned;
+	u64 *bte_source_addr;
+	u64 *bte_destination_addr;
+	u64 *bte_control_addr;
+	u64 *bte_notify_addr;
 	spinlock_t spinlock;
 	cnodeid_t bte_cnode;	/* cnode                            */
 	int bte_error_count;	/* Number of errors encountered     */
@@ -117,6 +157,7 @@ struct bteinfo_s {
 	int cleanup_active;	/* Interface is locked for cleanup  */
 	volatile bte_result_t bh_error;	/* error while processing   */
 	volatile u64 *most_rcnt_na;
+	struct bteinfo_s *btes_to_try[MAX_BTES_PER_NODE];
 };
 
 
diff --git a/include/asm-ia64/sn/nodepda.h b/include/asm-ia64/sn/nodepda.h
index 2fbde33656e6..13cc1002b294 100644
--- a/include/asm-ia64/sn/nodepda.h
+++ b/include/asm-ia64/sn/nodepda.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
 #ifndef _ASM_IA64_SN_NODEPDA_H
 #define _ASM_IA64_SN_NODEPDA_H
@@ -43,7 +43,7 @@ struct nodepda_s {
 	/*
 	 * The BTEs on this node are shared by the local cpus
 	 */
-	struct bteinfo_s	bte_if[BTES_PER_NODE];	/* Virtual Interface */
+	struct bteinfo_s	bte_if[MAX_BTES_PER_NODE];	/* Virtual Interface */
 	struct timer_list	bte_recovery_timer;
 	spinlock_t		bte_recovery_lock;
 
diff --git a/include/asm-ia64/sn/pda.h b/include/asm-ia64/sn/pda.h
index e940d3647c80..cd19f17bf91a 100644
--- a/include/asm-ia64/sn/pda.h
+++ b/include/asm-ia64/sn/pda.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
 #ifndef _ASM_IA64_SN_PDA_H
 #define _ASM_IA64_SN_PDA_H
@@ -11,7 +11,6 @@
 #include <linux/cache.h>
 #include <asm/percpu.h>
 #include <asm/system.h>
-#include <asm/sn/bte.h>
 
 
 /*
diff --git a/include/asm-ia64/sn/shub_mmr.h b/include/asm-ia64/sn/shub_mmr.h
index 6ec37e816a9e..2f885088e095 100644
--- a/include/asm-ia64/sn/shub_mmr.h
+++ b/include/asm-ia64/sn/shub_mmr.h
@@ -4,7 +4,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2001-2004 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.  All rights reserved.
  */
 
 #ifndef _ASM_IA64_SN_SHUB_MMR_H
@@ -455,4 +455,22 @@
 #define SH_INT_CMPC		shubmmr(SH, INT_CMPC)
 #define SH_INT_CMPD		shubmmr(SH, INT_CMPD)
 
+/* ========================================================================== */
+/*                        Register "SH2_BT_ENG_CSR_0"                         */
+/*                    Engine 0 Control and Status Register                    */
+/* ========================================================================== */
+
+#define SH2_BT_ENG_CSR_0                         0x0000000030040000
+#define SH2_BT_ENG_SRC_ADDR_0                    0x0000000030040080
+#define SH2_BT_ENG_DEST_ADDR_0                   0x0000000030040100
+#define SH2_BT_ENG_NOTIF_ADDR_0                  0x0000000030040180
+
+/* ========================================================================== */
+/*                       BTE interfaces 1-3                                   */
+/* ========================================================================== */
+
+#define SH2_BT_ENG_CSR_1                         0x0000000030050000
+#define SH2_BT_ENG_CSR_2                         0x0000000030060000
+#define SH2_BT_ENG_CSR_3                         0x0000000030070000
+
 #endif /* _ASM_IA64_SN_SHUB_MMR_H */

From 93a07d0a0e7b013ee73fb39d4edb07b47288912e Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Mon, 25 Apr 2005 13:19:52 -0700
Subject: [PATCH 22/34] [IA64-SGI] Shub2 BTE support - BTE recovery code

patch 2:
	Shub2 BTE recovery code will be implemented in SAL.
	Define the SAL interface.
	Modify bte_error to call SAL for shub2.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/bte_error.c | 76 +++++++++++++++++++++------------
 arch/ia64/sn/kernel/huberror.c  |  9 ++--
 include/asm-ia64/sn/sn_sal.h    | 19 ++++++++-
 3 files changed, 71 insertions(+), 33 deletions(-)

diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
index fd104312c6bd..fcbc748ae433 100644
--- a/arch/ia64/sn/kernel/bte_error.c
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/types.h>
@@ -33,48 +33,28 @@ void bte_error_handler(unsigned long);
  * Wait until all BTE related CRBs are completed
  * and then reset the interfaces.
  */
-void bte_error_handler(unsigned long _nodepda)
+void shub1_bte_error_handler(unsigned long _nodepda)
 {
 	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
-	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
 	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
 	nasid_t nasid;
 	int i;
 	int valid_crbs;
-	unsigned long irq_flags;
-	volatile u64 *notify;
-	bte_result_t bh_error;
 	ii_imem_u_t imem;	/* II IMEM Register */
 	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
 	ii_ibcr_u_t ibcr;
 	ii_icmr_u_t icmr;
 	ii_ieclr_u_t ieclr;
 
-	BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
+	BTE_PRINTK(("shub1_bte_error_handler(%p) - %d\n", err_nodepda,
 		    smp_processor_id()));
 
-	spin_lock_irqsave(recovery_lock, irq_flags);
-
 	if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) &&
 	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
 		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
 			    smp_processor_id()));
-		spin_unlock_irqrestore(recovery_lock, irq_flags);
 		return;
 	}
-	/*
-	 * Lock all interfaces on this node to prevent new transfers
-	 * from being queued.
-	 */
-	for (i = 0; i < BTES_PER_NODE; i++) {
-		if (err_nodepda->bte_if[i].cleanup_active) {
-			continue;
-		}
-		spin_lock(&err_nodepda->bte_if[i].spinlock);
-		BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
-			    smp_processor_id(), i));
-		err_nodepda->bte_if[i].cleanup_active = 1;
-	}
 
 	/* Determine information about our hub */
 	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
@@ -101,7 +81,6 @@ void bte_error_handler(unsigned long _nodepda)
 		mod_timer(recovery_timer, HZ * 5);
 		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
 			    smp_processor_id()));
-		spin_unlock_irqrestore(recovery_lock, irq_flags);
 		return;
 	}
 	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
@@ -120,8 +99,6 @@ void bte_error_handler(unsigned long _nodepda)
 				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
 					    err_nodepda, smp_processor_id(),
 					    i));
-				spin_unlock_irqrestore(recovery_lock,
-						       irq_flags);
 				return;
 			}
 		}
@@ -146,6 +123,51 @@ void bte_error_handler(unsigned long _nodepda)
 	ibcr.ii_ibcr_fld_s.i_soft_reset = 1;
 	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
 
+	del_timer(recovery_timer);
+}
+
+/*
+ * Wait until all BTE related CRBs are completed
+ * and then reset the interfaces.
+ */
+void bte_error_handler(unsigned long _nodepda)
+{
+	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
+	int i;
+	nasid_t nasid;
+	unsigned long irq_flags;
+	volatile u64 *notify;
+	bte_result_t bh_error;
+
+	BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
+		    smp_processor_id()));
+
+	spin_lock_irqsave(recovery_lock, irq_flags);
+
+	/*
+	 * Lock all interfaces on this node to prevent new transfers
+	 * from being queued.
+	 */
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		if (err_nodepda->bte_if[i].cleanup_active) {
+			continue;
+		}
+		spin_lock(&err_nodepda->bte_if[i].spinlock);
+		BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
+			    smp_processor_id(), i));
+		err_nodepda->bte_if[i].cleanup_active = 1;
+	}
+
+	if (is_shub1()) {
+		shub1_bte_error_handler(_nodepda);
+	} else {
+		nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
+
+		if (ia64_sn_bte_recovery(nasid))
+			panic("bte_error_handler(): Fatal BTE Error");
+	}
+
 	for (i = 0; i < BTES_PER_NODE; i++) {
 		bh_error = err_nodepda->bte_if[i].bh_error;
 		if (bh_error != BTE_SUCCESS) {
@@ -165,8 +187,6 @@ void bte_error_handler(unsigned long _nodepda)
 		spin_unlock(&err_nodepda->bte_if[i].spinlock);
 	}
 
-	del_timer(recovery_timer);
-
 	spin_unlock_irqrestore(recovery_lock, irq_flags);
 }
 
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
index 2bdf684c5066..5c39b43ba3c0 100644
--- a/arch/ia64/sn/kernel/huberror.c
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992 - 1997, 2000,2002-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992 - 1997, 2000,2002-2005 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/types.h>
@@ -38,8 +38,11 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
 	if ((int)ret_stuff.v0)
 		panic("hubii_eint_handler(): Fatal TIO Error");
 
-	if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
-		(void)hubiio_crb_error_handler(hubdev_info);
+	if (is_shub1()) {
+		if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
+			(void)hubiio_crb_error_handler(hubdev_info);
+	} else 
+		bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid)));
 
 	return IRQ_HANDLED;
 }
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 410d356b40da..581f9a783045 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -8,7 +8,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All rights reserved.
  */
 
 
@@ -77,7 +77,7 @@
 #define  SN_SAL_IOIF_GET_PCI_TOPOLOGY	           0x02000059
 
 #define SN_SAL_HUB_ERROR_INTERRUPT		   0x02000060
-
+#define SN_SAL_BTE_RECOVER			   0x02000061
 
 /*
  * Service-specific constants
@@ -1023,4 +1023,19 @@ ia64_sn_ioif_get_pci_topology(u64 rack, u64 bay, u64 slot, u64 slab,
 	return (int) rv.status;
 }
 
+/*
+ * BTE error recovery is implemented in SAL
+ */
+static inline int
+ia64_sn_bte_recovery(nasid_t nasid)
+{
+	struct ia64_sal_retval rv;
+
+	rv.status = 0;
+	SAL_CALL_NOLOCK(rv, SN_SAL_BTE_RECOVER, 0, 0, 0, 0, 0, 0, 0);
+	if (rv.status == SALRET_NOT_IMPLEMENTED)
+		return 0;
+	return (int) rv.status;
+}
+
 #endif /* _ASM_IA64_SN_SN_SAL_H */

From a37d98f6a98254c05315e0bbf45c4602942d14b1 Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@hpl.hp.com>
Date: Mon, 25 Apr 2005 13:20:38 -0700
Subject: [PATCH 23/34] [IA64] fix syscall-optimization goof

Sadly, I goofed in this syscall-tuning patch:

ChangeSet 1.1966.1.40 2005/01/22 13:31:05 davidm@hpl.hp.com
  [IA64] Improve ia64_leave_syscall() for McKinley-type cores.

  Optimize ia64_leave_syscall() a bit better for McKinley-type cores.
  The patch looks big, but that's mostly due to renaming r16/r17 to r2/r3.
  Good for a 13 cycle improvement.

The problem is that the size of the physical stacked registers was
loaded into the wrong register (r3 instead of r17).  Since r17 by
coincidence always had the value 1, this had the effect of turning
rse_clear_invalid into a no-op.  That poses the risk of leaking kernel
state back to user-land and is hence not acceptable.

The fix below is simple, but unfortunately it costs us about 28 cycles
in syscall overhead. ;-(

Unfortunately, there isn't much we can do about that since those
registers have to be cleared one way or another.

	--david

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 73e23dafe8e9..bd86fea49a0c 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -759,7 +759,7 @@ ENTRY(ia64_leave_syscall)
 (pUStk) st1 [r14]=r17
 	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
 	;;
-(pUStk)	ld4 r3=[r3]		// r3 = cpu_data->phys_stacked_size_p8
+(pUStk)	ld4 r17=[r3]		// r17 = cpu_data->phys_stacked_size_p8
 	mov.m ar.csd=r0		// M2 clear ar.csd
 	mov b6=r18		// I0  restore b6
 	;;

From 0985ea8f2db87d32b0b750229889e55fed7458ef Mon Sep 17 00:00:00 2001
From: Mark Goodwin <markgw@sgi.com>
Date: Mon, 25 Apr 2005 13:21:54 -0700
Subject: [PATCH 24/34] [IA64-SGI] Altix SN add support for slots in geoid_t
 locator

This patch against ia64-test-2.6.12 is needed for forthcoming
Altix chipsets. It renames geoid_any_t to geoid_common_t and
splits the 8bit 'slab' field into two 4bit fields for 'slab'
and 'slot'. Similar changes in the Altix SAL will retain backward
compatibility for old kernels.

Signed-off-by: Mark Goodwin <markgw@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/sn2/sn_hwperf.c | 69 ++++++++++-------------------
 include/asm-ia64/sn/geo.h           | 45 +++++++++++--------
 include/asm-ia64/sn/sn_sal.h        |  4 +-
 include/asm-ia64/sn/types.h         |  3 +-
 4 files changed, 54 insertions(+), 67 deletions(-)

diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index e731fcb95f90..833e700fdac9 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -116,7 +116,7 @@ static int sn_hwperf_geoid_to_cnode(char *location)
 		module_id = geo_module(geoid);
 		this_rack = MODULE_GET_RACK(module_id);
 		this_bay = MODULE_GET_BPOS(module_id);
-		this_slot = 0; /* XXX */
+		this_slot = geo_slot(geoid);
 		this_slab = geo_slab(geoid);
 		if (rack == this_rack && bay == this_bay &&
 			slot == this_slot && slab == this_slab) {
@@ -176,20 +176,27 @@ static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj,
 
 static void print_pci_topology(struct seq_file *s,
 	struct sn_hwperf_object_info *obj, int *ordinal,
-	char *pci_topo_buf, int len)
+	u64 rack, u64 bay, u64 slot, u64 slab)
 {
 	char *p1;
 	char *p2;
+	char *pg;
 
-	for (p1=pci_topo_buf; *p1 && p1 < pci_topo_buf + len;) {
-		if (!(p2 = strchr(p1, '\n')))
-			break;
-		*p2 = '\0';
-		seq_printf(s, "pcibus %d %s-%s\n",
-			*ordinal, obj->location, p1);
-		(*ordinal)++;
-		p1 = p2 + 1;
+	if (!(pg = (char *)get_zeroed_page(GFP_KERNEL)))
+		return; /* ignore */
+	if (ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab,
+		__pa(pg), PAGE_SIZE) == SN_HWPERF_OP_OK) {
+		for (p1=pg; *p1 && p1 < pg + PAGE_SIZE;) {
+			if (!(p2 = strchr(p1, '\n')))
+				break;
+			*p2 = '\0';
+			seq_printf(s, "pcibus %d %s-%s\n",
+				*ordinal, obj->location, p1);
+			(*ordinal)++;
+			p1 = p2 + 1;
+		}
 	}
+	free_page((unsigned long)pg);
 }
 
 static int sn_topology_show(struct seq_file *s, void *d)
@@ -218,9 +225,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
 	u8 region_size;
 	u16 nasid_mask;
 	int nasid_msb;
-	char *pci_topo_buf;
 	int pci_bus_ordinal = 0;
-	static int pci_topo_buf_len = 256;
 
 	if (obj == objs) {
 		seq_printf(s, "# sn_topology version 2\n");
@@ -299,41 +304,13 @@ static int sn_topology_show(struct seq_file *s, void *d)
 		/*
 		 * PCI busses attached to this node, if any
 		 */
-		do {
-			if (sn_hwperf_location_to_bpos(obj->location,
-				&rack, &bay, &slot, &slab)) {
-				break;
-			}
+		if (sn_hwperf_location_to_bpos(obj->location,
+			&rack, &bay, &slot, &slab)) {
+			/* export pci bus info */
+			print_pci_topology(s, obj, &pci_bus_ordinal,
+				rack, bay, slot, slab);
 
-			if (!(pci_topo_buf = vmalloc(pci_topo_buf_len))) {
-				printk("sn_topology_show: vmalloc failed\n");
-				break;
-			}
-
-			e = ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab,
-			    	pci_topo_buf, pci_topo_buf_len);
-
-			switch (e) {
-			case SALRET_NOT_IMPLEMENTED:
-			case SALRET_INVALID_ARG:
-				/* ignore, don't print anything */
-				e = SN_HWPERF_OP_OK;
-				break;
-
-			case SALRET_ERROR:
-				/* retry with a bigger buffer */ 
-				pci_topo_buf_len += 256;
-				break;
-
-			case SN_HWPERF_OP_OK:
-			default:
-				/* export pci bus info */
-				print_pci_topology(s, obj, &pci_bus_ordinal,
-					pci_topo_buf, pci_topo_buf_len);
-				break;
-			}
-			vfree(pci_topo_buf);
-		} while (e != SN_HWPERF_OP_OK && pci_topo_buf_len < 0x200000);
+		}
 	}
 
 	if (obj->ports) {
diff --git a/include/asm-ia64/sn/geo.h b/include/asm-ia64/sn/geo.h
index f566343d25f8..84b254603b8d 100644
--- a/include/asm-ia64/sn/geo.h
+++ b/include/asm-ia64/sn/geo.h
@@ -18,32 +18,34 @@
 #define GEOID_SIZE	8	/* Would 16 be better?  The size can
 				   be different on different platforms. */
 
-#define MAX_SLABS	0xe	/* slabs per module */
+#define MAX_SLOTS	0xf	/* slots per module */
+#define MAX_SLABS	0xf	/* slabs per slot */
 
 typedef unsigned char	geo_type_t;
 
 /* Fields common to all substructures */
-typedef struct geo_any_s {
+typedef struct geo_common_s {
     moduleid_t	module;		/* The module (box) this h/w lives in */
     geo_type_t	type;		/* What type of h/w is named by this geoid_t */
-    slabid_t	slab;		/* The logical assembly within the module */
-} geo_any_t;
+    slabid_t	slab:4;		/* slab (ASIC), 0 .. 15 within slot */
+    slotid_t	slot:4;		/* slot (Blade), 0 .. 15 within module */
+} geo_common_t;
 
 /* Additional fields for particular types of hardware */
 typedef struct geo_node_s {
-    geo_any_t	any;		/* No additional fields needed */
+    geo_common_t	common;		/* No additional fields needed */
 } geo_node_t;
 
 typedef struct geo_rtr_s {
-    geo_any_t	any;		/* No additional fields needed */
+    geo_common_t	common;		/* No additional fields needed */
 } geo_rtr_t;
 
 typedef struct geo_iocntl_s {
-    geo_any_t	any;		/* No additional fields needed */
+    geo_common_t	common;		/* No additional fields needed */
 } geo_iocntl_t;
 
 typedef struct geo_pcicard_s {
-    geo_iocntl_t	any;
+    geo_iocntl_t	common;
     char		bus;	/* Bus/widget number */
     char		slot;	/* PCI slot number */
 } geo_pcicard_t;
@@ -62,14 +64,14 @@ typedef struct geo_mem_s {
 
 
 typedef union geoid_u {
-    geo_any_t	any;
-    geo_node_t	node;
+    geo_common_t	common;
+    geo_node_t		node;
     geo_iocntl_t	iocntl;
     geo_pcicard_t	pcicard;
-    geo_rtr_t	rtr;
-    geo_cpu_t	cpu;
-    geo_mem_t	mem;
-    char	padsize[GEOID_SIZE];
+    geo_rtr_t		rtr;
+    geo_cpu_t		cpu;
+    geo_mem_t		mem;
+    char		padsize[GEOID_SIZE];
 } geoid_t;
 
 
@@ -104,19 +106,26 @@ typedef union geoid_u {
 #define INVALID_CNODEID         ((cnodeid_t)-1)
 #define INVALID_PNODEID         ((pnodeid_t)-1)
 #define INVALID_SLAB            (slabid_t)-1
+#define INVALID_SLOT            (slotid_t)-1
 #define INVALID_MODULE          ((moduleid_t)-1)
 #define INVALID_PARTID          ((partid_t)-1)
 
 static inline slabid_t geo_slab(geoid_t g)
 {
-	return (g.any.type == GEO_TYPE_INVALID) ?
-		INVALID_SLAB : g.any.slab;
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_SLAB : g.common.slab;
+}
+
+static inline slotid_t geo_slot(geoid_t g)
+{
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_SLOT : g.common.slot;
 }
 
 static inline moduleid_t geo_module(geoid_t g)
 {
-	return (g.any.type == GEO_TYPE_INVALID) ?
-		INVALID_MODULE : g.any.module;
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_MODULE : g.common.module;
 }
 
 extern geoid_t cnodeid_get_geoid(cnodeid_t cnode);
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 581f9a783045..123c1a50a9dc 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -74,10 +74,10 @@
 #define  SN_SAL_IOIF_GET_PCIBUS_INFO		   0x02000056
 #define  SN_SAL_IOIF_GET_PCIDEV_INFO		   0x02000057
 #define  SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST	   0x02000058
-#define  SN_SAL_IOIF_GET_PCI_TOPOLOGY	           0x02000059
 
 #define SN_SAL_HUB_ERROR_INTERRUPT		   0x02000060
 #define SN_SAL_BTE_RECOVER			   0x02000061
+#define SN_SAL_IOIF_GET_PCI_TOPOLOGY	           0x02000062
 
 /*
  * Service-specific constants
@@ -1015,7 +1015,7 @@ ia64_sn_hwperf_op(nasid_t nasid, u64 opcode, u64 a0, u64 a1, u64 a2,
 
 static inline int
 ia64_sn_ioif_get_pci_topology(u64 rack, u64 bay, u64 slot, u64 slab,
-			      char *buf, u64 len)
+			      u64 buf, u64 len)
 {
 	struct ia64_sal_retval rv;
 	SAL_CALL_NOLOCK(rv, SN_SAL_IOIF_GET_PCI_TOPOLOGY,
diff --git a/include/asm-ia64/sn/types.h b/include/asm-ia64/sn/types.h
index 586ed47cae9c..8e04ee211e59 100644
--- a/include/asm-ia64/sn/types.h
+++ b/include/asm-ia64/sn/types.h
@@ -16,7 +16,8 @@ typedef signed short	nasid_t;	/* node id in numa-as-id space */
 typedef signed char	partid_t;	/* partition ID type */
 typedef unsigned int    moduleid_t;     /* user-visible module number type */
 typedef unsigned int    cmoduleid_t;    /* kernel compact module id type */
-typedef signed char     slabid_t;
+typedef unsigned char	slotid_t;	/* slot (blade) within module */
+typedef unsigned char	slabid_t;	/* slab (asic) within slot */
 typedef u64 nic_t;
 typedef unsigned long iopaddr_t;
 typedef unsigned long paddr_t;

From f0a8d3c9ec1f82d2a41faa6c46b8db7bd5b1eb8d Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 25 Apr 2005 13:22:44 -0700
Subject: [PATCH 25/34] [IA64] Need to handle lfetch in "no_context" case.

Thanks to Mark for tracking down this one.  Users of __copy_from_user_inatomic()
will be sad if we don't handle lfetch faults for the "no_context" case.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/mm/fault.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index da859125aaef..4174ec999dde 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -209,10 +209,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	}
 
   no_context:
-	if (isr & IA64_ISR_SP) {
+	if ((isr & IA64_ISR_SP)
+	    || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
+	{
 		/*
-		 * This fault was due to a speculative load set the "ed" bit in the psr to
-		 * ensure forward progress (target register will get a NaT).
+		 * This fault was due to a speculative load or lfetch.fault, set the "ed"
+		 * bit in the psr to ensure forward progress.  (Target register will get a
+		 * NaT for ld.s, lfetch will be canceled.)
 		 */
 		ia64_psr(regs)->ed = 1;
 		return;

From 6118ec847e8e35393efc0f88394c2f5dd48c3313 Mon Sep 17 00:00:00 2001
From: Keith Owens <kaos@sgi.com>
Date: Mon, 25 Apr 2005 13:23:47 -0700
Subject: [PATCH 26/34] [IA64] __copy_user breaks on unaligned src

memcpy_mck.S::__copy_user breaks in the prefetch code under these conditions :-

* src is unaligned and
* dst is near the end of a page and
* the page after dst is unmapped.

Signed-off-by: Keith Owens <kaos@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/lib/memcpy_mck.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S
index 6f26ef7cc236..3c2cd2f04db9 100644
--- a/arch/ia64/lib/memcpy_mck.S
+++ b/arch/ia64/lib/memcpy_mck.S
@@ -300,7 +300,7 @@ EK(.ex_handler,	(p[D])	st8 [dst1] = t15, 4*8)
 	add	src_pre_mem=0,src0	// prefetch src pointer
 	add	dst_pre_mem=0,dst0	// prefetch dest pointer
 	and	src0=-8,src0		// 1st src pointer
-(p7)	mov	ar.lc = r21
+(p7)	mov	ar.lc = cnt
 (p8)	mov	ar.lc = r0
 	;;
 	TEXT_ALIGN(32)

From e927ecb05e1ce4bbb1e10f57008c94994e2160f5 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 25 Apr 2005 13:25:06 -0700
Subject: [PATCH 27/34] [IA64] multi-core/multi-thread identification

Version 3 - rediffed to apply on top of Ashok's hotplug cpu
patch.  /proc/cpuinfo output in step with x86.

This is an updated MC/MT identification patch based on the
previous discussions on list.

Add the Multi-core and Multi-threading detection for IPF.
  - Add new core and threading related fields in /proc/cpuinfo.
		Physical id
		Core id
		Thread id
		Siblings
  - setup the cpu_core_map and cpu_sibling_map appropriately
  - Handles Hot plug CPU

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Gordon Jin <gordon.jin@intel.com>
Signed-off-by: Rohit Seth <rohit.seth@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/setup.c     |  69 +++++++++++-
 arch/ia64/kernel/smpboot.c   | 206 +++++++++++++++++++++++++++++++++++
 include/asm-ia64/pal.h       |  68 ++++++++++++
 include/asm-ia64/processor.h |   7 ++
 include/asm-ia64/sal.h       |  12 ++
 include/asm-ia64/smp.h       |   5 +
 6 files changed, 365 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index f05650c801d2..88043841fb8a 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -4,10 +4,15 @@
  * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  *	Stephane Eranian <eranian@hpl.hp.com>
- * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 2000, 2004 Intel Corp
+ * 	Rohit Seth <rohit.seth@intel.com>
+ * 	Suresh Siddha <suresh.b.siddha@intel.com>
+ * 	Gordon Jin <gordon.jin@intel.com>
  * Copyright (C) 1999 VA Linux Systems
  * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  *
+ * 12/26/04 S.Siddha, G.Jin, R.Seth
+ *			Add multi-threading and multi-core detection
  * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
  * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
  * 03/31/00 R.Seth	cpu_initialized and current->processor fixes
@@ -296,6 +301,34 @@ mark_bsp_online (void)
 #endif
 }
 
+#ifdef CONFIG_SMP
+static void
+check_for_logical_procs (void)
+{
+	pal_logical_to_physical_t info;
+	s64 status;
+
+	status = ia64_pal_logical_to_phys(0, &info);
+	if (status == -1) {
+		printk(KERN_INFO "No logical to physical processor mapping "
+		       "available\n");
+		return;
+	}
+	if (status) {
+		printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
+		       status);
+		return;
+	}
+	/*
+	 * Total number of siblings that BSP has.  Though not all of them 
+	 * may have booted successfully. The correct number of siblings 
+	 * booted is in info.overview_num_log.
+	 */
+	smp_num_siblings = info.overview_tpc;
+	smp_num_cpucores = info.overview_cpp;
+}
+#endif
+
 void __init
 setup_arch (char **cmdline_p)
 {
@@ -356,6 +389,19 @@ setup_arch (char **cmdline_p)
 
 #ifdef CONFIG_SMP
 	cpu_physical_id(0) = hard_smp_processor_id();
+
+	cpu_set(0, cpu_sibling_map[0]);
+	cpu_set(0, cpu_core_map[0]);
+
+	check_for_logical_procs();
+	if (smp_num_cpucores > 1)
+		printk(KERN_INFO
+		       "cpu package is Multi-Core capable: number of cores=%d\n",
+		       smp_num_cpucores);
+	if (smp_num_siblings > 1)
+		printk(KERN_INFO
+		       "cpu package is Multi-Threading capable: number of siblings=%d\n",
+		       smp_num_siblings);
 #endif
 
 	cpu_init();	/* initialize the bootstrap CPU */
@@ -459,12 +505,23 @@ show_cpuinfo (struct seq_file *m, void *v)
 		   "cpu regs   : %u\n"
 		   "cpu MHz    : %lu.%06lu\n"
 		   "itc MHz    : %lu.%06lu\n"
-		   "BogoMIPS   : %lu.%02lu\n\n",
+		   "BogoMIPS   : %lu.%02lu\n",
 		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
 		   features, c->ppn, c->number,
 		   c->proc_freq / 1000000, c->proc_freq % 1000000,
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
 		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
+#ifdef CONFIG_SMP
+	if (c->threads_per_core > 1 || c->cores_per_socket > 1)
+		seq_printf(m,
+		   	   "physical id: %u\n"
+		   	   "core id    : %u\n"
+		   	   "thread id  : %u\n",
+		   	   c->socket_id, c->core_id, c->thread_id);
+	seq_printf(m, "siblings   : %u\n", c->num_log);
+#endif
+	seq_printf(m,"\n");
+
 	return 0;
 }
 
@@ -533,6 +590,14 @@ identify_cpu (struct cpuinfo_ia64 *c)
 	memcpy(c->vendor, cpuid.field.vendor, 16);
 #ifdef CONFIG_SMP
 	c->cpu = smp_processor_id();
+
+	/* below default values will be overwritten  by identify_siblings() 
+	 * for Multi-Threading/Multi-Core capable cpu's
+	 */
+	c->threads_per_core = c->cores_per_socket = c->num_log = 1;
+	c->socket_id = -1;
+
+	identify_siblings(c);
 #endif
 	c->ppn = cpuid.field.ppn;
 	c->number = cpuid.field.number;
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index dbc6b610cc64..0d5ee57c9865 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -3,6 +3,11 @@
  *
  * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2001, 2004-2005 Intel Corp
+ * 	Rohit Seth <rohit.seth@intel.com>
+ * 	Suresh Siddha <suresh.b.siddha@intel.com>
+ * 	Gordon Jin <gordon.jin@intel.com>
+ *	Ashok Raj  <ashok.raj@intel.com>
  *
  * 01/05/16 Rohit Seth <rohit.seth@intel.com>	Moved SMP booting functions from smp.c to here.
  * 01/04/27 David Mosberger <davidm@hpl.hp.com>	Added ITC synching code.
@@ -10,6 +15,11 @@
  *						smp_boot_cpus()/smp_commence() is replaced by
  *						smp_prepare_cpus()/__cpu_up()/smp_cpus_done().
  * 04/06/21 Ashok Raj		<ashok.raj@intel.com> Added CPU Hotplug Support
+ * 04/12/26 Jin Gordon <gordon.jin@intel.com>
+ * 04/12/26 Rohit Seth <rohit.seth@intel.com>
+ *						Add multi-threading and multi-core detection
+ * 05/01/30 Suresh Siddha <suresh.b.siddha@intel.com>
+ *						Setup cpu_sibling_map and cpu_core_map
  */
 #include <linux/config.h>
 
@@ -122,6 +132,11 @@ EXPORT_SYMBOL(cpu_online_map);
 cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 
+cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+int smp_num_siblings = 1;
+int smp_num_cpucores = 1;
+
 /* which logical CPU number maps to which CPU (physical APIC ID) */
 volatile int ia64_cpu_to_sapicid[NR_CPUS];
 EXPORT_SYMBOL(ia64_cpu_to_sapicid);
@@ -598,7 +613,68 @@ void __devinit smp_prepare_boot_cpu(void)
 	cpu_set(smp_processor_id(), cpu_callin_map);
 }
 
+/*
+ * mt_info[] is a temporary store for all info returned by
+ * PAL_LOGICAL_TO_PHYSICAL, to be copied into cpuinfo_ia64 when the
+ * specific cpu comes.
+ */
+static struct {
+	__u32   socket_id;
+	__u16   core_id;
+	__u16   thread_id;
+	__u16   proc_fixed_addr;
+	__u8    valid;
+}mt_info[NR_CPUS] __devinit;
+
 #ifdef CONFIG_HOTPLUG_CPU
+static inline void
+remove_from_mtinfo(int cpu)
+{
+	int i;
+
+	for_each_cpu(i)
+		if (mt_info[i].valid &&  mt_info[i].socket_id ==
+		    				cpu_data(cpu)->socket_id)
+			mt_info[i].valid = 0;
+}
+
+static inline void
+clear_cpu_sibling_map(int cpu)
+{
+	int i;
+
+	for_each_cpu_mask(i, cpu_sibling_map[cpu])
+		cpu_clear(cpu, cpu_sibling_map[i]);
+	for_each_cpu_mask(i, cpu_core_map[cpu])
+		cpu_clear(cpu, cpu_core_map[i]);
+
+	cpu_sibling_map[cpu] = cpu_core_map[cpu] = CPU_MASK_NONE;
+}
+
+static void
+remove_siblinginfo(int cpu)
+{
+	int last = 0;
+
+	if (cpu_data(cpu)->threads_per_core == 1 &&
+	    cpu_data(cpu)->cores_per_socket == 1) {
+		cpu_clear(cpu, cpu_core_map[cpu]);
+		cpu_clear(cpu, cpu_sibling_map[cpu]);
+		return;
+	}
+
+	last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
+
+	/* remove it from all sibling map's */
+	clear_cpu_sibling_map(cpu);
+
+	/* if this cpu is the last in the core group, remove all its info 
+	 * from mt_info structure
+	 */
+	if (last)
+		remove_from_mtinfo(cpu);
+}
+
 extern void fixup_irqs(void);
 /* must be called with cpucontrol mutex held */
 int __cpu_disable(void)
@@ -611,6 +687,7 @@ int __cpu_disable(void)
 	if (cpu == 0)
 		return -EBUSY;
 
+	remove_siblinginfo(cpu);
 	fixup_irqs();
 	local_flush_tlb_all();
 	cpu_clear(cpu, cpu_callin_map);
@@ -663,6 +740,23 @@ smp_cpus_done (unsigned int dummy)
 	       (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
 }
 
+static inline void __devinit
+set_cpu_sibling_map(int cpu)
+{
+	int i;
+
+	for_each_online_cpu(i) {
+		if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) {
+			cpu_set(i, cpu_core_map[cpu]);
+			cpu_set(cpu, cpu_core_map[i]);
+			if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
+				cpu_set(i, cpu_sibling_map[cpu]);
+				cpu_set(cpu, cpu_sibling_map[i]);
+			}
+		}
+	}
+}
+
 int __devinit
 __cpu_up (unsigned int cpu)
 {
@@ -685,6 +779,15 @@ __cpu_up (unsigned int cpu)
 	if (ret < 0)
 		return ret;
 
+	if (cpu_data(cpu)->threads_per_core == 1 &&
+	    cpu_data(cpu)->cores_per_socket == 1) {
+		cpu_set(cpu, cpu_sibling_map[cpu]);
+		cpu_set(cpu, cpu_core_map[cpu]);
+		return 0;
+	}
+
+	set_cpu_sibling_map(cpu);
+
 	return 0;
 }
 
@@ -712,3 +815,106 @@ init_smp_config(void)
 		       ia64_sal_strerror(sal_ret));
 }
 
+static inline int __devinit
+check_for_mtinfo_index(void)
+{
+	int i;
+	
+	for_each_cpu(i)
+		if (!mt_info[i].valid)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Search the mt_info to find out if this socket's cid/tid information is
+ * cached or not. If the socket exists, fill in the core_id and thread_id 
+ * in cpuinfo
+ */
+static int __devinit
+check_for_new_socket(__u16 logical_address, struct cpuinfo_ia64 *c)
+{
+	int i;
+	__u32 sid = c->socket_id;
+
+	for_each_cpu(i) {
+		if (mt_info[i].valid && mt_info[i].proc_fixed_addr == logical_address
+		    && mt_info[i].socket_id == sid) {
+			c->core_id = mt_info[i].core_id;
+			c->thread_id = mt_info[i].thread_id;
+			return 1; /* not a new socket */
+		}
+	}
+	return 0;
+}
+
+/*
+ * identify_siblings(cpu) gets called from identify_cpu. This populates the 
+ * information related to logical execution units in per_cpu_data structure.
+ */
+void __devinit
+identify_siblings(struct cpuinfo_ia64 *c)
+{
+	s64 status;
+	u16 pltid;
+	u64 proc_fixed_addr;
+	int count, i;
+	pal_logical_to_physical_t info;
+
+	if (smp_num_cpucores == 1 && smp_num_siblings == 1)
+		return;
+
+	if ((status = ia64_pal_logical_to_phys(0, &info)) != PAL_STATUS_SUCCESS) {
+		printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
+		       status);
+		return;
+	}
+	if ((status = ia64_sal_physical_id_info(&pltid)) != PAL_STATUS_SUCCESS) {
+		printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status);
+		return;
+	}
+	if ((status = ia64_pal_fixed_addr(&proc_fixed_addr)) != PAL_STATUS_SUCCESS) {
+		printk(KERN_ERR "ia64_pal_fixed_addr failed with %ld\n", status);
+		return;
+	}
+
+	c->socket_id =  (pltid << 8) | info.overview_ppid;
+	c->cores_per_socket = info.overview_cpp;
+	c->threads_per_core = info.overview_tpc;
+	count = c->num_log = info.overview_num_log;
+
+	/* If the thread and core id information is already cached, then
+	 * we will simply update cpu_info and return. Otherwise, we will
+	 * do the PAL calls and cache core and thread id's of all the siblings.
+	 */
+	if (check_for_new_socket(proc_fixed_addr, c))
+		return;
+
+	for (i = 0; i < count; i++) {
+		int index;
+
+		if (i && (status = ia64_pal_logical_to_phys(i, &info))
+			  != PAL_STATUS_SUCCESS) {
+                	printk(KERN_ERR "ia64_pal_logical_to_phys failed"
+					" with %ld\n", status);
+                	return;
+		}
+		if (info.log2_la == proc_fixed_addr) {
+			c->core_id = info.log1_cid;
+			c->thread_id = info.log1_tid;
+		}
+
+		index = check_for_mtinfo_index();
+		/* We will not do the mt_info caching optimization in this case.
+		 */
+		if (index < 0)
+			continue;
+
+		mt_info[index].valid = 1;
+		mt_info[index].socket_id = c->socket_id;
+		mt_info[index].core_id = info.log1_cid;
+		mt_info[index].thread_id = info.log1_tid;
+		mt_info[index].proc_fixed_addr = info.log2_la;
+	}
+}
diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h
index 5dd477ffb88e..2303a10ee595 100644
--- a/include/asm-ia64/pal.h
+++ b/include/asm-ia64/pal.h
@@ -67,6 +67,7 @@
 #define PAL_REGISTER_INFO	39	/* return AR and CR register information*/
 #define PAL_SHUTDOWN		40	/* enter processor shutdown state */
 #define PAL_PREFETCH_VISIBILITY	41	/* Make Processor Prefetches Visible */
+#define PAL_LOGICAL_TO_PHYSICAL 42	/* returns information on logical to physical processor mapping */
 
 #define PAL_COPY_PAL		256	/* relocate PAL procedures and PAL PMI */
 #define PAL_HALT_INFO		257	/* return the low power capabilities of processor */
@@ -1559,6 +1560,73 @@ ia64_pal_prefetch_visibility (s64 trans_type)
 	return iprv.status;
 }
 
+/* data structure for getting information on logical to physical mappings */
+typedef union pal_log_overview_u {
+	struct {
+		u64	num_log		:16,	/* Total number of logical
+						 * processors on this die
+						 */
+			tpc		:8,	/* Threads per core */
+			reserved3	:8,	/* Reserved */
+			cpp		:8,	/* Cores per processor */
+			reserved2	:8,	/* Reserved */
+			ppid		:8,	/* Physical processor ID */
+			reserved1	:8;	/* Reserved */
+	} overview_bits;
+	u64 overview_data;
+} pal_log_overview_t;
+
+typedef union pal_proc_n_log_info1_u{
+	struct {
+		u64	tid		:16,	/* Thread id */
+			reserved2	:16,	/* Reserved */
+			cid		:16,	/* Core id */
+			reserved1	:16;	/* Reserved */
+	} ppli1_bits;
+	u64	ppli1_data;
+} pal_proc_n_log_info1_t;
+
+typedef union pal_proc_n_log_info2_u {
+	struct {
+		u64	la		:16,	/* Logical address */
+			reserved	:48;	/* Reserved */
+	} ppli2_bits;
+	u64	ppli2_data;
+} pal_proc_n_log_info2_t;
+
+typedef struct pal_logical_to_physical_s
+{
+	pal_log_overview_t overview;
+	pal_proc_n_log_info1_t ppli1;
+	pal_proc_n_log_info2_t ppli2;
+} pal_logical_to_physical_t;
+
+#define overview_num_log	overview.overview_bits.num_log
+#define overview_tpc		overview.overview_bits.tpc
+#define overview_cpp		overview.overview_bits.cpp
+#define overview_ppid		overview.overview_bits.ppid
+#define log1_tid		ppli1.ppli1_bits.tid
+#define log1_cid		ppli1.ppli1_bits.cid
+#define log2_la			ppli2.ppli2_bits.la
+
+/* Get information on logical to physical processor mappings. */
+static inline s64
+ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_LOGICAL_TO_PHYSICAL, proc_number, 0, 0);
+
+	if (iprv.status == PAL_STATUS_SUCCESS)
+	{
+		if (proc_number == 0)
+			mapping->overview.overview_data = iprv.v0;
+		mapping->ppli1.ppli1_data = iprv.v1;
+		mapping->ppli2.ppli2_data = iprv.v2;
+	}
+
+	return iprv.status;
+}
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_IA64_PAL_H */
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 983798ec1791..9e1ba8b7fb68 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -148,6 +148,13 @@ struct cpuinfo_ia64 {
 #ifdef CONFIG_SMP
 	__u64 loops_per_jiffy;
 	int cpu;
+	__u32 socket_id;	/* physical processor socket id */
+	__u16 core_id;		/* core id */
+	__u16 thread_id;	/* thread id */
+	__u16 num_log;		/* Total number of logical processors on
+				 * this socket that were successfully booted */
+	__u8  cores_per_socket;	/* Cores per processor socket */
+	__u8  threads_per_core;	/* Threads per core */
 #endif
 
 	/* CPUID-derived information: */
diff --git a/include/asm-ia64/sal.h b/include/asm-ia64/sal.h
index 240676f75390..29df88bdd2bc 100644
--- a/include/asm-ia64/sal.h
+++ b/include/asm-ia64/sal.h
@@ -91,6 +91,7 @@ extern spinlock_t sal_lock;
 #define SAL_PCI_CONFIG_READ		0x01000010
 #define SAL_PCI_CONFIG_WRITE		0x01000011
 #define SAL_FREQ_BASE			0x01000012
+#define SAL_PHYSICAL_ID_INFO		0x01000013
 
 #define SAL_UPDATE_PAL			0x01000020
 
@@ -815,6 +816,17 @@ ia64_sal_update_pal (u64 param_buf, u64 scratch_buf, u64 scratch_buf_size,
 	return isrv.status;
 }
 
+/* Get physical processor die mapping in the platform. */
+static inline s64
+ia64_sal_physical_id_info(u16 *splid)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_PHYSICAL_ID_INFO, 0, 0, 0, 0, 0, 0, 0);
+	if (splid)
+		*splid = isrv.v0;
+	return isrv.status;
+}
+
 extern unsigned long sal_platform_features;
 
 extern int (*salinfo_platform_oemdata)(const u8 *, u8 **, u64 *);
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index c4a227acfeb0..3ba1a061e4ae 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -56,6 +56,10 @@ extern struct smp_boot_data {
 extern char no_int_routing __devinitdata;
 
 extern cpumask_t cpu_online_map;
+extern cpumask_t cpu_core_map[NR_CPUS];
+extern cpumask_t cpu_sibling_map[NR_CPUS];
+extern int smp_num_siblings;
+extern int smp_num_cpucores;
 extern void __iomem *ipi_base_addr;
 extern unsigned char smp_int_redirect;
 
@@ -124,6 +128,7 @@ extern int smp_call_function_single (int cpuid, void (*func) (void *info), void
 extern void smp_send_reschedule (int cpu);
 extern void lock_ipi_calllock(void);
 extern void unlock_ipi_calllock(void);
+extern void identify_siblings (struct cpuinfo_ia64 *);
 
 #else
 

From 24eeb568aeeaee771b9f0a6fd6f5d01040a887da Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Mon, 25 Apr 2005 13:26:23 -0700
Subject: [PATCH 28/34] [IA64] vector sharing (Large I/O system support)

Current ia64 linux cannot handle greater than 184 interrupt sources
because of the lack of vectors. The following patch enables ia64 linux
to handle greater than 184 interrupt sources by allowing the same
vector number to be shared by multiple IOSAPIC's RTEs. The design of
this patch is besed on "Intel(R) Itanium(R) Processor Family Interrupt
Architecture Guide".

Even if you don't have a large I/O system, you can see the behavior of
vector sharing by changing IOSAPIC_LAST_DEVICE_VECTOR to fewer value.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/iosapic.c  | 364 +++++++++++++++++++++++++++---------
 arch/ia64/kernel/irq_ia64.c |  16 +-
 include/asm-ia64/hw_irq.h   |   1 +
 3 files changed, 289 insertions(+), 92 deletions(-)

diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index c15be5c38f56..11a221cc8dc3 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -79,6 +79,7 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/string.h>
+#include <linux/bootmem.h>
 
 #include <asm/delay.h>
 #include <asm/hw_irq.h>
@@ -98,19 +99,30 @@
 #define DBG(fmt...)
 #endif
 
+#define NR_PREALLOCATE_RTE_ENTRIES	(PAGE_SIZE / sizeof(struct iosapic_rte_info))
+#define RTE_PREALLOCATED	(1)
+
 static DEFINE_SPINLOCK(iosapic_lock);
 
 /* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
 
-static struct iosapic_intr_info {
+struct iosapic_rte_info {
+	struct list_head rte_list;	/* node in list of RTEs sharing the same vector */
 	char __iomem	*addr;		/* base address of IOSAPIC */
-	u32		low32;		/* current value of low word of Redirection table entry */
 	unsigned int	gsi_base;	/* first GSI assigned to this IOSAPIC */
-	char		rte_index;	/* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */
+	char		rte_index;	/* IOSAPIC RTE index */
+	int		refcnt;		/* reference counter */
+	unsigned int	flags;		/* flags */
+} ____cacheline_aligned;
+
+static struct iosapic_intr_info {
+	struct list_head rtes;		/* RTEs using this vector (empty => not an IOSAPIC interrupt) */
+	int		count;		/* # of RTEs that shares this vector */
+	u32		low32;		/* current value of low word of Redirection table entry */
+	unsigned int	dest;		/* destination CPU physical ID */
 	unsigned char	dmode	: 3;	/* delivery mode (see iosapic.h) */
 	unsigned char 	polarity: 1;	/* interrupt polarity (see iosapic.h) */
 	unsigned char	trigger	: 1;	/* trigger mode (see iosapic.h) */
-	int		refcnt;		/* reference counter */
 } iosapic_intr_info[IA64_NUM_VECTORS];
 
 static struct iosapic {
@@ -126,6 +138,8 @@ static int num_iosapic;
 
 static unsigned char pcat_compat __initdata;	/* 8259 compatibility flag */
 
+static int iosapic_kmalloc_ok;
+static LIST_HEAD(free_rte_list);
 
 /*
  * Find an IOSAPIC associated with a GSI
@@ -147,10 +161,12 @@ static inline int
 _gsi_to_vector (unsigned int gsi)
 {
 	struct iosapic_intr_info *info;
+	struct iosapic_rte_info *rte;
 
 	for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
-		if (info->gsi_base + info->rte_index == gsi)
-			return info - iosapic_intr_info;
+		list_for_each_entry(rte, &info->rtes, rte_list)
+			if (rte->gsi_base + rte->rte_index == gsi)
+				return info - iosapic_intr_info;
 	return -1;
 }
 
@@ -167,33 +183,52 @@ gsi_to_vector (unsigned int gsi)
 int
 gsi_to_irq (unsigned int gsi)
 {
+	unsigned long flags;
+	int irq;
 	/*
 	 * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq
 	 * numbers...
 	 */
-	return _gsi_to_vector(gsi);
+	spin_lock_irqsave(&iosapic_lock, flags);
+	{
+		irq = _gsi_to_vector(gsi);
+	}
+	spin_unlock_irqrestore(&iosapic_lock, flags);
+
+	return irq;
+}
+
+static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec)
+{
+	struct iosapic_rte_info *rte;
+
+	list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
+		if (rte->gsi_base + rte->rte_index == gsi)
+			return rte;
+	return NULL;
 }
 
 static void
-set_rte (unsigned int vector, unsigned int dest, int mask)
+set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
 {
 	unsigned long pol, trigger, dmode;
 	u32 low32, high32;
 	char __iomem *addr;
 	int rte_index;
 	char redir;
+	struct iosapic_rte_info *rte;
 
 	DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
 
-	rte_index = iosapic_intr_info[vector].rte_index;
-	if (rte_index < 0)
+	rte = gsi_vector_to_rte(gsi, vector);
+	if (!rte)
 		return;		/* not an IOSAPIC interrupt */
 
-	addr    = iosapic_intr_info[vector].addr;
+	rte_index = rte->rte_index;
+	addr	= rte->addr;
 	pol     = iosapic_intr_info[vector].polarity;
 	trigger = iosapic_intr_info[vector].trigger;
 	dmode   = iosapic_intr_info[vector].dmode;
-	vector &= (~IA64_IRQ_REDIRECTED);
 
 	redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
 
@@ -221,6 +256,7 @@ set_rte (unsigned int vector, unsigned int dest, int mask)
 	iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
 	iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
 	iosapic_intr_info[vector].low32 = low32;
+	iosapic_intr_info[vector].dest = dest;
 }
 
 static void
@@ -237,18 +273,20 @@ mask_irq (unsigned int irq)
 	u32 low32;
 	int rte_index;
 	ia64_vector vec = irq_to_vector(irq);
+	struct iosapic_rte_info *rte;
 
-	addr = iosapic_intr_info[vec].addr;
-	rte_index = iosapic_intr_info[vec].rte_index;
-
-	if (rte_index < 0)
+	if (list_empty(&iosapic_intr_info[vec].rtes))
 		return;			/* not an IOSAPIC interrupt! */
 
 	spin_lock_irqsave(&iosapic_lock, flags);
 	{
 		/* set only the mask bit */
 		low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
-		iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+			addr = rte->addr;
+			rte_index = rte->rte_index;
+			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		}
 	}
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 }
@@ -261,16 +299,19 @@ unmask_irq (unsigned int irq)
 	u32 low32;
 	int rte_index;
 	ia64_vector vec = irq_to_vector(irq);
+	struct iosapic_rte_info *rte;
 
-	addr = iosapic_intr_info[vec].addr;
-	rte_index = iosapic_intr_info[vec].rte_index;
-	if (rte_index < 0)
+	if (list_empty(&iosapic_intr_info[vec].rtes))
 		return;			/* not an IOSAPIC interrupt! */
 
 	spin_lock_irqsave(&iosapic_lock, flags);
 	{
 		low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
-		iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+			addr = rte->addr;
+			rte_index = rte->rte_index;
+			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		}
 	}
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 }
@@ -286,6 +327,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
 	char __iomem *addr;
 	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
 	ia64_vector vec;
+	struct iosapic_rte_info *rte;
 
 	irq &= (~IA64_IRQ_REDIRECTED);
 	vec = irq_to_vector(irq);
@@ -295,10 +337,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
 
 	dest = cpu_physical_id(first_cpu(mask));
 
-	rte_index = iosapic_intr_info[vec].rte_index;
-	addr = iosapic_intr_info[vec].addr;
-
-	if (rte_index < 0)
+	if (list_empty(&iosapic_intr_info[vec].rtes))
 		return;			/* not an IOSAPIC interrupt */
 
 	set_irq_affinity_info(irq, dest, redir);
@@ -318,8 +357,13 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
 			low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
 
 		iosapic_intr_info[vec].low32 = low32;
-		iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
-		iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		iosapic_intr_info[vec].dest = dest;
+		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+			addr = rte->addr;
+			rte_index = rte->rte_index;
+			iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
+			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		}
 	}
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 #endif
@@ -340,9 +384,11 @@ static void
 iosapic_end_level_irq (unsigned int irq)
 {
 	ia64_vector vec = irq_to_vector(irq);
+	struct iosapic_rte_info *rte;
 
 	move_irq(irq);
-	iosapic_eoi(iosapic_intr_info[vec].addr, vec);
+	list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
+		iosapic_eoi(rte->addr, vec);
 }
 
 #define iosapic_shutdown_level_irq	mask_irq
@@ -422,6 +468,34 @@ iosapic_version (char __iomem *addr)
 	return iosapic_read(addr, IOSAPIC_VERSION);
 }
 
+static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol)
+{
+	int i, vector = -1, min_count = -1;
+	struct iosapic_intr_info *info;
+
+	/*
+	 * shared vectors for edge-triggered interrupts are not
+	 * supported yet
+	 */
+	if (trigger == IOSAPIC_EDGE)
+		return -1;
+
+	for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
+		info = &iosapic_intr_info[i];
+		if (info->trigger == trigger && info->polarity == pol &&
+		    (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) {
+			if (min_count == -1 || info->count < min_count) {
+				vector = i;
+				min_count = info->count;
+			}
+		}
+	}
+	if (vector < 0)
+		panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+
+	return vector;
+}
+
 /*
  * if the given vector is already owned by other,
  *  assign a new vector for the other and make the vector available
@@ -431,19 +505,63 @@ iosapic_reassign_vector (int vector)
 {
 	int new_vector;
 
-	if (iosapic_intr_info[vector].rte_index >= 0 || iosapic_intr_info[vector].addr
-	    || iosapic_intr_info[vector].gsi_base || iosapic_intr_info[vector].dmode
-	    || iosapic_intr_info[vector].polarity || iosapic_intr_info[vector].trigger)
-	{
+	if (!list_empty(&iosapic_intr_info[vector].rtes)) {
 		new_vector = assign_irq_vector(AUTO_ASSIGN);
 		printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
 		memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
 		       sizeof(struct iosapic_intr_info));
+		INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
+		list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes);
 		memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
-		iosapic_intr_info[vector].rte_index = -1;
+		iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
+		INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
 	}
 }
 
+static struct iosapic_rte_info *iosapic_alloc_rte (void)
+{
+	int i;
+	struct iosapic_rte_info *rte;
+	int preallocated = 0;
+
+	if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
+		rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES);
+		if (!rte)
+			return NULL;
+		for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
+			list_add(&rte->rte_list, &free_rte_list);
+	}
+
+	if (!list_empty(&free_rte_list)) {
+		rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list);
+		list_del(&rte->rte_list);
+		preallocated++;
+	} else {
+		rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
+		if (!rte)
+			return NULL;
+	}
+
+	memset(rte, 0, sizeof(struct iosapic_rte_info));
+	if (preallocated)
+		rte->flags |= RTE_PREALLOCATED;
+
+	return rte;
+}
+
+static void iosapic_free_rte (struct iosapic_rte_info *rte)
+{
+	if (rte->flags & RTE_PREALLOCATED)
+		list_add_tail(&rte->rte_list, &free_rte_list);
+	else
+		kfree(rte);
+}
+
+static inline int vector_is_shared (int vector)
+{
+	return (iosapic_intr_info[vector].count > 1);
+}
+
 static void
 register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	       unsigned long polarity, unsigned long trigger)
@@ -454,6 +572,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	int index;
 	unsigned long gsi_base;
 	void __iomem *iosapic_address;
+	struct iosapic_rte_info *rte;
 
 	index = find_iosapic(gsi);
 	if (index < 0) {
@@ -464,14 +583,33 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	iosapic_address = iosapic_lists[index].addr;
 	gsi_base = iosapic_lists[index].gsi_base;
 
-	rte_index = gsi - gsi_base;
-	iosapic_intr_info[vector].rte_index = rte_index;
+	rte = gsi_vector_to_rte(gsi, vector);
+	if (!rte) {
+		rte = iosapic_alloc_rte();
+		if (!rte) {
+			printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
+			return;
+		}
+
+		rte_index = gsi - gsi_base;
+		rte->rte_index	= rte_index;
+		rte->addr	= iosapic_address;
+		rte->gsi_base	= gsi_base;
+		rte->refcnt++;
+		list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes);
+		iosapic_intr_info[vector].count++;
+	}
+	else if (vector_is_shared(vector)) {
+		struct iosapic_intr_info *info = &iosapic_intr_info[vector];
+		if (info->trigger != trigger || info->polarity != polarity) {
+			printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
+			return;
+		}
+	}
+
 	iosapic_intr_info[vector].polarity = polarity;
 	iosapic_intr_info[vector].dmode    = delivery;
-	iosapic_intr_info[vector].addr     = iosapic_address;
-	iosapic_intr_info[vector].gsi_base = gsi_base;
 	iosapic_intr_info[vector].trigger  = trigger;
-	iosapic_intr_info[vector].refcnt++;
 
 	if (trigger == IOSAPIC_EDGE)
 		irq_type = &irq_type_iosapic_edge;
@@ -493,6 +631,13 @@ get_target_cpu (unsigned int gsi, int vector)
 #ifdef CONFIG_SMP
 	static int cpu = -1;
 
+	/*
+	 * In case of vector shared by multiple RTEs, all RTEs that
+	 * share the vector need to use the same destination CPU.
+	 */
+	if (!list_empty(&iosapic_intr_info[vector].rtes))
+		return iosapic_intr_info[vector].dest;
+
 	/*
 	 * If the platform supports redirection via XTP, let it
 	 * distribute interrupts.
@@ -565,10 +710,12 @@ int
 iosapic_register_intr (unsigned int gsi,
 		       unsigned long polarity, unsigned long trigger)
 {
-	int vector;
+	int vector, mask = 1;
 	unsigned int dest;
 	unsigned long flags;
-
+	struct iosapic_rte_info *rte;
+	u32 low32;
+again:
 	/*
 	 * If this GSI has already been registered (i.e., it's a
 	 * shared interrupt, or we lost a race to register it),
@@ -578,20 +725,46 @@ iosapic_register_intr (unsigned int gsi,
 	{
 		vector = gsi_to_vector(gsi);
 		if (vector > 0) {
-			iosapic_intr_info[vector].refcnt++;
+			rte = gsi_vector_to_rte(gsi, vector);
+			rte->refcnt++;
 			spin_unlock_irqrestore(&iosapic_lock, flags);
 			return vector;
 		}
-
-		vector = assign_irq_vector(AUTO_ASSIGN);
-		dest = get_target_cpu(gsi, vector);
-		register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
-			polarity, trigger);
-
-		set_rte(vector, dest, 1);
 	}
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 
+	/* If vector is running out, we try to find a sharable vector */
+	vector = assign_irq_vector_nopanic(AUTO_ASSIGN);
+	if (vector < 0)
+		vector = iosapic_find_sharable_vector(trigger, polarity);
+
+	spin_lock_irqsave(&irq_descp(vector)->lock, flags);
+	spin_lock(&iosapic_lock);
+	{
+		if (gsi_to_vector(gsi) > 0) {
+			if (list_empty(&iosapic_intr_info[vector].rtes))
+				free_irq_vector(vector);
+			spin_unlock(&iosapic_lock);
+			spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+			goto again;
+		}
+
+		dest = get_target_cpu(gsi, vector);
+		register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
+			      polarity, trigger);
+
+		/*
+		 * If the vector is shared and already unmasked for
+		 * other interrupt sources, don't mask it.
+		 */
+		low32 = iosapic_intr_info[vector].low32;
+		if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK))
+			mask = 0;
+		set_rte(gsi, vector, dest, mask);
+	}
+	spin_unlock_irq(&iosapic_lock);
+	spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+
 	printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
 	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
 	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
@@ -607,8 +780,10 @@ iosapic_unregister_intr (unsigned int gsi)
 	unsigned long flags;
 	int irq, vector;
 	irq_desc_t *idesc;
-	int rte_index;
+	u32 low32;
 	unsigned long trigger, polarity;
+	unsigned int dest;
+	struct iosapic_rte_info *rte;
 
 	/*
 	 * If the irq associated with the gsi is not found,
@@ -627,54 +802,56 @@ iosapic_unregister_intr (unsigned int gsi)
 	spin_lock_irqsave(&idesc->lock, flags);
 	spin_lock(&iosapic_lock);
 	{
-		rte_index = iosapic_intr_info[vector].rte_index;
-		if (rte_index < 0) {
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&idesc->lock, flags);
+		if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
 			printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
 			WARN_ON(1);
-			return;
+			goto out;
 		}
 
-		if (--iosapic_intr_info[vector].refcnt > 0) {
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&idesc->lock, flags);
-			return;
-		}
+		if (--rte->refcnt > 0)
+			goto out;
 
-		/*
-		 * If interrupt handlers still exist on the irq
-		 * associated with the gsi, don't unregister the
-		 * interrupt.
-		 */
-		if (idesc->action) {
-			iosapic_intr_info[vector].refcnt++;
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&idesc->lock, flags);
-			printk(KERN_WARNING "Cannot unregister GSI. IRQ %u is still in use.\n", irq);
-			return;
-		}
+		/* Mask the interrupt */
+		low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
+		iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32);
 
-		/* Clear the interrupt controller descriptor. */
-		idesc->handler = &no_irq_type;
+		/* Remove the rte entry from the list */
+		list_del(&rte->rte_list);
+		iosapic_intr_info[vector].count--;
+		iosapic_free_rte(rte);
 
-		trigger  = iosapic_intr_info[vector].trigger;
+		trigger	 = iosapic_intr_info[vector].trigger;
 		polarity = iosapic_intr_info[vector].polarity;
+		dest     = iosapic_intr_info[vector].dest;
+		printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
+		       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
+		       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
+		       cpu_logical_id(dest), dest, vector);
 
-		/* Clear the interrupt information. */
-		memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
-		iosapic_intr_info[vector].rte_index = -1;	/* mark as unused */
+		if (list_empty(&iosapic_intr_info[vector].rtes)) {
+			/* Sanity check */
+			BUG_ON(iosapic_intr_info[vector].count);
+
+			/* Clear the interrupt controller descriptor */
+			idesc->handler = &no_irq_type;
+
+			/* Clear the interrupt information */
+			memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+			iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
+			INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
+
+			if (idesc->action) {
+				printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq);
+				WARN_ON(1);
+			}
+
+			/* Free the interrupt vector */
+			free_irq_vector(vector);
+		}
 	}
+ out:
 	spin_unlock(&iosapic_lock);
 	spin_unlock_irqrestore(&idesc->lock, flags);
-
-	/* Free the interrupt vector */
-	free_irq_vector(vector);
-
-	printk(KERN_INFO "GSI %u (%s, %s) -> vector %d unregisterd.\n",
-	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
-	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
-	       vector);
 }
 #endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
 
@@ -724,7 +901,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
 	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
 	       cpu_logical_id(dest), dest, vector);
 
-	set_rte(vector, dest, mask);
+	set_rte(gsi, vector, dest, mask);
 	return vector;
 }
 
@@ -750,7 +927,7 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
 	    polarity == IOSAPIC_POL_HIGH ? "high" : "low",
 	    cpu_logical_id(dest), dest, vector);
 
-	set_rte(vector, dest, 1);
+	set_rte(gsi, vector, dest, 1);
 }
 
 void __init
@@ -758,8 +935,10 @@ iosapic_system_init (int system_pcat_compat)
 {
 	int vector;
 
-	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
-		iosapic_intr_info[vector].rte_index = -1;	/* mark as unused */
+	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
+		iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
+		INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);	/* mark as unused */
+	}
 
 	pcat_compat = system_pcat_compat;
 	if (pcat_compat) {
@@ -825,3 +1004,10 @@ map_iosapic_to_node(unsigned int gsi_base, int node)
 	return;
 }
 #endif
+
+static int __init iosapic_enable_kmalloc (void)
+{
+	iosapic_kmalloc_ok = 1;
+	return 0;
+}
+core_initcall (iosapic_enable_kmalloc);
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 5ba06ebe355b..4fe60c7a2e90 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -63,20 +63,30 @@ EXPORT_SYMBOL(isa_irq_to_vector_map);
 static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
 
 int
-assign_irq_vector (int irq)
+assign_irq_vector_nopanic (int irq)
 {
 	int pos, vector;
  again:
 	pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
 	vector = IA64_FIRST_DEVICE_VECTOR + pos;
 	if (vector > IA64_LAST_DEVICE_VECTOR)
-		/* XXX could look for sharable vectors instead of panic'ing... */
-		panic("assign_irq_vector: out of interrupt vectors!");
+		return -1;
 	if (test_and_set_bit(pos, ia64_vector_mask))
 		goto again;
 	return vector;
 }
 
+int
+assign_irq_vector (int irq)
+{
+	int vector = assign_irq_vector_nopanic(irq);
+
+	if (vector < 0)
+		panic("assign_irq_vector: out of interrupt vectors!");
+
+	return vector;
+}
+
 void
 free_irq_vector (int vector)
 {
diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h
index 041ab8c51a64..cd4e06b74ab6 100644
--- a/include/asm-ia64/hw_irq.h
+++ b/include/asm-ia64/hw_irq.h
@@ -81,6 +81,7 @@ extern __u8 isa_irq_to_vector_map[16];
 
 extern struct hw_interrupt_type irq_type_ia64_lsapic;	/* CPU-internal interrupt controller */
 
+extern int assign_irq_vector_nopanic (int irq); /* allocate a free vector without panic */
 extern int assign_irq_vector (int irq);	/* allocate a free vector */
 extern void free_irq_vector (int vector);
 extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect);

From e1ed81ab7a34fc0f92f2e200825bdb6d86d6c8ef Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 25 Apr 2005 13:27:12 -0700
Subject: [PATCH 29/34] [IA64] print "siblings" before {physical,core,thread}
 id

Rohit and Suresh changed their mind about the order to print things
in /proc/cpuinfo, but didn't include the change in the version of
the patch they sent to me.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 88043841fb8a..b7e6b4cb374b 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -512,13 +512,13 @@ show_cpuinfo (struct seq_file *m, void *v)
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
 		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
 #ifdef CONFIG_SMP
+	seq_printf(m, "siblings   : %u\n", c->num_log);
 	if (c->threads_per_core > 1 || c->cores_per_socket > 1)
 		seq_printf(m,
 		   	   "physical id: %u\n"
 		   	   "core id    : %u\n"
 		   	   "thread id  : %u\n",
 		   	   c->socket_id, c->core_id, c->thread_id);
-	seq_printf(m, "siblings   : %u\n", c->num_log);
 #endif
 	seq_printf(m,"\n");
 

From b9e41d7fb62ae26adee84c18048037214ce5d866 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Mon, 25 Apr 2005 13:27:48 -0700
Subject: [PATCH 30/34] [IA64] iosapic.c: typo ...
 s/spin_unlock_irq/spin_unlock/

vector sharing patch had a typo ... mismatched spin_lock() with
a spin_unlock_irq().  Fix from Kenji Kaneshige.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/iosapic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 11a221cc8dc3..88b014381df5 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -762,7 +762,7 @@ again:
 			mask = 0;
 		set_rte(gsi, vector, dest, mask);
 	}
-	spin_unlock_irq(&iosapic_lock);
+	spin_unlock(&iosapic_lock);
 	spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
 
 	printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",

From 67639deb099c6085acc447c1b7d6a17792dedad0 Mon Sep 17 00:00:00 2001
From: Greg Howard <ghoward@sgi.com>
Date: Mon, 25 Apr 2005 13:28:52 -0700
Subject: [PATCH 31/34] [IA64] Altix system controller event handling

The following is an update of the patch I sent yesterday
(3/9/05) incorporating suggestions from Christoph Hellwig and
Andreas Schwab.  It allows Altix and Altix-like systems to
handle environmental events generated by the system controllers,
and should apply on top of Jack Steiner's patch of 3/1/05 ("New
chipset support for SN platform") and Mark Goodwin's patch of
3/8/05 ("Altix SN topology support for new chipsets and pci
topology").

Signed-off-by: Greg Howard <ghoward@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/char/Makefile        |  2 +-
 drivers/char/snsc.c          |  8 ++++++++
 drivers/char/snsc.h          | 40 ++++++++++++++++++++++++++++++++++++
 include/asm-ia64/sn/sn_sal.h | 14 +++++++++++++
 4 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 3ea8cc80ea3d..e3f5c32aac55 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -42,7 +42,7 @@ obj-$(CONFIG_SX)		+= sx.o generic_serial.o
 obj-$(CONFIG_RIO)		+= rio/ generic_serial.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvc_console.o hvsi.o
 obj-$(CONFIG_RAW_DRIVER)	+= raw.o
-obj-$(CONFIG_SGI_SNSC)		+= snsc.o
+obj-$(CONFIG_SGI_SNSC)		+= snsc.o snsc_event.o
 obj-$(CONFIG_MMTIMER)		+= mmtimer.o
 obj-$(CONFIG_VIOCONS) += viocons.o
 obj-$(CONFIG_VIOTAPE)		+= viotape.o
diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c
index ffb9143376bb..e3c0b52d943f 100644
--- a/drivers/char/snsc.c
+++ b/drivers/char/snsc.c
@@ -374,6 +374,7 @@ scdrv_init(void)
 	void *salbuf;
 	struct class_simple *snsc_class;
 	dev_t first_dev, dev;
+	nasid_t event_nasid = ia64_sn_get_console_nasid();
 
 	if (alloc_chrdev_region(&first_dev, 0, numionodes,
 				SYSCTL_BASENAME) < 0) {
@@ -441,6 +442,13 @@ scdrv_init(void)
 			ia64_sn_irtr_intr_enable(scd->scd_nasid,
 						 0 /*ignored */ ,
 						 SAL_IROUTER_INTR_RECV);
+
+                        /* on the console nasid, prepare to receive
+                         * system controller environmental events
+                         */
+                        if(scd->scd_nasid == event_nasid) {
+                                scdrv_event_init(scd);
+                        }
 	}
 	return 0;
 }
diff --git a/drivers/char/snsc.h b/drivers/char/snsc.h
index c22c6c55e254..a9efc13cc858 100644
--- a/drivers/char/snsc.h
+++ b/drivers/char/snsc.h
@@ -47,4 +47,44 @@ struct sysctl_data_s {
 	nasid_t scd_nasid;	/* Node on which subchannels are opened. */
 };
 
+
+/* argument types */
+#define IR_ARG_INT              0x00    /* 4-byte integer (big-endian)  */
+#define IR_ARG_ASCII            0x01    /* null-terminated ASCII string */
+#define IR_ARG_UNKNOWN          0x80    /* unknown data type.  The low
+                                         * 7 bits will contain the data
+                                         * length.                      */
+#define IR_ARG_UNKNOWN_LENGTH_MASK	0x7f
+
+
+/* system controller event codes */
+#define EV_CLASS_MASK		0xf000ul
+#define EV_SEVERITY_MASK	0x0f00ul
+#define EV_COMPONENT_MASK	0x00fful
+
+#define EV_CLASS_POWER		0x1000ul
+#define EV_CLASS_FAN		0x2000ul
+#define EV_CLASS_TEMP		0x3000ul
+#define EV_CLASS_ENV		0x4000ul
+#define EV_CLASS_TEST_FAULT	0x5000ul
+#define EV_CLASS_TEST_WARNING	0x6000ul
+#define EV_CLASS_PWRD_NOTIFY	0x8000ul
+
+#define EV_SEVERITY_POWER_STABLE	0x0000ul
+#define EV_SEVERITY_POWER_LOW_WARNING	0x0100ul
+#define EV_SEVERITY_POWER_HIGH_WARNING	0x0200ul
+#define EV_SEVERITY_POWER_HIGH_FAULT	0x0300ul
+#define EV_SEVERITY_POWER_LOW_FAULT	0x0400ul
+
+#define EV_SEVERITY_FAN_STABLE		0x0000ul
+#define EV_SEVERITY_FAN_WARNING		0x0100ul
+#define EV_SEVERITY_FAN_FAULT		0x0200ul
+
+#define EV_SEVERITY_TEMP_STABLE		0x0000ul
+#define EV_SEVERITY_TEMP_ADVISORY	0x0100ul
+#define EV_SEVERITY_TEMP_CRITICAL	0x0200ul
+#define EV_SEVERITY_TEMP_FAULT		0x0300ul
+
+void scdrv_event_init(struct sysctl_data_s *);
+
 #endif /* _SN_SYSCTL_H_ */
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 123c1a50a9dc..f914f6da077c 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -64,6 +64,7 @@
 
 #define  SN_SAL_SYSCTL_IOBRICK_PCI_OP		   0x02000042	// reentrant
 #define	 SN_SAL_IROUTER_OP			   0x02000043
+#define  SN_SAL_SYSCTL_EVENT                       0x02000044
 #define  SN_SAL_IOIF_INTERRUPT			   0x0200004a
 #define  SN_SAL_HWPERF_OP			   0x02000050   // lock
 #define  SN_SAL_IOIF_ERROR_INTERRUPT		   0x02000051
@@ -850,6 +851,19 @@ ia64_sn_irtr_intr_disable(nasid_t nasid, int subch, u64 intr)
 	return (int) rv.v0;
 }
 
+/*
+ * Set up a node as the point of contact for system controller
+ * environmental event delivery.
+ */
+static inline int
+ia64_sn_sysctl_event_init(nasid_t nasid)
+{
+        struct ia64_sal_retval rv;
+        SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_EVENT, (u64) nasid,
+			   0, 0, 0, 0, 0, 0);
+        return (int) rv.v0;
+}
+
 /**
  * ia64_sn_get_fit_compt - read a FIT entry from the PROM header
  * @nasid: NASID of node to read

From fc626b278a05a0fe3eb9abd1733120f2f400cbcd Mon Sep 17 00:00:00 2001
From: Greg Howard <ghoward@sgi.com>
Date: Mon, 25 Apr 2005 13:29:46 -0700
Subject: [PATCH 32/34] [IA64-SGI] snsc_event.c new file

Forgot the "bk new" to add this file. Part of the patch
from Greg Howard

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/char/snsc_event.c | 304 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 304 insertions(+)
 create mode 100644 drivers/char/snsc_event.c

diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c
new file mode 100644
index 000000000000..d692af57213a
--- /dev/null
+++ b/drivers/char/snsc_event.c
@@ -0,0 +1,304 @@
+/*
+ * SN Platform system controller communication support
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * System controller event handler
+ *
+ * These routines deal with environmental events arriving from the
+ * system controllers.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/byteorder/generic.h>
+#include <asm/sn/sn_sal.h>
+#include "snsc.h"
+
+static struct subch_data_s *event_sd;
+
+void scdrv_event(unsigned long);
+DECLARE_TASKLET(sn_sysctl_event, scdrv_event, 0);
+
+/*
+ * scdrv_event_interrupt
+ *
+ * Pull incoming environmental events off the physical link to the
+ * system controller and put them in a temporary holding area in SAL.
+ * Schedule scdrv_event() to move them along to their ultimate
+ * destination.
+ */
+static irqreturn_t
+scdrv_event_interrupt(int irq, void *subch_data, struct pt_regs *regs)
+{
+	struct subch_data_s *sd = subch_data;
+	unsigned long flags;
+	int status;
+
+	spin_lock_irqsave(&sd->sd_rlock, flags);
+	status = ia64_sn_irtr_intr(sd->sd_nasid, sd->sd_subch);
+
+	if ((status > 0) && (status & SAL_IROUTER_INTR_RECV)) {
+		tasklet_schedule(&sn_sysctl_event);
+	}
+	spin_unlock_irqrestore(&sd->sd_rlock, flags);
+	return IRQ_HANDLED;
+}
+
+
+/*
+ * scdrv_parse_event
+ *
+ * Break an event (as read from SAL) into useful pieces so we can decide
+ * what to do with it.
+ */
+static int
+scdrv_parse_event(char *event, int *src, int *code, int *esp_code, char *desc)
+{
+	char *desc_end;
+
+	/* record event source address */
+	*src = be32_to_cpup((__be32 *)event);
+	event += 4; 			/* move on to event code */
+
+	/* record the system controller's event code */
+	*code = be32_to_cpup((__be32 *)event);
+	event += 4;			/* move on to event arguments */
+
+	/* how many arguments are in the packet? */
+	if (*event++ != 2) {
+		/* if not 2, give up */
+		return -1;
+	}
+
+	/* parse out the ESP code */
+	if (*event++ != IR_ARG_INT) {
+		/* not an integer argument, so give up */
+		return -1;
+	}
+	*esp_code = be32_to_cpup((__be32 *)event);
+	event += 4;
+
+	/* parse out the event description */
+	if (*event++ != IR_ARG_ASCII) {
+		/* not an ASCII string, so give up */
+		return -1;
+	}
+	event[CHUNKSIZE-1] = '\0';	/* ensure this string ends! */
+	event += 2; 			/* skip leading CR/LF */
+	desc_end = desc + sprintf(desc, "%s", event);
+
+	/* strip trailing CR/LF (if any) */
+	for (desc_end--;
+	     (desc_end != desc) && ((*desc_end == 0xd) || (*desc_end == 0xa));
+	     desc_end--) {
+		*desc_end = '\0';
+	}
+
+	return 0;
+}
+
+
+/*
+ * scdrv_event_severity
+ *
+ * Figure out how urgent a message we should write to the console/syslog
+ * via printk.
+ */
+static char *
+scdrv_event_severity(int code)
+{
+	int ev_class = (code & EV_CLASS_MASK);
+	int ev_severity = (code & EV_SEVERITY_MASK);
+	char *pk_severity = KERN_NOTICE;
+
+	switch (ev_class) {
+	case EV_CLASS_POWER:
+		switch (ev_severity) {
+		case EV_SEVERITY_POWER_LOW_WARNING:
+		case EV_SEVERITY_POWER_HIGH_WARNING:
+			pk_severity = KERN_WARNING;
+			break;
+		case EV_SEVERITY_POWER_HIGH_FAULT:
+		case EV_SEVERITY_POWER_LOW_FAULT:
+			pk_severity = KERN_ALERT;
+			break;
+		}
+		break;
+	case EV_CLASS_FAN:
+		switch (ev_severity) {
+		case EV_SEVERITY_FAN_WARNING:
+			pk_severity = KERN_WARNING;
+			break;
+		case EV_SEVERITY_FAN_FAULT:
+			pk_severity = KERN_CRIT;
+			break;
+		}
+		break;
+	case EV_CLASS_TEMP:
+		switch (ev_severity) {
+		case EV_SEVERITY_TEMP_ADVISORY:
+			pk_severity = KERN_WARNING;
+			break;
+		case EV_SEVERITY_TEMP_CRITICAL:
+			pk_severity = KERN_CRIT;
+			break;
+		case EV_SEVERITY_TEMP_FAULT:
+			pk_severity = KERN_ALERT;
+			break;
+		}
+		break;
+	case EV_CLASS_ENV:
+		pk_severity = KERN_ALERT;
+		break;
+	case EV_CLASS_TEST_FAULT:
+		pk_severity = KERN_ALERT;
+		break;
+	case EV_CLASS_TEST_WARNING:
+		pk_severity = KERN_WARNING;
+		break;
+	case EV_CLASS_PWRD_NOTIFY:
+		pk_severity = KERN_ALERT;
+		break;
+	}
+
+	return pk_severity;
+}
+
+
+/*
+ * scdrv_dispatch_event
+ *
+ * Do the right thing with an incoming event.  That's often nothing
+ * more than printing it to the system log.  For power-down notifications
+ * we start a graceful shutdown.
+ */
+static void
+scdrv_dispatch_event(char *event, int len)
+{
+	int code, esp_code, src;
+	char desc[CHUNKSIZE];
+	char *severity;
+
+	if (scdrv_parse_event(event, &src, &code, &esp_code, desc) < 0) {
+		/* ignore uninterpretible event */
+		return;
+	}
+
+	/* how urgent is the message? */
+	severity = scdrv_event_severity(code);
+
+	if ((code & EV_CLASS_MASK) == EV_CLASS_PWRD_NOTIFY) {
+		struct task_struct *p;
+
+		/* give a SIGPWR signal to init proc */
+
+		/* first find init's task */
+		read_lock(&tasklist_lock);
+		for_each_process(p) {
+			if (p->pid == 1)
+				break;
+		}
+		if (p) { /* we found init's task */
+			printk(KERN_EMERG "Power off indication received. Initiating power fail sequence...\n");
+			force_sig(SIGPWR, p);
+		} else { /* failed to find init's task - just give message(s) */
+			printk(KERN_WARNING "Failed to find init proc to handle power off!\n");
+			printk("%s|$(0x%x)%s\n", severity, esp_code, desc);
+		}
+		read_unlock(&tasklist_lock);
+	} else {
+		/* print to system log */
+		printk("%s|$(0x%x)%s\n", severity, esp_code, desc);
+	}
+}
+
+
+/*
+ * scdrv_event
+ *
+ * Called as a tasklet when an event arrives from the L1.  Read the event
+ * from where it's temporarily stored in SAL and call scdrv_dispatch_event()
+ * to send it on its way.  Keep trying to read events until SAL indicates
+ * that there are no more immediately available.
+ */
+void
+scdrv_event(unsigned long dummy)
+{
+	int status;
+	int len;
+	unsigned long flags;
+	struct subch_data_s *sd = event_sd;
+
+	/* anything to read? */
+	len = CHUNKSIZE;
+	spin_lock_irqsave(&sd->sd_rlock, flags);
+	status = ia64_sn_irtr_recv(sd->sd_nasid, sd->sd_subch,
+				   sd->sd_rb, &len);
+
+	while (!(status < 0)) {
+		spin_unlock_irqrestore(&sd->sd_rlock, flags);
+		scdrv_dispatch_event(sd->sd_rb, len);
+		len = CHUNKSIZE;
+		spin_lock_irqsave(&sd->sd_rlock, flags);
+		status = ia64_sn_irtr_recv(sd->sd_nasid, sd->sd_subch,
+					   sd->sd_rb, &len);
+	}
+	spin_unlock_irqrestore(&sd->sd_rlock, flags);
+}
+
+
+/*
+ * scdrv_event_init
+ *
+ * Sets up a system controller subchannel to begin receiving event
+ * messages. This is sort of a specialized version of scdrv_open()
+ * in drivers/char/sn_sysctl.c.
+ */
+void
+scdrv_event_init(struct sysctl_data_s *scd)
+{
+	int rv;
+
+	event_sd = kmalloc(sizeof (struct subch_data_s), GFP_KERNEL);
+	if (event_sd == NULL) {
+		printk(KERN_WARNING "%s: couldn't allocate subchannel info"
+		       " for event monitoring\n", __FUNCTION__);
+		return;
+	}
+
+	/* initialize subch_data_s fields */
+	memset(event_sd, 0, sizeof (struct subch_data_s));
+	event_sd->sd_nasid = scd->scd_nasid;
+	spin_lock_init(&event_sd->sd_rlock);
+
+	/* ask the system controllers to send events to this node */
+	event_sd->sd_subch = ia64_sn_sysctl_event_init(scd->scd_nasid);
+
+	if (event_sd->sd_subch < 0) {
+		kfree(event_sd);
+		printk(KERN_WARNING "%s: couldn't open event subchannel\n",
+		       __FUNCTION__);
+		return;
+	}
+
+	/* hook event subchannel up to the system controller interrupt */
+	rv = request_irq(SGI_UART_VECTOR, scdrv_event_interrupt,
+			 SA_SHIRQ | SA_INTERRUPT,
+			 "system controller events", event_sd);
+	if (rv) {
+		printk(KERN_WARNING "%s: irq request failed (%d)\n",
+		       __FUNCTION__, rv);
+		ia64_sn_irtr_close(event_sd->sd_nasid, event_sd->sd_subch);
+		kfree(event_sd);
+		return;
+	}
+}
+
+

From 605036cf8443b9172b24954dc1fd218e2049f91b Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@sgi.com>
Date: Mon, 25 Apr 2005 13:31:04 -0700
Subject: [PATCH 33/34] From: jbarnes@sgi.com

[IA64] fix ia64 Kconfig to allow CONFIG_PM on sn2

This probably should have been fixed when I fixed up the generic build for
discontig+numa machines, but oh well.

CONFIG_PM is allowable for generic builds but not for sn2 builds, which
doesn't make much sense, and in fact breaks the build if recent ACPI bits are
added to the tree.  It looks like the only arch that needs to prevent
CONFIG_PM stuff is the ski simulator (though those options could probably use
some cleanup as well), so remove the big conditional and replace it with a
simple test for IA64_HP_SIM instead.

Signed-off-by: Jesse Barnes <jbarnes@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 33fcb205fcb7..468dbe8a6b9c 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -329,7 +329,7 @@ menu "Power management and ACPI"
 
 config PM
 	bool "Power Management support"
-	depends on IA64_GENERIC || IA64_DIG || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB
+	depends on !IA64_HP_SIM
 	default y
 	help
 	  "Power Management" means that parts of your computer are shut

From 2d29306b231a1a0e7a70166c10e4c0f917b21334 Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@sgi.com>
Date: Tue, 26 Apr 2005 09:04:31 -0700
Subject: [PATCH 34/34] [IA64] re-enable preempt before page allocation for
 pgtable quicklist

This is a fix to the pgtable_quicklist code.  There is a GFP_KERNEL
allocation in pgtable_quicklist_alloc(), which spews the usual warnings
if the kernel is under heavy VM pressure and the reclaim code is
invoked.  re-enable preempt before we allocate the new page.

This patch is against 2.6.12-rc2-mm2

Signed-off-by:  Martin Hicks <mort@sgi.com>
Signed-off-by:  Tony Luck <tony.luckintel.com>
---
 include/asm-ia64/pgalloc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h
index 2b7127330ae1..a5f214554afd 100644
--- a/include/asm-ia64/pgalloc.h
+++ b/include/asm-ia64/pgalloc.h
@@ -49,12 +49,12 @@ static inline void *pgtable_quicklist_alloc(void)
 		pgtable_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
 		--pgtable_quicklist_size;
+		preempt_enable();
 	} else {
+		preempt_enable();
 		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 	}
 
-	preempt_enable();
-
 	return ret;
 }