diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index ec9c04de3664..ff05992dae7a 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS]; void init_cur_cpu_trap(struct thread_info *); void setup_tba(void); extern int ncpus_probed; +extern u64 cpu_mondo_counter[NR_CPUS]; unsigned long real_hard_smp_processor_id(void); diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 24f21c726dfa..f10e2f712394 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -673,12 +673,14 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, static int dma_4v_supported(struct device *dev, u64 device_mask) { struct iommu *iommu = dev->archdata.iommu; - u64 dma_addr_mask; + u64 dma_addr_mask = iommu->dma_addr_mask; - if (device_mask > DMA_BIT_MASK(32) && iommu->atu) - dma_addr_mask = iommu->atu->dma_addr_mask; - else - dma_addr_mask = iommu->dma_addr_mask; + if (device_mask > DMA_BIT_MASK(32)) { + if (iommu->atu) + dma_addr_mask = iommu->atu->dma_addr_mask; + else + return 0; + } if ((device_mask & dma_addr_mask) == dma_addr_mask) return 1; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fdf31040a7dc..3218bc43302e 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -622,22 +622,48 @@ retry: } } -/* Multi-cpu list version. */ +#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid]) +#define MONDO_USEC_WAIT_MIN 2 +#define MONDO_USEC_WAIT_MAX 100 +#define MONDO_RETRY_LIMIT 500000 + +/* Multi-cpu list version. + * + * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'. + * Sometimes not all cpus receive the mondo, requiring us to re-send + * the mondo until all cpus have received, or cpus are truly stuck + * unable to receive mondo, and we timeout. + * Occasionally a target cpu strand is borrowed briefly by hypervisor to + * perform guest service, such as PCIe error handling. Consider the + * service time, 1 second overall wait is reasonable for 1 cpu. + * Here two in-between mondo check wait time are defined: 2 usec for + * single cpu quick turn around and up to 100usec for large cpu count. + * Deliver mondo to large number of cpus could take longer, we adjusts + * the retry count as long as target cpus are making forward progress. + */ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) { - int retries, this_cpu, prev_sent, i, saw_cpu_error; + int this_cpu, tot_cpus, prev_sent, i, rem; + int usec_wait, retries, tot_retries; + u16 first_cpu = 0xffff; + unsigned long xc_rcvd = 0; unsigned long status; + int ecpuerror_id = 0; + int enocpu_id = 0; u16 *cpu_list; + u16 cpu; this_cpu = smp_processor_id(); - cpu_list = __va(tb->cpu_list_pa); - - saw_cpu_error = 0; - retries = 0; + usec_wait = cnt * MONDO_USEC_WAIT_MIN; + if (usec_wait > MONDO_USEC_WAIT_MAX) + usec_wait = MONDO_USEC_WAIT_MAX; + retries = tot_retries = 0; + tot_cpus = cnt; prev_sent = 0; + do { - int forward_progress, n_sent; + int n_sent, mondo_delivered, target_cpu_busy; status = sun4v_cpu_mondo_send(cnt, tb->cpu_list_pa, @@ -645,94 +671,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) /* HV_EOK means all cpus received the xcall, we're done. */ if (likely(status == HV_EOK)) - break; + goto xcall_done; + + /* If not these non-fatal errors, panic */ + if (unlikely((status != HV_EWOULDBLOCK) && + (status != HV_ECPUERROR) && + (status != HV_ENOCPU))) + goto fatal_errors; /* First, see if we made any forward progress. + * + * Go through the cpu_list, count the target cpus that have + * received our mondo (n_sent), and those that did not (rem). + * Re-pack cpu_list with the cpus remain to be retried in the + * front - this simplifies tracking the truly stalled cpus. * * The hypervisor indicates successful sends by setting * cpu list entries to the value 0xffff. + * + * EWOULDBLOCK means some target cpus did not receive the + * mondo and retry usually helps. + * + * ECPUERROR means at least one target cpu is in error state, + * it's usually safe to skip the faulty cpu and retry. + * + * ENOCPU means one of the target cpu doesn't belong to the + * domain, perhaps offlined which is unexpected, but not + * fatal and it's okay to skip the offlined cpu. */ + rem = 0; n_sent = 0; for (i = 0; i < cnt; i++) { - if (likely(cpu_list[i] == 0xffff)) + cpu = cpu_list[i]; + if (likely(cpu == 0xffff)) { n_sent++; + } else if ((status == HV_ECPUERROR) && + (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) { + ecpuerror_id = cpu + 1; + } else if (status == HV_ENOCPU && !cpu_online(cpu)) { + enocpu_id = cpu + 1; + } else { + cpu_list[rem++] = cpu; + } } - forward_progress = 0; - if (n_sent > prev_sent) - forward_progress = 1; + /* No cpu remained, we're done. */ + if (rem == 0) + break; + /* Otherwise, update the cpu count for retry. */ + cnt = rem; + + /* Record the overall number of mondos received by the + * first of the remaining cpus. + */ + if (first_cpu != cpu_list[0]) { + first_cpu = cpu_list[0]; + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); + } + + /* Was any mondo delivered successfully? */ + mondo_delivered = (n_sent > prev_sent); prev_sent = n_sent; - /* If we get a HV_ECPUERROR, then one or more of the cpus - * in the list are in error state. Use the cpu_state() - * hypervisor call to find out which cpus are in error state. + /* or, was any target cpu busy processing other mondos? */ + target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu)); + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); + + /* Retry count is for no progress. If we're making progress, + * reset the retry count. */ - if (unlikely(status == HV_ECPUERROR)) { - for (i = 0; i < cnt; i++) { - long err; - u16 cpu; - - cpu = cpu_list[i]; - if (cpu == 0xffff) - continue; - - err = sun4v_cpu_state(cpu); - if (err == HV_CPU_STATE_ERROR) { - saw_cpu_error = (cpu + 1); - cpu_list[i] = 0xffff; - } - } - } else if (unlikely(status != HV_EWOULDBLOCK)) - goto fatal_mondo_error; - - /* Don't bother rewriting the CPU list, just leave the - * 0xffff and non-0xffff entries in there and the - * hypervisor will do the right thing. - * - * Only advance timeout state if we didn't make any - * forward progress. - */ - if (unlikely(!forward_progress)) { - if (unlikely(++retries > 10000)) - goto fatal_mondo_timeout; - - /* Delay a little bit to let other cpus catch up - * on their cpu mondo queue work. - */ - udelay(2 * cnt); + if (likely(mondo_delivered || target_cpu_busy)) { + tot_retries += retries; + retries = 0; + } else if (unlikely(retries > MONDO_RETRY_LIMIT)) { + goto fatal_mondo_timeout; } + + /* Delay a little bit to let other cpus catch up on + * their cpu mondo queue work. + */ + if (!mondo_delivered) + udelay(usec_wait); + + retries++; } while (1); - if (unlikely(saw_cpu_error)) - goto fatal_mondo_cpu_error; - +xcall_done: + if (unlikely(ecpuerror_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n", + this_cpu, ecpuerror_id - 1); + } else if (unlikely(enocpu_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n", + this_cpu, enocpu_id - 1); + } return; -fatal_mondo_cpu_error: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " - "(including %d) were in error state\n", - this_cpu, saw_cpu_error - 1); - return; +fatal_errors: + /* fatal errors include bad alignment, etc */ + pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n", + this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + panic("Unexpected SUN4V mondo error %lu\n", status); fatal_mondo_timeout: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " - " progress after %d retries.\n", - this_cpu, retries); - goto dump_cpu_list_and_out; - -fatal_mondo_error: - printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", - this_cpu, status); - printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " - "mondo_block_pa(%lx)\n", - this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); - -dump_cpu_list_and_out: - printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); - for (i = 0; i < cnt; i++) - printk("%u ", cpu_list[i]); - printk("]\n"); + /* some cpus being non-responsive to the cpu mondo */ + pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n", + this_cpu, first_cpu, (tot_retries + retries), tot_cpus); + panic("SUN4V mondo timeout panic\n"); } static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S index 559bc5e9c199..34631995859a 100644 --- a/arch/sparc/kernel/sun4v_ivec.S +++ b/arch/sparc/kernel/sun4v_ivec.S @@ -26,6 +26,21 @@ sun4v_cpu_mondo: ldxa [%g0] ASI_SCRATCHPAD, %g4 sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + /* Get smp_processor_id() into %g3 */ + sethi %hi(trap_block), %g5 + or %g5, %lo(trap_block), %g5 + sub %g4, %g5, %g3 + srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 + + /* Increment cpu_mondo_counter[smp_processor_id()] */ + sethi %hi(cpu_mondo_counter), %g5 + or %g5, %lo(cpu_mondo_counter), %g5 + sllx %g3, 3, %g3 + add %g5, %g3, %g5 + ldx [%g5], %g3 + add %g3, 1, %g3 + stx %g3, [%g5] + /* Get CPU mondo queue base phys address into %g7. */ ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 196ee5eb4d48..ad31af1dd726 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs) } } +u64 cpu_mondo_counter[NR_CPUS] = {0}; struct trap_per_cpu trap_block[NR_CPUS]; EXPORT_SYMBOL(trap_block);