From b66ac63e20b3f4d7931e67c986956aa5ffbea57f Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 4 Apr 2013 19:49:53 +0200 Subject: [PATCH 01/59] s390/kdump: Add PM notifier for kdump For s390 the page table mapping for the crashkernel memory is removed to protect the pre-loaded kdump kernel and ramdisk. Because the crashkernel memory is not included in the page tables for suspend/resume it is not included in the suspend image. Therefore, after resume, the resumed system no longer contains the pre-loaded kdump kernel, and kdump fails when it is triggered. This patch adds a PM notifier that creates the page tables before suspend is done and removes them again after resume. This ensures that the kdump kernel is included in the suspend image. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/machine_kexec.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b3de27700016..ac2178161ec3 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,35 @@ void setup_regs(void) memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); } +/* + * PM notifier callback for kdump + */ +static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + if (crashk_res.start) + crash_map_reserved_pages(); + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + if (crashk_res.start) + crash_unmap_reserved_pages(); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static int __init machine_kdump_pm_init(void) +{ + pm_notifier(machine_kdump_pm_cb, 0); + return 0; +} +arch_initcall(machine_kdump_pm_init); #endif /* From f752ac4d7d96159776b0eea47489796bd90856fd Mon Sep 17 00:00:00 
2001 From: Martin Schwidefsky Date: Tue, 16 Apr 2013 13:25:06 +0200 Subject: [PATCH 02/59] s390/mm: protection exception PSW for aborted transaction Protection exceptions are usually suppressing, and the fault handler needs to rewind the PSW by the instruction length to get the correct fault address. The exception is a protection exception that occurs while the CPU is in the middle of a transaction. The CPU stores the transaction abort PSW at the start of the transaction; if the transaction is aborted the PSW is already correct and must not be modified by the fault handler. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2fb9e63b8fc4..047c3e4c59a2 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -395,8 +395,13 @@ void __kprobes do_protection_exception(struct pt_regs *regs) int fault; trans_exc_code = regs->int_parm_long; - /* Protection exception is suppressing, decrement psw address. */ - regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); + /* + * Protection exceptions are suppressing, decrement psw address. + * The exception to this rule are aborted transactions, for these + * the PSW already points to the correct location. + */ + if (!(regs->int_code & 0x200)) + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code From 7919e91b34316ee30b14334389e005eb2e9b8e39 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 28 Feb 2013 11:08:54 +0100 Subject: [PATCH 03/59] s390/mm: zero page cache synonyms for zEC12 To avoid cache synonyms on System zEC12, 32 independent zero pages are required, one for each combination of bits 2**12 to 2**16 of the virtual address. 
To avoid wasting too much memory on small virtual systems the number of zero pages is limited to 4 if the memory size is less than or equal to 64MB. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/init.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 49ce6bb2c641..9f9c315b4c07 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -63,10 +63,18 @@ static unsigned long __init setup_zero_pages(void) break; case 0x2097: /* z10 */ case 0x2098: /* z10 */ - default: + case 0x2817: /* z196 */ + case 0x2818: /* z196 */ order = 2; break; + case 0x2827: /* zEC12 */ + default: + order = 5; + break; } + /* Limit number of empty zero pages for small memory sizes */ + if (order > 2 && totalram_pages <= 16384) + order = 2; empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!empty_zero_page) From 08b421245692f3d10f010f02dae69df6a305271c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 25 Feb 2013 22:09:25 +0800 Subject: [PATCH 04/59] s390/pci: use kmem_cache_zalloc instead of kmem_cache_alloc/memset Using kmem_cache_zalloc() instead of kmem_cache_alloc() and memset(). 
Signed-off-by: Wei Yongjun Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 27b4c17855b9..fddf847e71ac 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -249,10 +249,9 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) if (zdev->fmb) return -EINVAL; - zdev->fmb = kmem_cache_alloc(zdev_fmb_cache, GFP_KERNEL); + zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL); if (!zdev->fmb) return -ENOMEM; - memset(zdev->fmb, 0, sizeof(*zdev->fmb)); WARN_ON((u64) zdev->fmb & 0xf); args.fmb_addr = virt_to_phys(zdev->fmb); From 0bcc94baca601c266ded6347c614b180316a383c Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Tue, 5 Mar 2013 10:39:55 +0100 Subject: [PATCH 05/59] s390/dis: use explicit buf len Pass buffer length in extra parameter. Signed-off-by: Stefan Raspl Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 3 ++- arch/s390/kernel/dis.c | 9 +++++---- arch/s390/kvm/trace.h | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 94e749c90230..6b499870662f 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -161,7 +161,8 @@ extern unsigned long thread_saved_pc(struct task_struct *t); extern void show_code(struct pt_regs *regs); extern void print_fn_code(unsigned char *code, unsigned long len); -extern int insn_to_mnemonic(unsigned char *instruction, char buf[8]); +extern int insn_to_mnemonic(unsigned char *instruction, char *buf, + unsigned int len); unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 3ad5e9540160..7f4a4a8c847c 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1696,14 +1696,15 @@ 
static struct insn *find_insn(unsigned char *code) * insn_to_mnemonic - decode an s390 instruction * @instruction: instruction to decode * @buf: buffer to fill with mnemonic + * @len: length of buffer * * Decode the instruction at @instruction and store the corresponding - * mnemonic into @buf. + * mnemonic into @buf of length @len. * @buf is left unchanged if the instruction could not be decoded. * Returns: * %0 on success, %-ENOENT if the instruction was not found. */ -int insn_to_mnemonic(unsigned char *instruction, char buf[8]) +int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len) { struct insn *insn; @@ -1711,10 +1712,10 @@ int insn_to_mnemonic(unsigned char *instruction, char buf[8]) if (!insn) return -ENOENT; if (insn->name[0] == '\0') - snprintf(buf, 8, "%s", + snprintf(buf, len, "%s", long_insn_name[(int) insn->name[1]]); else - snprintf(buf, 8, "%.5s", insn->name); + snprintf(buf, len, "%.5s", insn->name); return 0; } EXPORT_SYMBOL_GPL(insn_to_mnemonic); diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 2b29e62351d3..53252d2d4720 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -117,7 +117,7 @@ TRACE_EVENT(kvm_s390_intercept_instruction, __entry->instruction, insn_to_mnemonic((unsigned char *) &__entry->instruction, - __entry->insn) ? + __entry->insn, sizeof(__entry->insn)) ? 
"unknown" : __entry->insn) ); From 98ae9b020d4bd18e9976d31374d47b382ceab8b3 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 6 Mar 2013 11:06:57 +0100 Subject: [PATCH 06/59] s390/s390dbf.txt: Add doc: Debug views are removed in debug_unregister() Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- Documentation/s390/s390dbf.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt index ae66f9b90a25..fcaf0b4efba2 100644 --- a/Documentation/s390/s390dbf.txt +++ b/Documentation/s390/s390dbf.txt @@ -143,7 +143,8 @@ Parameter: id: handle for debug log Return Value: none -Description: frees memory for a debug log +Description: frees memory for a debug log and removes all registered debug + views. Must not be called within an interrupt handler --------------------------------------------------------------------------- From a75a282d7238f6020957b14d8a14a1e851b9e1dd Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Thu, 7 Mar 2013 01:33:55 +0530 Subject: [PATCH 07/59] s390/monreader: Remove redundant NULL check before kfree kfree on NULL pointer is a no-op. 
Signed-off-by: Syam Sidhardhan Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/monreader.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c index f4ff515db251..0da3ae3cd63b 100644 --- a/drivers/s390/char/monreader.c +++ b/drivers/s390/char/monreader.c @@ -174,8 +174,7 @@ static void mon_free_mem(struct mon_private *monpriv) int i; for (i = 0; i < MON_MSGLIM; i++) - if (monpriv->msg_array[i]) - kfree(monpriv->msg_array[i]); + kfree(monpriv->msg_array[i]); kfree(monpriv); } From cce0eacc225b402824e2fc72936b3796e7659fc6 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 11 Mar 2013 12:58:18 +0100 Subject: [PATCH 08/59] s390/cio: collect format 1 channel-path description data Collect format 1 channel-path description data for each CHPID and update the information in one place. Reviewed-by: Sebastian Ott Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chp.c | 22 +++++++++++++++++++++- drivers/s390/cio/chp.h | 2 ++ drivers/s390/cio/chsc.c | 11 ++++++----- drivers/s390/cio/css.c | 5 ++--- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 50ad5fdd815d..21fabc6d5a9c 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -376,6 +376,26 @@ static void chp_release(struct device *dev) kfree(cp); } +/** + * chp_update_desc - update channel-path description + * @chp - channel-path + * + * Update the channel-path description of the specified channel-path. + * Return zero on success, non-zero otherwise. 
+ */ +int chp_update_desc(struct channel_path *chp) +{ + int rc; + + rc = chsc_determine_base_channel_path_desc(chp->chpid, &chp->desc); + if (rc) + return rc; + + rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1); + + return rc; +} + /** * chp_new - register a new channel-path * @chpid - channel-path ID @@ -403,7 +423,7 @@ int chp_new(struct chp_id chpid) mutex_init(&chp->lock); /* Obtain channel path description and fill it in. */ - ret = chsc_determine_base_channel_path_desc(chpid, &chp->desc); + ret = chp_update_desc(chp); if (ret) goto out_free; if ((chp->desc.flags & 0x80) == 0) { diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h index e1399dbee834..9284b785a06f 100644 --- a/drivers/s390/cio/chp.h +++ b/drivers/s390/cio/chp.h @@ -44,6 +44,7 @@ struct channel_path { struct mutex lock; /* Serialize access to below members. */ int state; struct channel_path_desc desc; + struct channel_path_desc_fmt1 desc_fmt1; /* Channel-measurement related stuff: */ int cmg; int shared; @@ -62,6 +63,7 @@ int chp_is_registered(struct chp_id chpid); void *chp_get_chp_desc(struct chp_id chpid); void chp_remove_cmg_attr(struct channel_path *chp); int chp_add_cmg_attr(struct channel_path *chp); +int chp_update_desc(struct channel_path *chp); int chp_new(struct chp_id chpid); void chp_cfg_schedule(struct chp_id chpid, int configure); void chp_cfg_cancel_deconfigure(struct chp_id chpid); diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index e16c553f6556..8ea7d9b2c671 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -376,7 +376,7 @@ static void chsc_process_sei_chp_avail(struct chsc_sei_nt0_area *sei_area) continue; } mutex_lock(&chp->lock); - chsc_determine_base_channel_path_desc(chpid, &chp->desc); + chp_update_desc(chp); mutex_unlock(&chp->lock); } } @@ -631,8 +631,8 @@ int chsc_chp_vary(struct chp_id chpid, int on) * Redo PathVerification on the devices the chpid connects to */ if (on) { - /* Try to update the 
channel path descritor. */ - chsc_determine_base_channel_path_desc(chpid, &chp->desc); + /* Try to update the channel path description. */ + chp_update_desc(chp); for_each_subchannel_staged(s390_subchannel_vary_chpid_on, __s390_vary_chpid_on, &chpid); } else @@ -825,9 +825,10 @@ int chsc_determine_fmt1_channel_path_desc(struct chp_id chpid, { struct chsc_response_struct *chsc_resp; struct chsc_scpd *scpd_area; + unsigned long flags; int ret; - spin_lock_irq(&chsc_page_lock); + spin_lock_irqsave(&chsc_page_lock, flags); scpd_area = chsc_page; ret = chsc_determine_channel_path_desc(chpid, 0, 0, 1, 0, scpd_area); if (ret) @@ -835,7 +836,7 @@ int chsc_determine_fmt1_channel_path_desc(struct chp_id chpid, chsc_resp = (void *)&scpd_area->response; memcpy(desc, &chsc_resp->data, sizeof(*desc)); out: - spin_unlock_irq(&chsc_page_lock); + spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; } diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index a239237d43f3..658d9349c837 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1065,9 +1065,8 @@ void channel_subsystem_reinit(void) chsc_enable_facility(CHSC_SDA_OC_MSS); chp_id_for_each(&chpid) { chp = chpid_to_chp(chpid); - if (!chp) - continue; - chsc_determine_base_channel_path_desc(chpid, &chp->desc); + if (chp) + chp_update_desc(chp); } } From 040495d110ba9edc347b3af0e119d1b0d0a8ac87 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 11 Mar 2013 13:01:08 +0100 Subject: [PATCH 09/59] s390/cio: make use of newly added format 1 channel-path data Make use of the stored copy of format 1 channel-path data instead of querying the information every time the corresponding function is called. 
Reviewed-by: Sebastian Ott Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_ops.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index c77b6e06bf64..4845d64f2842 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -704,9 +704,9 @@ EXPORT_SYMBOL(ccw_device_tm_start_timeout); int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask) { struct subchannel *sch = to_subchannel(cdev->dev.parent); - struct channel_path_desc_fmt1 desc; + struct channel_path *chp; struct chp_id chpid; - int mdc = 0, ret, i; + int mdc = 0, i; /* Adjust requested path mask to excluded varied off paths. */ if (mask) @@ -719,14 +719,20 @@ int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask) if (!(mask & (0x80 >> i))) continue; chpid.id = sch->schib.pmcw.chpid[i]; - ret = chsc_determine_fmt1_channel_path_desc(chpid, &desc); - if (ret) - return ret; - if (!desc.f) + chp = chpid_to_chp(chpid); + if (!chp) + continue; + + mutex_lock(&chp->lock); + if (!chp->desc_fmt1.f) { + mutex_unlock(&chp->lock); return 0; - if (!desc.r) + } + if (!chp->desc_fmt1.r) mdc = 1; - mdc = mdc ? min(mdc, (int)desc.mdc) : desc.mdc; + mdc = mdc ? 
min_t(int, mdc, chp->desc_fmt1.mdc) : + chp->desc_fmt1.mdc; + mutex_unlock(&chp->lock); } return mdc; From 3d04fea5e7527e950b31542f27df5cb51ca581b4 Mon Sep 17 00:00:00 2001 From: Stelian Nirlu Date: Mon, 11 Mar 2013 18:22:10 +0200 Subject: [PATCH 10/59] s390/bpf,jit: use kcalloc instead of kmalloc and memset Signed-off-by: Stelian Nirlu Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/net/bpf_jit_comp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 0972e91cced2..82f165f8078c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -747,10 +747,9 @@ void bpf_jit_compile(struct sk_filter *fp) if (!bpf_jit_enable) return; - addrs = kmalloc(fp->len * sizeof(*addrs), GFP_KERNEL); + addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL); if (addrs == NULL) return; - memset(addrs, 0, fp->len * sizeof(*addrs)); memset(&jit, 0, sizeof(cjit)); memset(&cjit, 0, sizeof(cjit)); From 006485dc4859a08c71879ce1d82e88f9105488a2 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Tue, 12 Mar 2013 13:18:47 +0800 Subject: [PATCH 11/59] s390/dasd: remove cast for kzalloc return value remove cast for kzalloc return value. 
Signed-off-by: Zhang Yanfei Signed-off-by: Heiko Carstens --- drivers/s390/block/dasd_devmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index c196827c228f..a71bb8aaca1d 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -410,8 +410,7 @@ dasd_add_busid(const char *bus_id, int features) struct dasd_devmap *devmap, *new, *tmp; int hash; - new = (struct dasd_devmap *) - kzalloc(sizeof(struct dasd_devmap), GFP_KERNEL); + new = kzalloc(sizeof(struct dasd_devmap), GFP_KERNEL); if (!new) return ERR_PTR(-ENOMEM); spin_lock(&dasd_devmap_lock); From 8fe853f39368d942d438d40a0c737a82266c3b1c Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghiu Date: Wed, 13 Mar 2013 21:46:08 +0200 Subject: [PATCH 12/59] s390/cmm: Removed useless label Rewrote conditional statement and eliminated the out_kthread label. Signed-off-by: Alexandru Gheorghiu Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/cmm.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 479e94282910..9d84a1feefef 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -458,12 +458,10 @@ static int __init cmm_init(void) if (rc) goto out_pm; cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); - rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0; - if (rc) - goto out_kthread; - return 0; + if (!IS_ERR(cmm_thread_ptr)) + return 0; -out_kthread: + rc = PTR_ERR(cmm_thread_ptr); unregister_pm_notifier(&cmm_power_notifier); out_pm: unregister_oom_notifier(&cmm_oom_nb); From 2b55732ad23fcc9eaec05acc21fd579ef8a9f38b Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghiu Date: Wed, 13 Mar 2013 21:12:38 +0200 Subject: [PATCH 13/59] s390/hypfs: Use PTR_RET function Used PTR_RET function instead of IS_ERR and PTR_ERR. Patch found using coccinelle. 
Signed-off-by: Alexandru Gheorghiu Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_dbfs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 9fd4a40c6752..bb5dd496614f 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -105,9 +105,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) int hypfs_dbfs_init(void) { dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); - if (IS_ERR(dbfs_dir)) - return PTR_ERR(dbfs_dir); - return 0; + return PTR_RET(dbfs_dir); } void hypfs_dbfs_exit(void) From 1bca09f7144450989e409c82ff0db83dddf489ac Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Mar 2013 13:44:25 +0100 Subject: [PATCH 14/59] s390/dumpstack: fix call chain walking dumpstack() did not always print a sane callchain when being called. The reason is that show_trace() accessed register 15 directly to get the current stack pointer and passed that pointer to __show_trace() which expects a valid stack frame pointer as argument. However due to tail call optimization the stack frame may not exist anymore when __show_trace() gets called and therefore an invalid stack frame pointer gets passed. To prevent that disable tail call optimization for call chain walking functions. So move all the show_* functions to a dumpstack.c file like other architectures have it already and add a -fno-optimize-sibling-calls compile flag to both dumpstack.c and stacktrace.c to prevent tail call optimization. Fixes callchains that looked e.g. 
like this: [ 12.868258] Call Trace: [ 12.868262] ([<0000000000008000>] 0x8000) Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 17 ++- arch/s390/kernel/dumpstack.c | 236 ++++++++++++++++++++++++++++++++ arch/s390/kernel/traps.c | 252 +---------------------------------- 3 files changed, 254 insertions(+), 251 deletions(-) create mode 100644 arch/s390/kernel/dumpstack.c diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2ac311ef5c9b..1386fcaf4ef6 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -13,6 +13,14 @@ endif # CFLAGS_smp.o := -Wno-nonnull +# +# Disable tailcall optimizations for stack / callchain walking functions +# since this might generate broken code when accessing register 15 and +# passing its content to other functions. +# +CFLAGS_stacktrace.o += -fno-optimize-sibling-calls +CFLAGS_dumpstack.o += -fno-optimize-sibling-calls + # # Pass UTS_MACHINE for user_regset definition # @@ -20,10 +28,11 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w -obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ - processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \ - debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ - sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o +obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o +obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o +obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o +obj-y += dumpstack.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c new file mode 100644 index 000000000000..03dce39d01ee --- /dev/null +++ b/arch/s390/kernel/dumpstack.c 
@@ -0,0 +1,236 @@ +/* + * Stack dumping functions + * + * Copyright IBM Corp. 1999, 2013 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_64BIT +#define LONG "%08lx " +#define FOURLONG "%08lx %08lx %08lx %08lx\n" +static int kstack_depth_to_print = 12; +#else /* CONFIG_64BIT */ +#define LONG "%016lx " +#define FOURLONG "%016lx %016lx %016lx %016lx\n" +static int kstack_depth_to_print = 20; +#endif /* CONFIG_64BIT */ + +/* + * For show_trace we have three different stacks to consider: + * - the panic stack which is used if the kernel stack has overflowed + * - the asynchronous interrupt stack (cpu related) + * - the synchronous kernel stack (process related) + * The stack trace can start at any of the three stacks and can potentially + * touch all of them. The order is: panic stack, async stack, sync stack. + */ +static unsigned long +__show_trace(unsigned long sp, unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); + print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); + print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); + } + /* Zero backchain detected, check for interrupt frame. 
*/ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); + print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); + low = sp; + sp = regs->gprs[15]; + } +} + +static void show_trace(struct task_struct *task, unsigned long *stack) +{ + register unsigned long __r15 asm ("15"); + unsigned long sp; + + sp = (unsigned long) stack; + if (!sp) + sp = task ? task->thread.ksp : __r15; + printk("Call Trace:\n"); +#ifdef CONFIG_CHECK_STACK + sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, + S390_lowcore.panic_stack); +#endif + sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + if (task) + __show_trace(sp, (unsigned long) task_stack_page(task), + (unsigned long) task_stack_page(task) + THREAD_SIZE); + else + __show_trace(sp, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); + if (!task) + task = current; + debug_show_held_locks(task); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + register unsigned long *__r15 asm ("15"); + unsigned long *stack; + int i; + + if (!sp) + stack = task ? (unsigned long *) task->thread.ksp : __r15; + else + stack = sp; + + for (i = 0; i < kstack_depth_to_print; i++) { + if (((addr_t) stack & (THREAD_SIZE-1)) == 0) + break; + if ((i * sizeof(long) % 32) == 0) + printk("%s ", i == 0 ? 
"" : "\n"); + printk(LONG, *stack++); + } + printk("\n"); + show_trace(task, sp); +} + +static void show_last_breaking_event(struct pt_regs *regs) +{ +#ifdef CONFIG_64BIT + printk("Last Breaking-Event-Address:\n"); + printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN); + print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN); +#endif +} + +/* + * The architecture-independent dump_stack generator + */ +void dump_stack(void) +{ + printk("CPU: %d %s %s %.*s\n", + task_thread_info(current)->cpu, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + printk("Process %s (pid: %d, task: %p, ksp: %p)\n", + current->comm, current->pid, current, + (void *) current->thread.ksp); + show_stack(NULL, NULL); +} +EXPORT_SYMBOL(dump_stack); + +static inline int mask_bits(struct pt_regs *regs, unsigned long bits) +{ + return (regs->psw.mask & bits) / ((~bits + 1) & bits); +} + +void show_registers(struct pt_regs *regs) +{ + char *mode; + + mode = user_mode(regs) ? 
"User" : "Krnl"; + printk("%s PSW : %p %p", + mode, (void *) regs->psw.mask, + (void *) regs->psw.addr); + print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); + printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " + "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), + mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), + mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), + mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), + mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), + mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); +#ifdef CONFIG_64BIT + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); +#endif + printk("\n%s GPRS: " FOURLONG, mode, + regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); + printk(" " FOURLONG, + regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); + printk(" " FOURLONG, + regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); + printk(" " FOURLONG, + regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); + show_code(regs); +} + +void show_regs(struct pt_regs *regs) +{ + printk("CPU: %d %s %s %.*s\n", + task_thread_info(current)->cpu, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + printk("Process %s (pid: %d, task: %p, ksp: %p)\n", + current->comm, current->pid, current, + (void *) current->thread.ksp); + show_registers(regs); + /* Show stack backtrace if pt_regs is from kernel mode */ + if (!user_mode(regs)) + show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_last_breaking_event(regs); +} + +static DEFINE_SPINLOCK(die_lock); + +void die(struct pt_regs *regs, const char *str) +{ + static int die_counter; + + oops_enter(); + lgr_info_log(); + debug_stop_all(); + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); 
+#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irq(&die_lock); + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception: panic_on_oops"); + oops_exit(); + do_exit(SIGSEGV); +} diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 13dd63fba367..c5762324d9ee 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -12,49 +12,16 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'asm.s'. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "entry.h" int show_unhandled_signals = 1; -#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) - -#ifndef CONFIG_64BIT -#define LONG "%08lx " -#define FOURLONG "%08lx %08lx %08lx %08lx\n" -static int kstack_depth_to_print = 12; -#else /* CONFIG_64BIT */ -#define LONG "%016lx " -#define FOURLONG "%016lx %016lx %016lx %016lx\n" -static int kstack_depth_to_print = 20; -#endif /* CONFIG_64BIT */ - static inline void __user *get_trap_ip(struct pt_regs *regs) { #ifdef CONFIG_64BIT @@ -72,215 +39,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) #endif } -/* - * For show_trace we have tree different stack to consider: - * - the panic stack which is used if the kernel stack has overflown - * - the asynchronous interrupt stack (cpu related) - * - the synchronous kernel stack (process related) - * The stack trace can start at any of the 
three stack and can potentially - * touch all of them. The order is: panic stack, async stack, sync stack. - */ -static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) -{ - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - sp = sp & PSW_ADDR_INSN; - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); - print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); - low = sp; - sp = regs->gprs[15]; - } -} - -static void show_trace(struct task_struct *task, unsigned long *stack) -{ - register unsigned long __r15 asm ("15"); - unsigned long sp; - - sp = (unsigned long) stack; - if (!sp) - sp = task ? 
task->thread.ksp : __r15; - printk("Call Trace:\n"); -#ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, - S390_lowcore.panic_stack); -#endif - sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); - if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); - else - __show_trace(sp, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); - if (!task) - task = current; - debug_show_held_locks(task); -} - -void show_stack(struct task_struct *task, unsigned long *sp) -{ - register unsigned long * __r15 asm ("15"); - unsigned long *stack; - int i; - - if (!sp) - stack = task ? (unsigned long *) task->thread.ksp : __r15; - else - stack = sp; - - for (i = 0; i < kstack_depth_to_print; i++) { - if (((addr_t) stack & (THREAD_SIZE-1)) == 0) - break; - if ((i * sizeof(long) % 32) == 0) - printk("%s ", i == 0 ? "" : "\n"); - printk(LONG, *stack++); - } - printk("\n"); - show_trace(task, sp); -} - -static void show_last_breaking_event(struct pt_regs *regs) -{ -#ifdef CONFIG_64BIT - printk("Last Breaking-Event-Address:\n"); - printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN); - print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN); -#endif -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - -static inline int mask_bits(struct pt_regs *regs, unsigned long bits) -{ - return (regs->psw.mask & bits) / ((~bits + 1) & bits); -} - -void show_registers(struct pt_regs *regs) -{ - char *mode; - - mode = user_mode(regs) ? 
"User" : "Krnl"; - printk("%s PSW : %p %p", - mode, (void *) regs->psw.mask, - (void *) regs->psw.addr); - print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); - printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), - mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), - mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), - mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), - mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), - mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); -#ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); -#endif - printk("\n%s GPRS: " FOURLONG, mode, - regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); - printk(" " FOURLONG, - regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); - printk(" " FOURLONG, - regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); - printk(" " FOURLONG, - regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); - - show_code(regs); -} - -void show_regs(struct pt_regs *regs) -{ - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_registers(regs); - /* Show stack backtrace if pt_regs is from kernel mode */ - if (!user_mode(regs)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); - show_last_breaking_event(regs); -} - -static DEFINE_SPINLOCK(die_lock); - -void die(struct pt_regs *regs, const char *str) -{ - static int die_counter; - - oops_enter(); - lgr_info_log(); - debug_stop_all(); - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); 
-#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); - print_modules(); - show_regs(regs); - bust_spinlocks(0); - add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irq(&die_lock); - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception: panic_on_oops"); - oops_exit(); - do_exit(SIGSEGV); -} - static inline void report_user_fault(struct pt_regs *regs, int signr) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) From f7f8d7e51d3c31426ee006c38d5b0ae3c9b8733e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Mar 2013 16:46:05 +0100 Subject: [PATCH 15/59] s390/mm: speedup storage key initialization Use sske with multiple block control to initialize storage keys within a 1 MB frame at once. It turned out that the sske with mb=1 is an order of magnitude faster than pfmf. This is only an issue for very large systems (several 100GB) where storage key initialization could last more than a minute. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pageattr.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index d21040ed5e59..80adfbf75065 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -9,31 +9,25 @@ #include #include +static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) +{ + asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" + : [addr] "+a" (addr) : [skey] "d" (skey)); + return addr; +} + void storage_key_init_range(unsigned long start, unsigned long end) { - unsigned long boundary, function, size; + unsigned long boundary, size; while (start < end) { - if (MACHINE_HAS_EDAT2) { - /* set storage keys for a 2GB frame */ - function = 0x22000 | PAGE_DEFAULT_KEY; - size = 1UL << 31; - boundary = (start + size) & ~(size - 1); - if (boundary <= end) { - do { - start = pfmf(function, start); - } while (start < boundary); - continue; - } - } if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ - function = 0x21000 | PAGE_DEFAULT_KEY; size = 1UL << 20; boundary = (start + size) & ~(size - 1); if (boundary <= end) { do { - start = pfmf(function, start); + start = sske_frame(start, PAGE_DEFAULT_KEY); } while (start < boundary); continue; } From 01c2475f6d959ed3beb9ce1b0bc6f8108179af65 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 23 Mar 2013 23:05:29 +0900 Subject: [PATCH 16/59] s390/bitops: remove unnecessary macro definitions in asm/bitops.h Remove unused __BITOPS_ALIGN, and replace __BITOPS_WORDSIZE with BITS_PER_LONG. 
Signed-off-by: Akinobu Mita Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/bitops.h | 110 ++++++++++++++++----------------- 1 file changed, 53 insertions(+), 57 deletions(-) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 15422933c60b..2bc357408f43 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -61,8 +61,6 @@ extern const char _sb_findmap[]; #ifndef CONFIG_64BIT -#define __BITOPS_ALIGN 3 -#define __BITOPS_WORDSIZE 32 #define __BITOPS_OR "or" #define __BITOPS_AND "nr" #define __BITOPS_XOR "xr" @@ -81,8 +79,6 @@ extern const char _sb_findmap[]; #else /* CONFIG_64BIT */ -#define __BITOPS_ALIGN 7 -#define __BITOPS_WORDSIZE 64 #define __BITOPS_OR "ogr" #define __BITOPS_AND "ngr" #define __BITOPS_XOR "xgr" @@ -101,7 +97,7 @@ extern const char _sb_findmap[]; #endif /* CONFIG_64BIT */ -#define __BITOPS_WORDS(bits) (((bits)+__BITOPS_WORDSIZE-1)/__BITOPS_WORDSIZE) +#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) #define __BITOPS_BARRIER() asm volatile("" : : : "memory") #ifdef CONFIG_SMP @@ -114,9 +110,9 @@ static inline void set_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make OR mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR); } @@ -130,9 +126,9 @@ static inline void clear_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make AND mask */ - mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1))); + mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); /* Do the atomic update. 
*/ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND); } @@ -146,9 +142,9 @@ static inline void change_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make XOR mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR); } @@ -163,9 +159,9 @@ test_and_set_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make OR/test mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR); __BITOPS_BARRIER(); @@ -182,9 +178,9 @@ test_and_clear_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make AND/test mask */ - mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1))); + mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND); __BITOPS_BARRIER(); @@ -201,9 +197,9 @@ test_and_change_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make XOR/test mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. 
*/ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR); __BITOPS_BARRIER(); @@ -218,7 +214,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " oc %O0(1,%R0),%1" : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); @@ -229,7 +225,7 @@ __constant_set_bit(const unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = ((unsigned long) ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); *(unsigned char *) addr |= 1 << (nr & 7); } @@ -246,7 +242,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " nc %O0(1,%R0),%1" : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" ); @@ -257,7 +253,7 @@ __constant_clear_bit(const unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = ((unsigned long) ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); *(unsigned char *) addr &= ~(1 << (nr & 7)); } @@ -273,7 +269,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " xc %O0(1,%R0),%1" : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); @@ -284,7 +280,7 @@ __constant_change_bit(const unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = ((unsigned long) ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); *(unsigned char *) addr ^= 1 << 
(nr & 7); } @@ -302,7 +298,7 @@ test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr) unsigned long addr; unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); ch = *(unsigned char *) addr; asm volatile( " oc %O0(1,%R0),%1" @@ -321,7 +317,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr) unsigned long addr; unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); ch = *(unsigned char *) addr; asm volatile( " nc %O0(1,%R0),%1" @@ -340,7 +336,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr) unsigned long addr; unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); ch = *(unsigned char *) addr; asm volatile( " xc %O0(1,%R0),%1" @@ -376,7 +372,7 @@ static inline int __test_bit(unsigned long nr, const volatile unsigned long *ptr unsigned long addr; unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); ch = *(volatile unsigned char *) addr; return (ch >> (nr & 7)) & 1; } @@ -384,7 +380,7 @@ static inline int __test_bit(unsigned long nr, const volatile unsigned long *ptr static inline int __constant_test_bit(unsigned long nr, const volatile unsigned long *addr) { return (((volatile char *) addr) - [(nr^(__BITOPS_WORDSIZE-8))>>3] & (1<<(nr&7))) != 0; + [(nr^(BITS_PER_LONG-8))>>3] & (1<<(nr&7))) != 0; } #define test_bit(nr,addr) \ @@ -693,18 +689,18 @@ static inline int find_next_bit_left(const unsigned long *addr, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + 
offset / BITS_PER_LONG; if (bit) { set = __flo_word(0, *p & (~0UL << bit)); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_bit_left(p, size); @@ -736,22 +732,22 @@ static inline int find_next_zero_bit (const unsigned long * addr, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + offset / BITS_PER_LONG; if (bit) { /* - * __ffz_word returns __BITOPS_WORDSIZE + * __ffz_word returns BITS_PER_LONG * if no zero bit is present in the word. */ set = __ffz_word(bit, *p >> bit); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_zero_bit(p, size); @@ -773,22 +769,22 @@ static inline int find_next_bit (const unsigned long * addr, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + offset / BITS_PER_LONG; if (bit) { /* - * __ffs_word returns __BITOPS_WORDSIZE + * __ffs_word returns BITS_PER_LONG * if no one bit is present in the word. 
*/ set = __ffs_word(0, *p & (~0UL << bit)); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_bit(p, size); @@ -843,22 +839,22 @@ static inline int find_next_zero_bit_le(void *vaddr, unsigned long size, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + offset / BITS_PER_LONG; if (bit) { /* - * s390 version of ffz returns __BITOPS_WORDSIZE + * s390 version of ffz returns BITS_PER_LONG * if no zero bit is present in the word. */ set = __ffz_word(bit, __load_ulong_le(p, 0) >> bit); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_zero_bit_le(p, size); @@ -885,22 +881,22 @@ static inline int find_next_bit_le(void *vaddr, unsigned long size, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + offset / BITS_PER_LONG; if (bit) { /* - * s390 version of ffz returns __BITOPS_WORDSIZE + * s390 version of ffz returns BITS_PER_LONG * if no zero bit is present in the word. 
*/ set = __ffs_word(0, __load_ulong_le(p, 0) & (~0UL << bit)); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_bit_le(p, size); From 03ff60df34892e8c471873a088b075a5b621ce0a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 23 Mar 2013 23:05:30 +0900 Subject: [PATCH 17/59] s390/cio: Use BITS_TO_LONGS() instead of __BITOPS_WORDS() Use BITS_TO_LONGS() instead of __BITOPS_WORDS() that is considered to be a private macro in asm/bitops.h for s390. Signed-off-by: Akinobu Mita Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/idset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c index 65d13e38803f..5a999084a229 100644 --- a/drivers/s390/cio/idset.c +++ b/drivers/s390/cio/idset.c @@ -17,7 +17,7 @@ struct idset { static inline unsigned long bitmap_size(int num_ssid, int num_id) { - return __BITOPS_WORDS(num_ssid * num_id) * sizeof(unsigned long); + return BITS_TO_LONGS(num_ssid * num_id) * sizeof(unsigned long); } static struct idset *idset_new(int num_ssid, int num_id) From 5294ee00a16567355c85b849742e5219aad880d0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 26 Mar 2013 09:05:36 +0100 Subject: [PATCH 18/59] s390/bitops: get rid of __BITOPS_BARRIER() Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/bitops.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 2bc357408f43..4d8604e311f3 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -98,7 +98,6 @@ extern const char _sb_findmap[]; #endif /* CONFIG_64BIT */ #define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / 
BITS_PER_LONG) -#define __BITOPS_BARRIER() asm volatile("" : : : "memory") #ifdef CONFIG_SMP /* @@ -164,7 +163,7 @@ test_and_set_bit_cs(unsigned long nr, volatile unsigned long *ptr) mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR); - __BITOPS_BARRIER(); + barrier(); return (old & mask) != 0; } @@ -183,7 +182,7 @@ test_and_clear_bit_cs(unsigned long nr, volatile unsigned long *ptr) mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND); - __BITOPS_BARRIER(); + barrier(); return (old ^ new) != 0; } @@ -202,7 +201,7 @@ test_and_change_bit_cs(unsigned long nr, volatile unsigned long *ptr) mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR); - __BITOPS_BARRIER(); + barrier(); return (old & mask) != 0; } #endif /* CONFIG_SMP */ From 91c15a951091a64a5f048ff93292057e3b590b6f Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 8 Apr 2013 16:09:31 +0200 Subject: [PATCH 19/59] s390/hibernate: Save and restore absolute zero pages Since commit 5f954c34 ([S390] hibernation: fix lowcore handling) the absolute zero lowcore is lost during suspend/resume. For example, this leads to the problem that the re-IPL device for kdump is no longer set after resume. With this patch during suspend a buffer is allocated in the new PM notifier "suspend_pm_cb" and then the absolute zero lowcore is saved to that buffer. The resume code then copies back this buffer to absolute zero and afterwards the PM notifier releases the memory. 
Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.h | 1 + arch/s390/kernel/suspend.c | 31 +++++++++++++++++++++++++++++++ arch/s390/kernel/swsusp_asm64.S | 29 ++++++++++++++++++++++++++--- 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index c3a736a3ed44..aa0ab02e9595 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -7,6 +7,7 @@ #include extern void *restart_stack; +extern unsigned long suspend_zero_pages; void system_call(void); void pgm_check_handler(void); diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index aa1494d0e380..c479d2f9605b 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -41,6 +41,7 @@ struct page_key_data { static struct page_key_data *page_key_data; static struct page_key_data *page_key_rp, *page_key_wp; static unsigned long page_key_rx, page_key_wx; +unsigned long suspend_zero_pages; /* * For each page in the hibernation image one additional byte is @@ -149,6 +150,36 @@ int pfn_is_nosave(unsigned long pfn) return 0; } +/* + * PM notifier callback for suspend + */ +static int suspend_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER); + if (!suspend_zero_pages) + return NOTIFY_BAD; + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + free_pages(suspend_zero_pages, LC_ORDER); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static int __init suspend_pm_init(void) +{ + pm_notifier(suspend_pm_cb, 0); + return 0; +} +arch_initcall(suspend_pm_init); + void save_processor_state(void) { /* swsusp_arch_suspend() actually saves all cpu register contents. 
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index d4ca4e0617b5..c487be4cfc81 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -36,8 +36,8 @@ ENTRY(swsusp_arch_suspend) /* Store prefix register on stack */ stpx __SF_EMPTY(%r15) - /* Save prefix register contents for lowcore */ - llgf %r4,__SF_EMPTY(%r15) + /* Save prefix register contents for lowcore copy */ + llgf %r10,__SF_EMPTY(%r15) /* Get pointer to save area */ lghi %r1,0x1000 @@ -91,7 +91,18 @@ ENTRY(swsusp_arch_suspend) xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) spx __SF_EMPTY(%r15) + /* Save absolute zero pages */ + larl %r2,suspend_zero_pages + lg %r2,0(%r2) + lghi %r4,0 + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Copy lowcore to absolute zero lowcore */ lghi %r2,0 + lgr %r4,%r10 lghi %r3,2*PAGE_SIZE lghi %r5,2*PAGE_SIZE 1: mvcle %r2,%r4,0 @@ -248,8 +259,20 @@ restore_registers: /* Load old stack */ lg %r15,0x2f8(%r13) + /* Save prefix register */ + mvc __SF_EMPTY(4,%r15),0x318(%r13) + + /* Restore absolute zero pages */ + lghi %r2,0 + larl %r4,suspend_zero_pages + lg %r4,0(%r4) + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + /* Restore prefix register */ - spx 0x318(%r13) + spx __SF_EMPTY(%r15) /* Activate DAT */ stosm __SF_EMPTY(%r15),0x04 From 188561a462d3b82451d6ba09e2e32c9ba2c9938c Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 13 Apr 2013 12:53:21 +0200 Subject: [PATCH 20/59] s390/cio: wait_cons_dev don't use static variable wait_cons_dev is used to busy wait for an interrupt on the console ccw device. Stop using the static console_subchannel and add a parameter to this function to specify on which ccw device/subchannel we have to do the polling. 
While at it rename the function to ccw_device_wait_idle and move it to device.c Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ccwdev.h | 1 + arch/s390/include/asm/cio.h | 2 -- drivers/s390/char/con3215.c | 2 +- drivers/s390/char/raw3270.c | 4 ++-- drivers/s390/cio/cio.c | 20 ++------------------ drivers/s390/cio/cio.h | 1 + drivers/s390/cio/device.c | 27 +++++++++++++++++++++++++-- 7 files changed, 32 insertions(+), 25 deletions(-) diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index e6061617a50b..cb56fb6cff7e 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -220,6 +220,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); #define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) extern struct ccw_device *ccw_device_probe_console(void); +extern void ccw_device_wait_idle(struct ccw_device *); extern int ccw_device_force_console(void); int ccw_device_siosl(struct ccw_device *); diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index ad2b924167d7..ffb898961c8d 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -296,8 +296,6 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, return 0; } -extern void wait_cons_dev(void); - extern void css_schedule_reprobe(void); extern void reipl_ccw_dev(struct ccw_dev_id *id); diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 7b00fa634d40..0d79eec799f1 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -502,7 +502,7 @@ static void raw3215_make_room(struct raw3215_info *raw, unsigned int length) raw3215_try_io(raw); raw->flags &= ~RAW3215_FLUSHING; #ifdef CONFIG_TN3215_CONSOLE - wait_cons_dev(); + ccw_device_wait_idle(raw->cdev); #endif /* Enough room freed up ? 
*/ if (RAW3215_BUFFER_SIZE - raw->count >= length) diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 4c9030a5b9f2..383d6432a1a8 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -796,7 +796,7 @@ struct raw3270 __init *raw3270_setup_console(struct ccw_device *cdev) do { __raw3270_reset_device(rp); while (!raw3270_state_final(rp)) { - wait_cons_dev(); + ccw_device_wait_idle(rp->cdev); barrier(); } } while (rp->state != RAW3270_STATE_READY); @@ -810,7 +810,7 @@ raw3270_wait_cons_dev(struct raw3270 *rp) unsigned long flags; spin_lock_irqsave(get_ccwdev_lock(rp->cdev), flags); - wait_cons_dev(); + ccw_device_wait_idle(rp->cdev); spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags); } diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 986ef6a92a41..2c1d53fb2fab 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -656,9 +656,9 @@ static int console_subchannel_in_use; /* * Use cio_tsch to update the subchannel status and call the interrupt handler - * if status had been pending. Called with the console_subchannel lock. + * if status had been pending. Called with the subchannel's lock held. 
*/ -static void cio_tsch(struct subchannel *sch) +void cio_tsch(struct subchannel *sch) { struct irb *irb; int irq_context; @@ -690,22 +690,6 @@ void *cio_get_console_priv(void) return &console_priv; } -/* - * busy wait for the next interrupt on the console - */ -void wait_cons_dev(void) -{ - if (!console_subchannel_in_use) - return; - - while (1) { - cio_tsch(&console_subchannel); - if (console_subchannel.schib.scsw.cmd.actl == 0) - break; - udelay_simple(100); - } -} - static int cio_test_for_console(struct subchannel_id schid, void *data) { diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 4a1ff5c2eb88..3b97c8bb30e5 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -133,6 +133,7 @@ extern int cio_is_console(struct subchannel_id); extern struct subchannel *cio_get_console_subchannel(void); extern spinlock_t * cio_get_console_lock(void); extern void *cio_get_console_priv(void); +extern void cio_tsch(struct subchannel *sch); #else #define cio_is_console(schid) 0 #define cio_get_console_subchannel() NULL diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index c6767f5a58b2..2e1e9086e916 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1612,13 +1613,15 @@ static int ccw_device_console_enable(struct ccw_device *cdev, /* Now wait for the async. recognition to come to an end. 
*/ spin_lock_irq(cdev->ccwlock); while (!dev_fsm_final_state(cdev)) - wait_cons_dev(); + ccw_device_wait_idle(cdev); + rc = -EIO; if (cdev->private->state != DEV_STATE_OFFLINE) goto out_unlock; ccw_device_online(cdev); while (!dev_fsm_final_state(cdev)) - wait_cons_dev(); + ccw_device_wait_idle(cdev); + if (cdev->private->state != DEV_STATE_ONLINE) goto out_unlock; rc = 0; @@ -1655,6 +1658,26 @@ ccw_device_probe_console(void) return &console_cdev; } +/** + * ccw_device_wait_idle() - busy wait for device to become idle + * @cdev: ccw device + * + * Poll until activity control is zero, that is, no function or data + * transfer is pending/active. + * Called with device lock being held. + */ +void ccw_device_wait_idle(struct ccw_device *cdev) +{ + struct subchannel *sch = to_subchannel(cdev->dev.parent); + + while (1) { + cio_tsch(sch); + if (sch->schib.scsw.cmd.actl == 0) + break; + udelay_simple(100); + } +} + static int ccw_device_pm_restore(struct device *dev); int ccw_device_force_console(void) From f10ccca7a555f5e80ed7ecff58e7dfdab03860da Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 13 Apr 2013 12:56:51 +0200 Subject: [PATCH 21/59] s390/cio: ccw_device_force_console don't use static variable force_console is used to wake up the CCW based console device to print a panic message in case something goes wrong in a suspend or resume cycle. Stop using the static console_subchannel and add a parameter to this function to specify which ccw device we have to wake up. 
Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ccwdev.h | 2 +- drivers/s390/char/con3215.c | 2 +- drivers/s390/char/raw3270.c | 2 +- drivers/s390/cio/device.c | 6 ++---- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index cb56fb6cff7e..f201af8be580 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -221,7 +221,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); extern struct ccw_device *ccw_device_probe_console(void); extern void ccw_device_wait_idle(struct ccw_device *); -extern int ccw_device_force_console(void); +extern int ccw_device_force_console(struct ccw_device *); int ccw_device_siosl(struct ccw_device *); diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 0d79eec799f1..eb5d22795c47 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -858,7 +858,7 @@ static void con3215_flush(void) raw = raw3215[0]; /* console 3215 is the first one */ if (raw->port.flags & ASYNC_SUSPENDED) /* The console is still frozen for suspend. */ - if (ccw_device_force_console()) + if (ccw_device_force_console(raw->cdev)) /* Forcing didn't work, no panic message .. 
*/ return; spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 383d6432a1a8..24a08e8f19e1 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -1274,7 +1274,7 @@ void raw3270_pm_unfreeze(struct raw3270_view *view) rp = view->dev; if (rp && test_bit(RAW3270_FLAGS_FROZEN, &rp->flags)) - ccw_device_force_console(); + ccw_device_force_console(rp->cdev); #endif } diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 2e1e9086e916..6ac0066d3158 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1680,11 +1680,9 @@ void ccw_device_wait_idle(struct ccw_device *cdev) static int ccw_device_pm_restore(struct device *dev); -int ccw_device_force_console(void) +int ccw_device_force_console(struct ccw_device *cdev) { - if (!console_cdev_in_use) - return -ENODEV; - return ccw_device_pm_restore(&console_cdev.dev); + return ccw_device_pm_restore(&cdev->dev); } EXPORT_SYMBOL_GPL(ccw_device_force_console); #endif From c135ad1caffe2b35d6316758a605a2b63ca22bb3 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 13 Apr 2013 12:58:55 +0200 Subject: [PATCH 22/59] s390/cio: split subchannel registration Split the subchannel registration into device_initialize and device_add and move the initialization part inside the allocation function. With this change we can use refcounting during the complete lifespan of a subchannel which is important for devices where we do the actual registration at a later time.
Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/css.c | 43 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 658d9349c837..8a1294b1cbaf 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -137,26 +137,7 @@ out: static void css_sch_todo(struct work_struct *work); -static struct subchannel * -css_alloc_subchannel(struct subchannel_id schid) -{ - struct subchannel *sch; - int ret; - - sch = kmalloc (sizeof (*sch), GFP_KERNEL | GFP_DMA); - if (sch == NULL) - return ERR_PTR(-ENOMEM); - ret = cio_validate_subchannel (sch, schid); - if (ret < 0) { - kfree(sch); - return ERR_PTR(ret); - } - INIT_WORK(&sch->todo_work, css_sch_todo); - return sch; -} - -static void -css_subchannel_release(struct device *dev) +static void css_subchannel_release(struct device *dev) { struct subchannel *sch; @@ -170,6 +151,25 @@ css_subchannel_release(struct device *dev) } } +static struct subchannel *css_alloc_subchannel(struct subchannel_id schid) +{ + struct subchannel *sch; + int ret; + + sch = kmalloc (sizeof (*sch), GFP_KERNEL | GFP_DMA); + if (sch == NULL) + return ERR_PTR(-ENOMEM); + ret = cio_validate_subchannel (sch, schid); + if (ret < 0) { + kfree(sch); + return ERR_PTR(ret); + } + INIT_WORK(&sch->todo_work, css_sch_todo); + sch->dev.release = &css_subchannel_release; + device_initialize(&sch->dev); + return sch; +} + static int css_sch_device_register(struct subchannel *sch) { int ret; @@ -177,7 +177,7 @@ static int css_sch_device_register(struct subchannel *sch) mutex_lock(&sch->reg_mutex); dev_set_name(&sch->dev, "0.%x.%04x", sch->schid.ssid, sch->schid.sch_no); - ret = device_register(&sch->dev); + ret = device_add(&sch->dev); mutex_unlock(&sch->reg_mutex); return ret; } @@ -282,7 +282,6 @@ static int css_register_subchannel(struct subchannel *sch) /* Initialize the subchannel structure 
*/ sch->dev.parent = &channel_subsystems[0]->device; sch->dev.bus = &css_bus_type; - sch->dev.release = &css_subchannel_release; sch->dev.groups = default_subch_attr_groups; /* * We don't want to generate uevents for I/O subchannels that don't From 863fc8492734822b95671780db803cd9a4b7d923 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 13 Apr 2013 13:01:50 +0200 Subject: [PATCH 23/59] s390/cio: get rid of static console subchannel Remove the static console subchannel (and friends) and use dynamic allocation for these structures. With this change the console subchannel is treated (mostly) like any other subchannel and we can remove some special cases. Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/cio.c | 111 ++++++++++++-------------------------- drivers/s390/cio/cio.h | 5 -- drivers/s390/cio/css.c | 23 ++++---- drivers/s390/cio/css.h | 1 + drivers/s390/cio/device.c | 24 ++++----- 5 files changed, 56 insertions(+), 108 deletions(-) diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 2c1d53fb2fab..ab99500604b7 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -540,13 +540,9 @@ int cio_validate_subchannel(struct subchannel *sch, struct subchannel_id schid) memset(sch, 0, sizeof(struct subchannel)); sch->schid = schid; - if (cio_is_console(schid)) { - sch->lock = cio_get_console_lock(); - } else { - err = cio_create_sch_lock(sch); - if (err) - goto out; - } + err = cio_create_sch_lock(sch); + if (err) + goto out; mutex_init(&sch->reg_mutex); /* @@ -580,8 +576,7 @@ int cio_validate_subchannel(struct subchannel *sch, struct subchannel_id schid) sch->schid.ssid, sch->schid.sch_no, sch->st); return 0; out: - if (!cio_is_console(schid)) - kfree(sch->lock); + kfree(sch->lock); sch->lock = NULL; return err; } @@ -650,9 +645,7 @@ void __irq_entry do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_CCW_CONSOLE -static struct subchannel console_subchannel; -static struct
io_subchannel_private console_priv; -static int console_subchannel_in_use; +static struct subchannel *console_sch; /* * Use cio_tsch to update the subchannel status and call the interrupt handler @@ -685,119 +678,83 @@ void cio_tsch(struct subchannel *sch) } } -void *cio_get_console_priv(void) +static int cio_test_for_console(struct subchannel_id schid, void *data) { - return &console_priv; -} + struct schib schib; -static int -cio_test_for_console(struct subchannel_id schid, void *data) -{ - if (stsch_err(schid, &console_subchannel.schib) != 0) + if (stsch_err(schid, &schib) != 0) return -ENXIO; - if ((console_subchannel.schib.pmcw.st == SUBCHANNEL_TYPE_IO) && - console_subchannel.schib.pmcw.dnv && - (console_subchannel.schib.pmcw.dev == console_devno)) { + if ((schib.pmcw.st == SUBCHANNEL_TYPE_IO) && schib.pmcw.dnv && + (schib.pmcw.dev == console_devno)) { console_irq = schid.sch_no; return 1; /* found */ } return 0; } - -static int -cio_get_console_sch_no(void) +static int cio_get_console_sch_no(void) { struct subchannel_id schid; - + struct schib schib; + init_subchannel_id(&schid); if (console_irq != -1) { /* VM provided us with the irq number of the console. */ schid.sch_no = console_irq; - if (stsch_err(schid, &console_subchannel.schib) != 0 || - (console_subchannel.schib.pmcw.st != SUBCHANNEL_TYPE_IO) || - !console_subchannel.schib.pmcw.dnv) + if (stsch_err(schid, &schib) != 0 || + (schib.pmcw.st != SUBCHANNEL_TYPE_IO) || !schib.pmcw.dnv) return -1; - console_devno = console_subchannel.schib.pmcw.dev; + console_devno = schib.pmcw.dev; } else if (console_devno != -1) { /* At least the console device number is known. */ for_each_subchannel(cio_test_for_console, NULL); - if (console_irq == -1) - return -1; - } else { - /* unlike in 2.4, we cannot autoprobe here, since - * the channel subsystem is not fully initialized. 
- * With some luck, the HWC console can take over */ - return -1; } return console_irq; } -struct subchannel * -cio_probe_console(void) +struct subchannel *cio_probe_console(void) { - int sch_no, ret; struct subchannel_id schid; + struct subchannel *sch; + int sch_no, ret; - if (xchg(&console_subchannel_in_use, 1) != 0) - return ERR_PTR(-EBUSY); sch_no = cio_get_console_sch_no(); if (sch_no == -1) { - console_subchannel_in_use = 0; pr_warning("No CCW console was found\n"); return ERR_PTR(-ENODEV); } - memset(&console_subchannel, 0, sizeof(struct subchannel)); init_subchannel_id(&schid); schid.sch_no = sch_no; - ret = cio_validate_subchannel(&console_subchannel, schid); - if (ret) { - console_subchannel_in_use = 0; - return ERR_PTR(-ENODEV); - } + sch = css_alloc_subchannel(schid); + if (IS_ERR(sch)) + return sch; - /* - * enable console I/O-interrupt subclass - */ isc_register(CONSOLE_ISC); - console_subchannel.config.isc = CONSOLE_ISC; - console_subchannel.config.intparm = (u32)(addr_t)&console_subchannel; - ret = cio_commit_config(&console_subchannel); + sch->config.isc = CONSOLE_ISC; + sch->config.intparm = (u32)(addr_t)sch; + ret = cio_commit_config(sch); if (ret) { isc_unregister(CONSOLE_ISC); - console_subchannel_in_use = 0; + put_device(&sch->dev); return ERR_PTR(ret); } - return &console_subchannel; + console_sch = sch; + return sch; } -void -cio_release_console(void) +int cio_is_console(struct subchannel_id schid) { - console_subchannel.config.intparm = 0; - cio_commit_config(&console_subchannel); - isc_unregister(CONSOLE_ISC); - console_subchannel_in_use = 0; -} - -/* Bah... hack to catch console special sausages. 
*/ -int -cio_is_console(struct subchannel_id schid) -{ - if (!console_subchannel_in_use) + if (!console_sch) return 0; - return schid_equal(&schid, &console_subchannel.schid); + return schid_equal(&schid, &console_sch->schid); } -struct subchannel * -cio_get_console_subchannel(void) +struct subchannel *cio_get_console_subchannel(void) { - if (!console_subchannel_in_use) - return NULL; - return &console_subchannel; + return console_sch; } +#endif /* CONFIG_CCW_CONSOLE */ -#endif static int __disable_subchannel_easy(struct subchannel_id schid, struct schib *schib) { diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 3b97c8bb30e5..78975471ef28 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -128,17 +128,12 @@ void do_IRQ(struct pt_regs *); /* Use with care. */ #ifdef CONFIG_CCW_CONSOLE extern struct subchannel *cio_probe_console(void); -extern void cio_release_console(void); extern int cio_is_console(struct subchannel_id); extern struct subchannel *cio_get_console_subchannel(void); -extern spinlock_t * cio_get_console_lock(void); -extern void *cio_get_console_priv(void); extern void cio_tsch(struct subchannel *sch); #else #define cio_is_console(schid) 0 #define cio_get_console_subchannel() NULL -#define cio_get_console_lock() NULL -#define cio_get_console_priv() NULL #endif #endif diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 8a1294b1cbaf..fb0e64f1845a 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -139,19 +139,15 @@ static void css_sch_todo(struct work_struct *work); static void css_subchannel_release(struct device *dev) { - struct subchannel *sch; + struct subchannel *sch = to_subchannel(dev); - sch = to_subchannel(dev); - if (!cio_is_console(sch->schid)) { - /* Reset intparm to zeroes. 
*/ - sch->config.intparm = 0; - cio_commit_config(sch); - kfree(sch->lock); - kfree(sch); - } + sch->config.intparm = 0; + cio_commit_config(sch); + kfree(sch->lock); + kfree(sch); } -static struct subchannel *css_alloc_subchannel(struct subchannel_id schid) +struct subchannel *css_alloc_subchannel(struct subchannel_id schid) { struct subchannel *sch; int ret; @@ -326,10 +322,9 @@ int css_probe_device(struct subchannel_id schid) return PTR_ERR(sch); } ret = css_register_subchannel(sch); - if (ret) { - if (!cio_is_console(schid)) - put_device(&sch->dev); - } + if (ret) + put_device(&sch->dev); + return ret; } diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 4af3dfe70ef5..6ab424d753a9 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -102,6 +102,7 @@ extern void css_driver_unregister(struct css_driver *); extern void css_sch_device_unregister(struct subchannel *); extern int css_probe_device(struct subchannel_id); +extern struct subchannel *css_alloc_subchannel(struct subchannel_id); extern struct subchannel *get_subchannel_by_schid(struct subchannel_id); extern int css_init_done; extern int max_ssid; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 6ac0066d3158..25d04b7b5109 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1585,22 +1585,11 @@ static struct ccw_device console_cdev; static struct ccw_device_private console_private; static int console_cdev_in_use; -static DEFINE_SPINLOCK(ccw_console_lock); - -spinlock_t * cio_get_console_lock(void) -{ - return &ccw_console_lock; -} - static int ccw_device_console_enable(struct ccw_device *cdev, struct subchannel *sch) { - struct io_subchannel_private *io_priv = cio_get_console_priv(); int rc; - /* Attach subchannel private data. 
*/ - memset(io_priv, 0, sizeof(*io_priv)); - set_io_private(sch, io_priv); io_subchannel_init_fields(sch); rc = cio_commit_config(sch); if (rc) @@ -1633,6 +1622,7 @@ out_unlock: struct ccw_device * ccw_device_probe_console(void) { + struct io_subchannel_private *io_priv; struct subchannel *sch; int ret; @@ -1648,10 +1638,20 @@ ccw_device_probe_console(void) console_cdev.private = &console_private; console_private.cdev = &console_cdev; console_private.int_class = IRQIO_CIO; + + io_priv = kzalloc(sizeof(*io_priv), GFP_KERNEL | GFP_DMA); + if (!io_priv) { + put_device(&sch->dev); + return ERR_PTR(-ENOMEM); + } + set_io_private(sch, io_priv); + ret = ccw_device_console_enable(&console_cdev, sch); if (ret) { - cio_release_console(); console_cdev_in_use = 0; + set_io_private(sch, NULL); + put_device(&sch->dev); + kfree(io_priv); return ERR_PTR(ret); } console_cdev.online = 1; From afdfed0f86d192c9957996d58d51c06ff2b9cb44 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 13 Apr 2013 13:03:03 +0200 Subject: [PATCH 24/59] s390/cio: get rid of static console device Remove the static console ccw device (and friends) and use dynamic allocation for these structures. With this change the console device is treated (mostly) like any other ccw device and we can remove some special cases. 
Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device.c | 73 ++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 25d04b7b5109..41a16785be29 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1087,19 +1087,12 @@ static int io_subchannel_probe(struct subchannel *sch) dev_set_uevent_suppress(&sch->dev, 0); kobject_uevent(&sch->dev.kobj, KOBJ_ADD); cdev = sch_get_cdev(sch); - cdev->dev.groups = ccwdev_attr_groups; - device_initialize(&cdev->dev); - cdev->private->flags.initialized = 1; - ccw_device_register(cdev); - /* - * Check if the device is already online. If it is - * the reference count needs to be corrected since we - * didn't obtain a reference in ccw_device_set_online. - */ - if (cdev->private->state != DEV_STATE_NOT_OPER && - cdev->private->state != DEV_STATE_OFFLINE && - cdev->private->state != DEV_STATE_BOXED) - get_device(&cdev->dev); + rc = ccw_device_register(cdev); + if (rc) { + /* Release online reference. */ + put_device(&cdev->dev); + goto out_schedule; + } return 0; } io_subchannel_init_fields(sch); @@ -1581,10 +1574,6 @@ out: } #ifdef CONFIG_CCW_CONSOLE -static struct ccw_device console_cdev; -static struct ccw_device_private console_private; -static int console_cdev_in_use; - static int ccw_device_console_enable(struct ccw_device *cdev, struct subchannel *sch) { @@ -1595,8 +1584,6 @@ static int ccw_device_console_enable(struct ccw_device *cdev, if (rc) return rc; sch->driver = &io_subchannel_driver; - /* Initialize the ccw_device structure. */ - cdev->dev.parent= &sch->dev; sch_set_cdev(sch, cdev); io_subchannel_recog(cdev, sch); /* Now wait for the async. recognition to come to an end. 
*/ @@ -1604,58 +1591,58 @@ static int ccw_device_console_enable(struct ccw_device *cdev, while (!dev_fsm_final_state(cdev)) ccw_device_wait_idle(cdev); - rc = -EIO; - if (cdev->private->state != DEV_STATE_OFFLINE) + /* Hold on to an extra reference while device is online. */ + get_device(&cdev->dev); + rc = ccw_device_online(cdev); + if (rc) goto out_unlock; - ccw_device_online(cdev); + while (!dev_fsm_final_state(cdev)) ccw_device_wait_idle(cdev); - if (cdev->private->state != DEV_STATE_ONLINE) - goto out_unlock; - rc = 0; + if (cdev->private->state == DEV_STATE_ONLINE) + cdev->online = 1; + else + rc = -EIO; out_unlock: spin_unlock_irq(cdev->ccwlock); + if (rc) /* Give up online reference since onlining failed. */ + put_device(&cdev->dev); return rc; } -struct ccw_device * -ccw_device_probe_console(void) +struct ccw_device *ccw_device_probe_console(void) { struct io_subchannel_private *io_priv; + struct ccw_device *cdev; struct subchannel *sch; int ret; - if (xchg(&console_cdev_in_use, 1) != 0) - return ERR_PTR(-EBUSY); sch = cio_probe_console(); - if (IS_ERR(sch)) { - console_cdev_in_use = 0; - return (void *) sch; - } - memset(&console_cdev, 0, sizeof(struct ccw_device)); - memset(&console_private, 0, sizeof(struct ccw_device_private)); - console_cdev.private = &console_private; - console_private.cdev = &console_cdev; - console_private.int_class = IRQIO_CIO; + if (IS_ERR(sch)) + return ERR_CAST(sch); io_priv = kzalloc(sizeof(*io_priv), GFP_KERNEL | GFP_DMA); if (!io_priv) { put_device(&sch->dev); return ERR_PTR(-ENOMEM); } + cdev = io_subchannel_create_ccwdev(sch); + if (IS_ERR(cdev)) { + put_device(&sch->dev); + kfree(io_priv); + return cdev; + } set_io_private(sch, io_priv); - - ret = ccw_device_console_enable(&console_cdev, sch); + ret = ccw_device_console_enable(cdev, sch); if (ret) { - console_cdev_in_use = 0; set_io_private(sch, NULL); put_device(&sch->dev); + put_device(&cdev->dev); kfree(io_priv); return ERR_PTR(ret); } - console_cdev.online = 1; - 
return &console_cdev; + return cdev; } /** From 14556b33f2a5d6a3bc75cd33b709452a31555b25 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 13 Apr 2013 13:03:54 +0200 Subject: [PATCH 25/59] s390/css: introduce cio_register_early_subchannels Use cio_register_early_subchannels to register early subchannels which are already in use. Call this function before we do the actual subchannel scanning loop. This helps us to get rid of some more special cases regarding the console subchannel. Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/cio.c | 11 +++++++++-- drivers/s390/cio/cio.h | 4 ++-- drivers/s390/cio/css.c | 31 ++++++++++++------------------- drivers/s390/cio/css.h | 1 + 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index ab99500604b7..3ab99d883888 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -749,9 +749,16 @@ int cio_is_console(struct subchannel_id schid) return schid_equal(&schid, &console_sch->schid); } -struct subchannel *cio_get_console_subchannel(void) +void cio_register_early_subchannels(void) { - return console_sch; + int ret; + + if (!console_sch) + return; + + ret = css_register_subchannel(console_sch); + if (ret) + put_device(&console_sch->dev); } #endif /* CONFIG_CCW_CONSOLE */ diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 78975471ef28..57b41ec2ed40 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -129,11 +129,11 @@ void do_IRQ(struct pt_regs *); #ifdef CONFIG_CCW_CONSOLE extern struct subchannel *cio_probe_console(void); extern int cio_is_console(struct subchannel_id); -extern struct subchannel *cio_get_console_subchannel(void); +extern void cio_register_early_subchannels(void); extern void cio_tsch(struct subchannel *sch); #else #define cio_is_console(schid) 0 -#define cio_get_console_subchannel() NULL +static inline void 
cio_register_early_subchannels(void) {} #endif #endif diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index fb0e64f1845a..3b2245f58bde 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -224,16 +224,11 @@ void css_update_ssd_info(struct subchannel *sch) { int ret; - if (cio_is_console(sch->schid)) { - /* Console is initialized too early for functions requiring - * memory allocation. */ + ret = chsc_get_ssd_info(sch->schid, &sch->ssd_info); + if (ret) ssd_from_pmcw(&sch->ssd_info, &sch->schib.pmcw); - } else { - ret = chsc_get_ssd_info(sch->schid, &sch->ssd_info); - if (ret) - ssd_from_pmcw(&sch->ssd_info, &sch->schib.pmcw); - ssd_register_chpids(&sch->ssd_info); - } + + ssd_register_chpids(&sch->ssd_info); } static ssize_t type_show(struct device *dev, struct device_attribute *attr, @@ -271,7 +266,7 @@ static const struct attribute_group *default_subch_attr_groups[] = { NULL, }; -static int css_register_subchannel(struct subchannel *sch) +int css_register_subchannel(struct subchannel *sch) { int ret; @@ -314,13 +309,10 @@ int css_probe_device(struct subchannel_id schid) int ret; struct subchannel *sch; - if (cio_is_console(schid)) - sch = cio_get_console_subchannel(); - else { - sch = css_alloc_subchannel(schid); - if (IS_ERR(sch)) - return PTR_ERR(sch); - } + sch = css_alloc_subchannel(schid); + if (IS_ERR(sch)) + return PTR_ERR(sch); + ret = css_register_subchannel(sch); if (ret) put_device(&sch->dev); @@ -864,8 +856,7 @@ static struct notifier_block css_power_notifier = { /* * Now that the driver core is running, we can setup our channel subsystem. - * The struct subchannel's are created during probing (except for the - * static console subchannel). + * The struct subchannel's are created during probing. */ static int __init css_bus_init(void) { @@ -1044,6 +1035,8 @@ int css_complete_work(void) */ static int __init channel_subsystem_init_sync(void) { + /* Register subchannels which are already in use. 
*/ + cio_register_early_subchannels(); /* Start initial subchannel evaluation. */ css_schedule_eval_all(); css_complete_work(); diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 6ab424d753a9..2581b6986569 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -102,6 +102,7 @@ extern void css_driver_unregister(struct css_driver *); extern void css_sch_device_unregister(struct subchannel *); extern int css_probe_device(struct subchannel_id); +extern int css_register_subchannel(struct subchannel *); extern struct subchannel *css_alloc_subchannel(struct subchannel_id); extern struct subchannel *get_subchannel_by_schid(struct subchannel_id); extern int css_init_done; From 4e5ebd51214a1851841f952e9e5b5072ce5f9da4 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 13 Apr 2013 13:04:49 +0200 Subject: [PATCH 26/59] s390/css: remove unused function definitions Make css_probe_device static and remove an unimplemented prototype. Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/css.c | 4 ++-- drivers/s390/cio/css.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 3b2245f58bde..054fb428531f 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -304,10 +304,10 @@ int css_register_subchannel(struct subchannel *sch) return ret; } -int css_probe_device(struct subchannel_id schid) +static int css_probe_device(struct subchannel_id schid) { - int ret; struct subchannel *sch; + int ret; sch = css_alloc_subchannel(schid); if (IS_ERR(sch)) diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 2581b6986569..b1de60335238 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -101,7 +101,6 @@ extern int css_driver_register(struct css_driver *); extern void css_driver_unregister(struct css_driver *); extern void css_sch_device_unregister(struct subchannel *); -extern int 
css_probe_device(struct subchannel_id); extern int css_register_subchannel(struct subchannel *); extern struct subchannel *css_alloc_subchannel(struct subchannel_id); extern struct subchannel *get_subchannel_by_schid(struct subchannel_id); @@ -111,7 +110,6 @@ int for_each_subchannel_staged(int (*fn_known)(struct subchannel *, void *), int (*fn_unknown)(struct subchannel_id, void *), void *data); extern int for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *); -extern void css_reiterate_subchannels(void); void css_update_ssd_info(struct subchannel *sch); struct channel_subsystem { From 0ad8f714a135cf993606c21fc1ed0e303ef17c0d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 13 Apr 2013 13:06:27 +0200 Subject: [PATCH 27/59] s390/cio: fix early init counter usage Via ccw_device_init_count we keep track of how many devices are in asynchronous device recognition/initialization. For early devices this variable was not only used prior to its initialization but used incorrectly (incremented but never decremented). Fix this by using static initialization for this variable (and friends), make them visible to device.c only, and decrement the counter after recognition of early devices is finished. 
Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device.c | 13 ++++++------- drivers/s390/cio/device.h | 2 -- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 41a16785be29..1ab5f6c36d9b 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -44,6 +44,10 @@ static DEFINE_SPINLOCK(recovery_lock); static int recovery_phase; static const unsigned long recovery_delay[] = { 3, 30, 300 }; +static atomic_t ccw_device_init_count = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(ccw_device_init_wq); +static struct bus_type ccw_bus_type; + /******************* bus type handling ***********************/ /* The Linux driver model distinguishes between a bus type and @@ -128,8 +132,6 @@ static int ccw_uevent(struct device *dev, struct kobj_uevent_env *env) return ret; } -static struct bus_type ccw_bus_type; - static void io_subchannel_irq(struct subchannel *); static int io_subchannel_probe(struct subchannel *); static int io_subchannel_remove(struct subchannel *); @@ -138,8 +140,6 @@ static int io_subchannel_sch_event(struct subchannel *, int); static int io_subchannel_chp_event(struct subchannel *, struct chp_link *, int); static void recovery_func(unsigned long data); -wait_queue_head_t ccw_device_init_wq; -atomic_t ccw_device_init_count; static struct css_device_id io_subchannel_ids[] = { { .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, }, @@ -192,10 +192,7 @@ int __init io_subchannel_init(void) { int ret; - init_waitqueue_head(&ccw_device_init_wq); - atomic_set(&ccw_device_init_count, 0); setup_timer(&recovery_timer, recovery_func, 0); - ret = bus_register(&ccw_bus_type); if (ret) return ret; @@ -1093,6 +1090,8 @@ static int io_subchannel_probe(struct subchannel *sch) put_device(&cdev->dev); goto out_schedule; } + if (atomic_dec_and_test(&ccw_device_init_count)) + wake_up(&ccw_device_init_wq); return 0; } 
io_subchannel_init_fields(sch); diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index 7d4ecb65db00..8d1d29873172 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -81,8 +81,6 @@ dev_fsm_final_state(struct ccw_device *cdev) cdev->private->state == DEV_STATE_BOXED); } -extern wait_queue_head_t ccw_device_init_wq; -extern atomic_t ccw_device_init_count; int __init io_subchannel_init(void); void io_subchannel_recog_done(struct ccw_device *cdev); From e5dcf0025d7af58f525590ac86ac27cb44714e8d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 13 Apr 2013 13:08:01 +0200 Subject: [PATCH 28/59] s390/css: move subchannel lock allocation cio_validate_subchannel is used to do some basic checks to find out if it's worth to further investigate a subchannel. Move the allocation and initialization of the subchannels locks to css_alloc_subchannel. Clean up the functions involved while at it. Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/cio.c | 29 ++++------------------------- drivers/s390/cio/cio.h | 1 - drivers/s390/cio/css.c | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 3ab99d883888..af5fd716449f 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -471,15 +471,6 @@ int cio_disable_subchannel(struct subchannel *sch) } EXPORT_SYMBOL_GPL(cio_disable_subchannel); -int cio_create_sch_lock(struct subchannel *sch) -{ - sch->lock = kmalloc(sizeof(spinlock_t), GFP_KERNEL); - if (!sch->lock) - return -ENOMEM; - spin_lock_init(sch->lock); - return 0; -} - static int cio_check_devno_blacklisted(struct subchannel *sch) { if (is_blacklisted(sch->schid.ssid, sch->schib.pmcw.dev)) { @@ -536,28 +527,19 @@ int cio_validate_subchannel(struct subchannel *sch, struct subchannel_id schid) sprintf(dbf_txt, "valsch%x", schid.sch_no); CIO_TRACE_EVENT(4, 
dbf_txt); - /* Nuke all fields. */ - memset(sch, 0, sizeof(struct subchannel)); - - sch->schid = schid; - err = cio_create_sch_lock(sch); - if (err) - goto out; - mutex_init(&sch->reg_mutex); - /* * The first subchannel that is not-operational (ccode==3) - * indicates that there aren't any more devices available. + * indicates that there aren't any more devices available. * If stsch gets an exception, it means the current subchannel set - * is not valid. + * is not valid. */ - ccode = stsch_err (schid, &sch->schib); + ccode = stsch_err(schid, &sch->schib); if (ccode) { err = (ccode == 3) ? -ENXIO : ccode; goto out; } - /* Copy subchannel type from path management control word. */ sch->st = sch->schib.pmcw.st; + sch->schid = schid; switch (sch->st) { case SUBCHANNEL_TYPE_IO: @@ -574,10 +556,7 @@ int cio_validate_subchannel(struct subchannel *sch, struct subchannel_id schid) CIO_MSG_EVENT(4, "Subchannel 0.%x.%04x reports subchannel type %04X\n", sch->schid.ssid, sch->schid.sch_no, sch->st); - return 0; out: - kfree(sch->lock); - sch->lock = NULL; return err; } diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 57b41ec2ed40..d62f5e7f3cf1 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -121,7 +121,6 @@ extern int cio_commit_config(struct subchannel *sch); int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key); int cio_tm_intrg(struct subchannel *sch); -int cio_create_sch_lock(struct subchannel *); void do_adapter_IO(u8 isc); void do_IRQ(struct pt_regs *); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 054fb428531f..1ebe5d3ddebb 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -137,6 +137,18 @@ out: static void css_sch_todo(struct work_struct *work); +static int css_sch_create_locks(struct subchannel *sch) +{ + sch->lock = kmalloc(sizeof(*sch->lock), GFP_KERNEL); + if (!sch->lock) + return -ENOMEM; + + spin_lock_init(sch->lock); + mutex_init(&sch->reg_mutex); + + return 0; 
+} + static void css_subchannel_release(struct device *dev) { struct subchannel *sch = to_subchannel(dev); @@ -152,18 +164,26 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid) struct subchannel *sch; int ret; - sch = kmalloc (sizeof (*sch), GFP_KERNEL | GFP_DMA); - if (sch == NULL) + sch = kzalloc(sizeof(*sch), GFP_KERNEL | GFP_DMA); + if (!sch) return ERR_PTR(-ENOMEM); - ret = cio_validate_subchannel (sch, schid); - if (ret < 0) { - kfree(sch); - return ERR_PTR(ret); - } + + ret = cio_validate_subchannel(sch, schid); + if (ret < 0) + goto err; + + ret = css_sch_create_locks(sch); + if (ret) + goto err; + INIT_WORK(&sch->todo_work, css_sch_todo); sch->dev.release = &css_subchannel_release; device_initialize(&sch->dev); return sch; + +err: + kfree(sch); + return ERR_PTR(ret); } static int css_sch_device_register(struct subchannel *sch) @@ -756,7 +776,7 @@ static int __init setup_css(int nr) css->pseudo_subchannel->dev.release = css_subchannel_release; dev_set_name(&css->pseudo_subchannel->dev, "defunct"); mutex_init(&css->pseudo_subchannel->reg_mutex); - ret = cio_create_sch_lock(css->pseudo_subchannel); + ret = css_sch_create_locks(css->pseudo_subchannel); if (ret) { kfree(css->pseudo_subchannel); return ret; From d42e17129b9f473386d67c6a6549c28bd0e2b52e Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 15 Apr 2013 16:22:23 +0200 Subject: [PATCH 29/59] s390/dasd: improve speed of dasdfmt Reorganize format IO requests and enable usage of PAV. 
Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 109 +++++++++- drivers/s390/block/dasd_eckd.c | 344 ++++++++++++++++++++++---------- drivers/s390/block/dasd_int.h | 10 +- drivers/s390/block/dasd_ioctl.c | 31 +-- 4 files changed, 351 insertions(+), 143 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index f1b7fdc58a5f..4195cc05efeb 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -246,7 +246,7 @@ static struct dentry *dasd_debugfs_setup(const char *name, static int dasd_state_known_to_basic(struct dasd_device *device) { struct dasd_block *block = device->block; - int rc; + int rc = 0; /* Allocate and register gendisk structure. */ if (block) { @@ -273,7 +273,8 @@ static int dasd_state_known_to_basic(struct dasd_device *device) DBF_DEV_EVENT(DBF_EMERG, device, "%s", "debug area created"); device->state = DASD_STATE_BASIC; - return 0; + + return rc; } /* @@ -282,6 +283,7 @@ static int dasd_state_known_to_basic(struct dasd_device *device) static int dasd_state_basic_to_known(struct dasd_device *device) { int rc; + if (device->block) { dasd_profile_exit(&device->block->profile); if (device->block->debugfs_dentry) @@ -332,8 +334,10 @@ static int dasd_state_basic_to_ready(struct dasd_device *device) if (block->base->discipline->do_analysis != NULL) rc = block->base->discipline->do_analysis(block); if (rc) { - if (rc != -EAGAIN) + if (rc != -EAGAIN) { device->state = DASD_STATE_UNFMT; + goto out; + } return rc; } dasd_setup_queue(block); @@ -341,11 +345,16 @@ static int dasd_state_basic_to_ready(struct dasd_device *device) block->blocks << block->s2b_shift); device->state = DASD_STATE_READY; rc = dasd_scan_partitions(block); - if (rc) + if (rc) { device->state = DASD_STATE_BASIC; + return rc; + } } else { device->state = DASD_STATE_READY; } +out: + if (device->discipline->basic_to_ready) + rc = device->discipline->basic_to_ready(device); return rc; } @@ -368,6 +377,11 
@@ static int dasd_state_ready_to_basic(struct dasd_device *device) { int rc; + if (device->discipline->ready_to_basic) { + rc = device->discipline->ready_to_basic(device); + if (rc) + return rc; + } device->state = DASD_STATE_BASIC; if (device->block) { struct dasd_block *block = device->block; @@ -402,16 +416,10 @@ static int dasd_state_unfmt_to_basic(struct dasd_device *device) static int dasd_state_ready_to_online(struct dasd_device * device) { - int rc; struct gendisk *disk; struct disk_part_iter piter; struct hd_struct *part; - if (device->discipline->ready_to_online) { - rc = device->discipline->ready_to_online(device); - if (rc) - return rc; - } device->state = DASD_STATE_ONLINE; if (device->block) { dasd_schedule_block_bh(device->block); @@ -444,6 +452,7 @@ static int dasd_state_online_to_ready(struct dasd_device *device) if (rc) return rc; } + device->state = DASD_STATE_READY; if (device->block && !(device->features & DASD_FEATURE_USERAW)) { disk = device->block->bdev->bd_disk; @@ -2223,6 +2232,77 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) return rc; } +static inline int _wait_for_wakeup_queue(struct list_head *ccw_queue) +{ + struct dasd_ccw_req *cqr; + + list_for_each_entry(cqr, ccw_queue, blocklist) { + if (cqr->callback_data != DASD_SLEEPON_END_TAG) + return 0; + } + + return 1; +} + +static int _dasd_sleep_on_queue(struct list_head *ccw_queue, int interruptible) +{ + struct dasd_device *device; + int rc; + struct dasd_ccw_req *cqr, *n; + +retry: + list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { + device = cqr->startdev; + if (cqr->status != DASD_CQR_FILLED) /*could be failed*/ + continue; + + if (test_bit(DASD_FLAG_LOCK_STOLEN, &device->flags) && + !test_bit(DASD_CQR_ALLOW_SLOCK, &cqr->flags)) { + cqr->status = DASD_CQR_FAILED; + cqr->intrc = -EPERM; + continue; + } + /*Non-temporary stop condition will trigger fail fast*/ + if (device->stopped & ~DASD_STOPPED_PENDING && + test_bit(DASD_CQR_FLAGS_FAILFAST, 
&cqr->flags) && + !dasd_eer_enabled(device)) { + cqr->status = DASD_CQR_FAILED; + cqr->intrc = -EAGAIN; + continue; + } + + /*Don't try to start requests if device is stopped*/ + if (interruptible) { + rc = wait_event_interruptible( + generic_waitq, !device->stopped); + if (rc == -ERESTARTSYS) { + cqr->status = DASD_CQR_FAILED; + cqr->intrc = rc; + continue; + } + } else + wait_event(generic_waitq, !(device->stopped)); + + if (!cqr->callback) + cqr->callback = dasd_wakeup_cb; + cqr->callback_data = DASD_SLEEPON_START_TAG; + dasd_add_request_tail(cqr); + } + + wait_event(generic_waitq, _wait_for_wakeup_queue(ccw_queue)); + + rc = 0; + list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { + if (__dasd_sleep_on_erp(cqr)) + rc = 1; + } + if (rc) + goto retry; + + + return 0; +} + /* * Queue a request to the tail of the device ccw_queue and wait for * it's completion. @@ -2232,6 +2312,15 @@ int dasd_sleep_on(struct dasd_ccw_req *cqr) return _dasd_sleep_on(cqr, 0); } +/* + * Start requests from a ccw_queue and wait for their completion. + */ +int dasd_sleep_on_queue(struct list_head *ccw_queue) +{ + return _dasd_sleep_on_queue(ccw_queue, 0); +} +EXPORT_SYMBOL(dasd_sleep_on_queue); + /* * Queue a request to the tail of the device ccw_queue and wait * interruptible for it's completion. 
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 6999fd919e94..6a44b27623ed 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2022,7 +2022,7 @@ static int dasd_eckd_do_analysis(struct dasd_block *block) return dasd_eckd_end_analysis(block); } -static int dasd_eckd_ready_to_online(struct dasd_device *device) +static int dasd_eckd_basic_to_ready(struct dasd_device *device) { return dasd_alias_add_device(device); }; @@ -2031,6 +2031,11 @@ static int dasd_eckd_online_to_ready(struct dasd_device *device) { cancel_work_sync(&device->reload_device); cancel_work_sync(&device->kick_validate); + return 0; +}; + +static int dasd_eckd_ready_to_basic(struct dasd_device *device) +{ return dasd_alias_remove_device(device); }; @@ -2050,45 +2055,34 @@ dasd_eckd_fill_geometry(struct dasd_block *block, struct hd_geometry *geo) } static struct dasd_ccw_req * -dasd_eckd_format_device(struct dasd_device * device, - struct format_data_t * fdata) +dasd_eckd_build_format(struct dasd_device *base, + struct format_data_t *fdata) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *base_priv; + struct dasd_eckd_private *start_priv; + struct dasd_device *startdev; struct dasd_ccw_req *fcp; struct eckd_count *ect; + struct ch_t address; struct ccw1 *ccw; void *data; int rpt; - struct ch_t address; int cplength, datasize; - int i; + int i, j; int intensity = 0; int r0_perm; + int nr_tracks; - private = (struct dasd_eckd_private *) device->private; - rpt = recs_per_track(&private->rdc_data, 0, fdata->blksize); - set_ch_t(&address, - fdata->start_unit / private->rdc_data.trk_per_cyl, - fdata->start_unit % private->rdc_data.trk_per_cyl); + startdev = dasd_alias_get_start_dev(base); + if (!startdev) + startdev = base; - /* Sanity checks. 
*/ - if (fdata->start_unit >= - (private->real_cyl * private->rdc_data.trk_per_cyl)) { - dev_warn(&device->cdev->dev, "Start track number %d used in " - "formatting is too big\n", fdata->start_unit); - return ERR_PTR(-EINVAL); - } - if (fdata->start_unit > fdata->stop_unit) { - dev_warn(&device->cdev->dev, "Start track %d used in " - "formatting exceeds end track\n", fdata->start_unit); - return ERR_PTR(-EINVAL); - } - if (dasd_check_blocksize(fdata->blksize) != 0) { - dev_warn(&device->cdev->dev, - "The DASD cannot be formatted with block size %d\n", - fdata->blksize); - return ERR_PTR(-EINVAL); - } + start_priv = (struct dasd_eckd_private *) startdev->private; + base_priv = (struct dasd_eckd_private *) base->private; + + rpt = recs_per_track(&base_priv->rdc_data, 0, fdata->blksize); + + nr_tracks = fdata->stop_unit - fdata->start_unit + 1; /* * fdata->intensity is a bit string that tells us what to do: @@ -2106,149 +2100,282 @@ dasd_eckd_format_device(struct dasd_device * device, r0_perm = 1; intensity = fdata->intensity; } + switch (intensity) { case 0x00: /* Normal format */ case 0x08: /* Normal format, use cdl. */ - cplength = 2 + rpt; - datasize = sizeof(struct DE_eckd_data) + + cplength = 2 + (rpt*nr_tracks); + datasize = sizeof(struct PFX_eckd_data) + sizeof(struct LO_eckd_data) + - rpt * sizeof(struct eckd_count); + rpt * nr_tracks * sizeof(struct eckd_count); break; case 0x01: /* Write record zero and format track. */ case 0x09: /* Write record zero and format track, use cdl. */ - cplength = 3 + rpt; - datasize = sizeof(struct DE_eckd_data) + + cplength = 2 + rpt * nr_tracks; + datasize = sizeof(struct PFX_eckd_data) + sizeof(struct LO_eckd_data) + sizeof(struct eckd_count) + - rpt * sizeof(struct eckd_count); + rpt * nr_tracks * sizeof(struct eckd_count); break; case 0x04: /* Invalidate track. */ case 0x0c: /* Invalidate track, use cdl. 
*/ cplength = 3; - datasize = sizeof(struct DE_eckd_data) + + datasize = sizeof(struct PFX_eckd_data) + sizeof(struct LO_eckd_data) + sizeof(struct eckd_count); break; default: - dev_warn(&device->cdev->dev, "An I/O control call used " - "incorrect flags 0x%x\n", fdata->intensity); + dev_warn(&startdev->cdev->dev, + "An I/O control call used incorrect flags 0x%x\n", + fdata->intensity); return ERR_PTR(-EINVAL); } /* Allocate the format ccw request. */ - fcp = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device); + fcp = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, + datasize, startdev); if (IS_ERR(fcp)) return fcp; + start_priv->count++; data = fcp->data; ccw = fcp->cpaddr; switch (intensity & ~0x08) { case 0x00: /* Normal format. */ - define_extent(ccw++, (struct DE_eckd_data *) data, - fdata->start_unit, fdata->start_unit, - DASD_ECKD_CCW_WRITE_CKD, device); + prefix(ccw++, (struct PFX_eckd_data *) data, + fdata->start_unit, fdata->stop_unit, + DASD_ECKD_CCW_WRITE_CKD, base, startdev); /* grant subsystem permission to format R0 */ if (r0_perm) - ((struct DE_eckd_data *)data)->ga_extended |= 0x04; - data += sizeof(struct DE_eckd_data); + ((struct PFX_eckd_data *)data) + ->define_extent.ga_extended |= 0x04; + data += sizeof(struct PFX_eckd_data); ccw[-1].flags |= CCW_FLAG_CC; locate_record(ccw++, (struct LO_eckd_data *) data, - fdata->start_unit, 0, rpt, - DASD_ECKD_CCW_WRITE_CKD, device, + fdata->start_unit, 0, rpt*nr_tracks, + DASD_ECKD_CCW_WRITE_CKD, base, fdata->blksize); data += sizeof(struct LO_eckd_data); break; case 0x01: /* Write record zero + format track. 
*/ - define_extent(ccw++, (struct DE_eckd_data *) data, - fdata->start_unit, fdata->start_unit, - DASD_ECKD_CCW_WRITE_RECORD_ZERO, - device); - data += sizeof(struct DE_eckd_data); + prefix(ccw++, (struct PFX_eckd_data *) data, + fdata->start_unit, fdata->stop_unit, + DASD_ECKD_CCW_WRITE_RECORD_ZERO, + base, startdev); + data += sizeof(struct PFX_eckd_data); ccw[-1].flags |= CCW_FLAG_CC; locate_record(ccw++, (struct LO_eckd_data *) data, - fdata->start_unit, 0, rpt + 1, - DASD_ECKD_CCW_WRITE_RECORD_ZERO, device, - device->block->bp_block); + fdata->start_unit, 0, rpt * nr_tracks + 1, + DASD_ECKD_CCW_WRITE_RECORD_ZERO, base, + base->block->bp_block); data += sizeof(struct LO_eckd_data); break; case 0x04: /* Invalidate track. */ - define_extent(ccw++, (struct DE_eckd_data *) data, - fdata->start_unit, fdata->start_unit, - DASD_ECKD_CCW_WRITE_CKD, device); - data += sizeof(struct DE_eckd_data); + prefix(ccw++, (struct PFX_eckd_data *) data, + fdata->start_unit, fdata->stop_unit, + DASD_ECKD_CCW_WRITE_CKD, base, startdev); + data += sizeof(struct PFX_eckd_data); ccw[-1].flags |= CCW_FLAG_CC; locate_record(ccw++, (struct LO_eckd_data *) data, fdata->start_unit, 0, 1, - DASD_ECKD_CCW_WRITE_CKD, device, 8); + DASD_ECKD_CCW_WRITE_CKD, base, 8); data += sizeof(struct LO_eckd_data); break; } - if (intensity & 0x01) { /* write record zero */ - ect = (struct eckd_count *) data; - data += sizeof(struct eckd_count); - ect->cyl = address.cyl; - ect->head = address.head; - ect->record = 0; - ect->kl = 0; - ect->dl = 8; - ccw[-1].flags |= CCW_FLAG_CC; - ccw->cmd_code = DASD_ECKD_CCW_WRITE_RECORD_ZERO; - ccw->flags = CCW_FLAG_SLI; - ccw->count = 8; - ccw->cda = (__u32)(addr_t) ect; - ccw++; - } - if ((intensity & ~0x08) & 0x04) { /* erase track */ - ect = (struct eckd_count *) data; - data += sizeof(struct eckd_count); - ect->cyl = address.cyl; - ect->head = address.head; - ect->record = 1; - ect->kl = 0; - ect->dl = 0; - ccw[-1].flags |= CCW_FLAG_CC; - ccw->cmd_code = 
DASD_ECKD_CCW_WRITE_CKD; - ccw->flags = CCW_FLAG_SLI; - ccw->count = 8; - ccw->cda = (__u32)(addr_t) ect; - } else { /* write remaining records */ - for (i = 0; i < rpt; i++) { + + for (j = 0; j < nr_tracks; j++) { + /* calculate cylinder and head for the current track */ + set_ch_t(&address, + (fdata->start_unit + j) / + base_priv->rdc_data.trk_per_cyl, + (fdata->start_unit + j) % + base_priv->rdc_data.trk_per_cyl); + if (intensity & 0x01) { /* write record zero */ ect = (struct eckd_count *) data; data += sizeof(struct eckd_count); ect->cyl = address.cyl; ect->head = address.head; - ect->record = i + 1; + ect->record = 0; ect->kl = 0; - ect->dl = fdata->blksize; - /* Check for special tracks 0-1 when formatting CDL */ - if ((intensity & 0x08) && - fdata->start_unit == 0) { - if (i < 3) { - ect->kl = 4; - ect->dl = sizes_trk0[i] - 4; - } - } - if ((intensity & 0x08) && - fdata->start_unit == 1) { - ect->kl = 44; - ect->dl = LABEL_SIZE - 44; - } + ect->dl = 8; ccw[-1].flags |= CCW_FLAG_CC; - ccw->cmd_code = DASD_ECKD_CCW_WRITE_CKD; + ccw->cmd_code = DASD_ECKD_CCW_WRITE_RECORD_ZERO; ccw->flags = CCW_FLAG_SLI; ccw->count = 8; ccw->cda = (__u32)(addr_t) ect; ccw++; } + if ((intensity & ~0x08) & 0x04) { /* erase track */ + ect = (struct eckd_count *) data; + data += sizeof(struct eckd_count); + ect->cyl = address.cyl; + ect->head = address.head; + ect->record = 1; + ect->kl = 0; + ect->dl = 0; + ccw[-1].flags |= CCW_FLAG_CC; + ccw->cmd_code = DASD_ECKD_CCW_WRITE_CKD; + ccw->flags = CCW_FLAG_SLI; + ccw->count = 8; + ccw->cda = (__u32)(addr_t) ect; + } else { /* write remaining records */ + for (i = 0; i < rpt; i++) { + ect = (struct eckd_count *) data; + data += sizeof(struct eckd_count); + ect->cyl = address.cyl; + ect->head = address.head; + ect->record = i + 1; + ect->kl = 0; + ect->dl = fdata->blksize; + /* + * Check for special tracks 0-1 + * when formatting CDL + */ + if ((intensity & 0x08) && + fdata->start_unit == 0) { + if (i < 3) { + ect->kl = 4; + ect->dl = 
sizes_trk0[i] - 4; + } + } + if ((intensity & 0x08) && + fdata->start_unit == 1) { + ect->kl = 44; + ect->dl = LABEL_SIZE - 44; + } + ccw[-1].flags |= CCW_FLAG_CC; + if (i != 0 || j == 0) + ccw->cmd_code = + DASD_ECKD_CCW_WRITE_CKD; + else + ccw->cmd_code = + DASD_ECKD_CCW_WRITE_CKD_MT; + ccw->flags = CCW_FLAG_SLI; + ccw->count = 8; + ccw->cda = (__u32)(addr_t) ect; + ccw++; + } + } } - fcp->startdev = device; - fcp->memdev = device; + + fcp->startdev = startdev; + fcp->memdev = startdev; fcp->retries = 256; + fcp->expires = startdev->default_expires * HZ; fcp->buildclk = get_tod_clock(); fcp->status = DASD_CQR_FILLED; + return fcp; } +static int +dasd_eckd_format_device(struct dasd_device *base, + struct format_data_t *fdata) +{ + struct dasd_ccw_req *cqr, *n; + struct dasd_block *block; + struct dasd_eckd_private *private; + struct list_head format_queue; + struct dasd_device *device; + int old_stop, format_step; + int step, rc = 0; + + block = base->block; + private = (struct dasd_eckd_private *) base->private; + + /* Sanity checks. 
*/ + if (fdata->start_unit >= + (private->real_cyl * private->rdc_data.trk_per_cyl)) { + dev_warn(&base->cdev->dev, + "Start track number %u used in formatting is too big\n", + fdata->start_unit); + return -EINVAL; + } + if (fdata->stop_unit >= + (private->real_cyl * private->rdc_data.trk_per_cyl)) { + dev_warn(&base->cdev->dev, + "Stop track number %u used in formatting is too big\n", + fdata->stop_unit); + return -EINVAL; + } + if (fdata->start_unit > fdata->stop_unit) { + dev_warn(&base->cdev->dev, + "Start track %u used in formatting exceeds end track\n", + fdata->start_unit); + return -EINVAL; + } + if (dasd_check_blocksize(fdata->blksize) != 0) { + dev_warn(&base->cdev->dev, + "The DASD cannot be formatted with block size %u\n", + fdata->blksize); + return -EINVAL; + } + + INIT_LIST_HEAD(&format_queue); + old_stop = fdata->stop_unit; + + while (fdata->start_unit <= 1) { + fdata->stop_unit = fdata->start_unit; + cqr = dasd_eckd_build_format(base, fdata); + list_add(&cqr->blocklist, &format_queue); + + fdata->stop_unit = old_stop; + fdata->start_unit++; + + if (fdata->start_unit > fdata->stop_unit) + goto sleep; + } + +retry: + format_step = 255 / recs_per_track(&private->rdc_data, 0, + fdata->blksize); + while (fdata->start_unit <= old_stop) { + step = fdata->stop_unit - fdata->start_unit + 1; + if (step > format_step) + fdata->stop_unit = fdata->start_unit + format_step - 1; + + cqr = dasd_eckd_build_format(base, fdata); + if (IS_ERR(cqr)) { + if (PTR_ERR(cqr) == -ENOMEM) { + /* + * not enough memory available + * go to out and start requests + * retry after first requests were finished + */ + fdata->stop_unit = old_stop; + goto sleep; + } else + return PTR_ERR(cqr); + } + list_add(&cqr->blocklist, &format_queue); + + fdata->start_unit = fdata->stop_unit + 1; + fdata->stop_unit = old_stop; + } + +sleep: + dasd_sleep_on_queue(&format_queue); + + list_for_each_entry_safe(cqr, n, &format_queue, blocklist) { + device = cqr->startdev; + private = (struct 
dasd_eckd_private *) device->private; + if (cqr->status == DASD_CQR_FAILED) + rc = -EIO; + list_del_init(&cqr->blocklist); + dasd_sfree_request(cqr, device); + private->count--; + } + + /* + * in case of ENOMEM we need to retry after + * first requests are finished + */ + if (fdata->start_unit <= fdata->stop_unit) + goto retry; + + return rc; +} + static void dasd_eckd_handle_terminated_request(struct dasd_ccw_req *cqr) { cqr->status = DASD_CQR_FILLED; @@ -4305,8 +4432,9 @@ static struct dasd_discipline dasd_eckd_discipline = { .uncheck_device = dasd_eckd_uncheck_device, .do_analysis = dasd_eckd_do_analysis, .verify_path = dasd_eckd_verify_path, - .ready_to_online = dasd_eckd_ready_to_online, + .basic_to_ready = dasd_eckd_basic_to_ready, .online_to_ready = dasd_eckd_online_to_ready, + .ready_to_basic = dasd_eckd_ready_to_basic, .fill_geometry = dasd_eckd_fill_geometry, .start_IO = dasd_start_IO, .term_IO = dasd_term_IO, diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 899e3f5a56e5..0785bd9bd5b6 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -300,10 +300,11 @@ struct dasd_discipline { * Last things to do when a device is set online, and first things * when it is set offline. */ - int (*ready_to_online) (struct dasd_device *); + int (*basic_to_ready) (struct dasd_device *); int (*online_to_ready) (struct dasd_device *); + int (*ready_to_basic) (struct dasd_device *); - /* + /* (struct dasd_device *); * Device operation functions. build_cp creates a ccw chain for * a block device request, start_io starts the request and * term_IO cancels it (e.g. in case of a timeout). 
format_device @@ -317,8 +318,8 @@ struct dasd_discipline { int (*start_IO) (struct dasd_ccw_req *); int (*term_IO) (struct dasd_ccw_req *); void (*handle_terminated_request) (struct dasd_ccw_req *); - struct dasd_ccw_req *(*format_device) (struct dasd_device *, - struct format_data_t *); + int (*format_device) (struct dasd_device *, + struct format_data_t *); int (*free_cp) (struct dasd_ccw_req *, struct request *); /* @@ -672,6 +673,7 @@ int dasd_term_IO(struct dasd_ccw_req *); void dasd_schedule_device_bh(struct dasd_device *); void dasd_schedule_block_bh(struct dasd_block *); int dasd_sleep_on(struct dasd_ccw_req *); +int dasd_sleep_on_queue(struct list_head *); int dasd_sleep_on_immediatly(struct dasd_ccw_req *); int dasd_sleep_on_interruptible(struct dasd_ccw_req *); void dasd_device_set_timer(struct dasd_device *, int); diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 03c0e0444553..8be1b51e9311 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -143,12 +143,12 @@ static int dasd_ioctl_resume(struct dasd_block *block) /* * performs formatting of _device_ according to _fdata_ * Note: The discipline's format_function is assumed to deliver formatting - * commands to format a single unit of the device. In terms of the ECKD - * devices this means CCWs are generated to format a single track. + * commands to format multiple units of the device. In terms of the ECKD + * devices this means CCWs are generated to format multiple tracks. 
*/ -static int dasd_format(struct dasd_block *block, struct format_data_t *fdata) +static int +dasd_format(struct dasd_block *block, struct format_data_t *fdata) { - struct dasd_ccw_req *cqr; struct dasd_device *base; int rc; @@ -157,8 +157,8 @@ static int dasd_format(struct dasd_block *block, struct format_data_t *fdata) return -EPERM; if (base->state != DASD_STATE_BASIC) { - pr_warning("%s: The DASD cannot be formatted while it is " - "enabled\n", dev_name(&base->cdev->dev)); + pr_warn("%s: The DASD cannot be formatted while it is enabled\n", + dev_name(&base->cdev->dev)); return -EBUSY; } @@ -178,21 +178,10 @@ static int dasd_format(struct dasd_block *block, struct format_data_t *fdata) bdput(bdev); } - while (fdata->start_unit <= fdata->stop_unit) { - cqr = base->discipline->format_device(base, fdata); - if (IS_ERR(cqr)) - return PTR_ERR(cqr); - rc = dasd_sleep_on_interruptible(cqr); - dasd_sfree_request(cqr, cqr->memdev); - if (rc) { - if (rc != -ERESTARTSYS) - pr_err("%s: Formatting unit %d failed with " - "rc=%d\n", dev_name(&base->cdev->dev), - fdata->start_unit, rc); - return rc; - } - fdata->start_unit++; - } + rc = base->discipline->format_device(base, fdata); + if (rc) + return rc; + return 0; } From c55768765e7b488ff20832c6ba89ea4e017a7b1b Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 15 Apr 2013 16:41:31 +0200 Subject: [PATCH 30/59] s390/dasd: fix hanging device after resume with internal error 13 If too many ccw requests are pre-built before a suspend/resume cycle the device might not get enough memory to do path verification during resume. Requeue requests to the block device request queue on suspend and free pre-built ccw requests.
Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 67 +++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 4195cc05efeb..82758cbb220b 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2751,6 +2751,26 @@ static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data) wake_up(&dasd_flush_wq); } +/* + * Requeue a request back to the block request queue + * only works for block requests + */ +static int _dasd_requeue_request(struct dasd_ccw_req *cqr) +{ + struct dasd_block *block = cqr->block; + struct request *req; + unsigned long flags; + + if (!block) + return -EINVAL; + spin_lock_irqsave(&block->queue_lock, flags); + req = (struct request *) cqr->callback_data; + blk_requeue_request(block->request_queue, req); + spin_unlock_irqrestore(&block->queue_lock, flags); + + return 0; +} + /* * Go through all request on the dasd_block request queue, cancel them * on the respective dasd_device, and return them to the generic @@ -3469,10 +3489,11 @@ EXPORT_SYMBOL_GPL(dasd_generic_verify_path); int dasd_generic_pm_freeze(struct ccw_device *cdev) { - struct dasd_ccw_req *cqr, *n; - int rc; - struct list_head freeze_queue; struct dasd_device *device = dasd_device_from_cdev(cdev); + struct list_head freeze_queue; + struct dasd_ccw_req *cqr, *n; + struct dasd_ccw_req *refers; + int rc; if (IS_ERR(device)) return PTR_ERR(device); @@ -3485,7 +3506,8 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev) /* disallow new I/O */ dasd_device_set_stop_bits(device, DASD_STOPPED_PM); - /* clear active requests */ + + /* clear active requests and requeue them to block layer if possible */ INIT_LIST_HEAD(&freeze_queue); spin_lock_irq(get_ccwdev_lock(cdev)); rc = 0; @@ -3505,7 +3527,6 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev) } list_move_tail(&cqr->devlist, &freeze_queue); } - 
spin_unlock_irq(get_ccwdev_lock(cdev)); list_for_each_entry_safe(cqr, n, &freeze_queue, devlist) { @@ -3513,12 +3534,38 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev) (cqr->status != DASD_CQR_CLEAR_PENDING)); if (cqr->status == DASD_CQR_CLEARED) cqr->status = DASD_CQR_QUEUED; - } - /* move freeze_queue to start of the ccw_queue */ - spin_lock_irq(get_ccwdev_lock(cdev)); - list_splice_tail(&freeze_queue, &device->ccw_queue); - spin_unlock_irq(get_ccwdev_lock(cdev)); + /* requeue requests to blocklayer will only work for + block device requests */ + if (_dasd_requeue_request(cqr)) + continue; + + /* remove requests from device and block queue */ + list_del_init(&cqr->devlist); + while (cqr->refers != NULL) { + refers = cqr->refers; + /* remove the request from the block queue */ + list_del(&cqr->blocklist); + /* free the finished erp request */ + dasd_free_erp_request(cqr, cqr->memdev); + cqr = refers; + } + if (cqr->block) + list_del_init(&cqr->blocklist); + cqr->block->base->discipline->free_cp( + cqr, (struct request *) cqr->callback_data); + } + + /* + * if requests remain then they are internal request + * and go back to the device queue + */ + if (!list_empty(&freeze_queue)) { + /* move freeze_queue to start of the ccw_queue */ + spin_lock_irq(get_ccwdev_lock(cdev)); + list_splice_tail(&freeze_queue, &device->ccw_queue); + spin_unlock_irq(get_ccwdev_lock(cdev)); + } dasd_put_device(device); return rc; } From bd9e034ef340e3a00301f67b00a247617891f1f0 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 15 Apr 2013 17:50:03 +0200 Subject: [PATCH 31/59] s390/signal: Add BEA to compat signal handler parameters This patch adds the last breaking event address as parameter for 31 bit compat program signal handlers as it is already done for 64 bit programs. 
Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_signal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 6de049fbe62d..c439ac9ced09 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -362,6 +362,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* set extra registers only for synchronous signals */ regs->gprs[4] = regs->int_code & 127; regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; } /* Place signal number on stack to allow backtrace from handler. */ @@ -421,6 +422,7 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, regs->gprs[2] = map_signal(sig); regs->gprs[3] = (__force __u64) &frame->info; regs->gprs[4] = (__force __u64) &frame->uc; + regs->gprs[5] = task_thread_info(current)->last_break; return 0; give_sigsegv: From ea793788f8ec868e655920f4726b2bd6a881e5ae Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:05:47 +0200 Subject: [PATCH 32/59] s390/pci: msi cleanup hash usage The hash used for mapping irq numbers to msi descriptors does not utilize all buckets that were allocated. Fix this by using the same value (computed by the number of bits used for the hash function) at relevant places. 
Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_msi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c index 0297931335e1..b097aed05a9b 100644 --- a/arch/s390/pci/pci_msi.c +++ b/arch/s390/pci/pci_msi.c @@ -18,8 +18,9 @@ /* mapping of irq numbers to msi_desc */ static struct hlist_head *msi_hash; -static unsigned int msihash_shift = 6; -#define msi_hashfn(nr) hash_long(nr, msihash_shift) +static const unsigned int msi_hash_bits = 8; +#define MSI_HASH_BUCKETS (1U << msi_hash_bits) +#define msi_hashfn(nr) hash_long(nr, msi_hash_bits) static DEFINE_SPINLOCK(msi_map_lock); @@ -74,6 +75,7 @@ int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi, map->irq = nr; map->msi = msi; zdev->msi_map[nr & ZPCI_MSI_MASK] = map; + INIT_HLIST_NODE(&map->msi_chain); pr_debug("%s hashing irq: %u to bucket nr: %llu\n", __func__, nr, msi_hashfn(nr)); @@ -125,11 +127,11 @@ int __init zpci_msihash_init(void) { unsigned int i; - msi_hash = kmalloc(256 * sizeof(*msi_hash), GFP_KERNEL); + msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL); if (!msi_hash) return -ENOMEM; - for (i = 0; i < (1U << msihash_shift); i++) + for (i = 0; i < MSI_HASH_BUCKETS; i++) INIT_HLIST_HEAD(&msi_hash[i]); return 0; } From a2ab833360abbed3321fd694b69a5a32ee15785f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:11:14 +0200 Subject: [PATCH 33/59] s390/pci: debug device states Use the debugfs to keep track of a pci function's status changes. 
Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci_debug.h | 9 +++------ arch/s390/pci/pci.c | 3 --- arch/s390/pci/pci_clp.c | 13 ++++++++----- arch/s390/pci/pci_debug.c | 7 +++++-- drivers/pci/hotplug/s390_pci_hpc.c | 3 +++ 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h index 6bbec4265b6e..1ca5d1047c71 100644 --- a/arch/s390/include/asm/pci_debug.h +++ b/arch/s390/include/asm/pci_debug.h @@ -7,14 +7,11 @@ extern debug_info_t *pci_debug_msg_id; extern debug_info_t *pci_debug_err_id; #ifdef CONFIG_PCI_DEBUG -#define zpci_dbg(fmt, args...) \ - do { \ - if (pci_debug_msg_id->level >= 2) \ - debug_sprintf_event(pci_debug_msg_id, 2, fmt , ## args);\ - } while (0) +#define zpci_dbg(imp, fmt, args...) \ + debug_sprintf_event(pci_debug_msg_id, imp, fmt, ##args) #else /* !CONFIG_PCI_DEBUG */ -#define zpci_dbg(fmt, args...) do { } while (0) +#define zpci_dbg(imp, fmt, args...) do { } while (0) #endif #define zpci_err(text...) 
\ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index fddf847e71ac..20823f022925 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -99,9 +99,6 @@ static int __read_mostly aisb_max; static struct kmem_cache *zdev_irq_cache; static struct kmem_cache *zdev_fmb_cache; -debug_info_t *pci_debug_msg_id; -debug_info_t *pci_debug_err_id; - static inline int irq_to_msi_nr(unsigned int irq) { return irq & ZPCI_MSI_MASK; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index f339fe2feb15..bd34359d1546 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include /* @@ -144,6 +145,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured) struct zpci_dev *zdev; int rc; + zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured); zdev = zpci_alloc_device(); if (IS_ERR(zdev)) return PTR_ERR(zdev); @@ -204,8 +206,8 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) *fh = rrb->response.fh; else { - pr_err("Set PCI FN failed with response: %x cc: %d\n", - rrb->response.hdr.rsp, rc); + zpci_dbg(0, "SPF fh:%x, cc:%d, resp:%x\n", *fh, rc, + rrb->response.hdr.rsp); rc = -EIO; } clp_free_block(rrb); @@ -221,6 +223,8 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) if (!rc) /* Success -> store enabled handle in zdev */ zdev->fh = fh; + + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } @@ -237,9 +241,8 @@ int clp_disable_fh(struct zpci_dev *zdev) if (!rc) /* Success -> store disabled handle in zdev */ zdev->fh = fh; - else - dev_err(&zdev->pdev->dev, - "Failed to disable fn handle: 0x%x\n", fh); + + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index a5d07bc2a547..771b82359af4 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -11,12 +11,17 @@ #include 
#include #include +#include #include #include #include static struct dentry *debugfs_root; +debug_info_t *pci_debug_msg_id; +EXPORT_SYMBOL_GPL(pci_debug_msg_id); +debug_info_t *pci_debug_err_id; +EXPORT_SYMBOL_GPL(pci_debug_err_id); static char *pci_perf_names[] = { /* hardware counters */ @@ -168,7 +173,6 @@ int __init zpci_debug_init(void) return -EINVAL; debug_register_view(pci_debug_msg_id, &debug_sprintf_view); debug_set_level(pci_debug_msg_id, 3); - zpci_dbg("Debug view initialized\n"); /* error log */ pci_debug_err_id = debug_register("pci_error", 2, 1, 16); @@ -176,7 +180,6 @@ int __init zpci_debug_init(void) return -EINVAL; debug_register_view(pci_debug_err_id, &debug_hex_ascii_view); debug_set_level(pci_debug_err_id, 6); - zpci_err("Debug view initialized\n"); debugfs_root = debugfs_create_dir("pci", NULL); return 0; diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 7db249a25016..6053e7221b51 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #define SLOT_NAME_SIZE 10 @@ -49,6 +50,7 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) return -EIO; rc = sclp_pci_configure(slot->zdev->fid); + zpci_dbg(3, "conf fid:%x, rc:%d\n", slot->zdev->fid, rc); if (!rc) { slot->zdev->state = ZPCI_FN_STATE_CONFIGURED; /* automatically scan the device after is was configured */ @@ -70,6 +72,7 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) * or do we need to trigger that here? */ rc = sclp_pci_deconfigure(slot->zdev->fid); + zpci_dbg(3, "deconf fid:%x, rc:%d\n", slot->zdev->fid, rc); if (!rc) { /* Fixme: better call List-PCI to find the disabled FH for the FID since the FH should be opaque... 
*/ From cb65a669f62ecca123cf4f6998903ee628c59caf Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:12:17 +0200 Subject: [PATCH 34/59] s390/pci: do not modify function handles Don't modify function handles to get a disabled handle - call clp_disable_fh. With this change we also no longer deconfigure enabled functions. Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 1 + arch/s390/pci/pci.c | 7 +++++++ drivers/pci/hotplug/s390_pci_hpc.c | 9 ++++----- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 05333b7f0469..6c1801235db9 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -140,6 +140,7 @@ static inline bool zdev_enabled(struct zpci_dev *zdev) struct zpci_dev *zpci_alloc_device(void); int zpci_create_device(struct zpci_dev *); int zpci_enable_device(struct zpci_dev *); +int zpci_disable_device(struct zpci_dev *); void zpci_stop_device(struct zpci_dev *); void zpci_free_device(struct zpci_dev *); int zpci_scan_device(struct zpci_dev *); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 20823f022925..24dcf059f061 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -955,6 +955,13 @@ out: } EXPORT_SYMBOL_GPL(zpci_enable_device); +int zpci_disable_device(struct zpci_dev *zdev) +{ + zpci_dma_exit_device(zdev); + return clp_disable_fh(zdev); +} +EXPORT_SYMBOL_GPL(zpci_disable_device); + int zpci_create_device(struct zpci_dev *zdev) { int rc; diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 6053e7221b51..46a7b738f61f 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -68,17 +68,16 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) if (!zpci_fn_configured(slot->zdev->state)) return -EIO; + rc = zpci_disable_device(slot->zdev); + if (rc) + return rc; /* TODO:
we rely on the user to unbind/remove the device, is that plausible * or do we need to trigger that here? */ rc = sclp_pci_deconfigure(slot->zdev->fid); zpci_dbg(3, "deconf fid:%x, rc:%d\n", slot->zdev->fid, rc); - if (!rc) { - /* Fixme: better call List-PCI to find the disabled FH - for the FID since the FH should be opaque... */ - slot->zdev->fh &= 0x7fffffff; + if (!rc) slot->zdev->state = ZPCI_FN_STATE_STANDBY; - } return rc; } From af0a8a8453f7c7b3497c9fecc053897690e00695 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:13:21 +0200 Subject: [PATCH 35/59] s390/pci: implement pcibios_add_device Use pcibios_add_device to do arch specific device initialization. This function will be called during pci_bus_add_device. Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 24dcf059f061..01478f63c8a8 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -868,6 +868,17 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry) spin_unlock(&zpci_iomap_lock); } +int pcibios_add_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zpci_debug_init_device(zdev); + zpci_fmb_enable_device(zdev); + zpci_map_resources(zdev); + + return 0; +} + static int zpci_create_device_bus(struct zpci_dev *zdev) { struct resource *res; @@ -1019,9 +1030,6 @@ int zpci_scan_device(struct zpci_dev *zdev) goto out; } - zpci_debug_init_device(zdev); - zpci_fmb_enable_device(zdev); - zpci_map_resources(zdev); pci_bus_add_devices(zdev->bus); /* now that pdev was added to the bus mark it as used */ From cbcca5d070c30909fd355018ed96134ee9018425 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:14:44 +0200 Subject: [PATCH 36/59] s390/pci: uninline instruction wrappers Uninline pci related instruction wrappers to de-bloat the code: 
add/remove: 15/0 grow/shrink: 2/24 up/down: 1326/-12628 (-11302) This is especially useful for the inlined pci read and write functions which are used all over the kernel. Also remove the unused __stpcifc while at it. Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci_insn.h | 203 +------------------------------ arch/s390/pci/Makefile | 4 +- arch/s390/pci/pci_insn.c | 194 +++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+), 199 deletions(-) create mode 100644 arch/s390/pci/pci_insn.c diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index 1486a98d5dad..c6649e72cf40 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -1,10 +1,6 @@ #ifndef _ASM_S390_PCI_INSN_H #define _ASM_S390_PCI_INSN_H -#include - -#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */ - /* Load/Store status codes */ #define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 #define ZPCI_PCI_ST_FUNC_IN_ERR 8 @@ -82,199 +78,12 @@ struct zpci_fib { u64 reserved7; } __packed; -/* Modify PCI Function Controls */ -static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) -{ - u8 cc; - asm volatile ( - " .insn rxy,0xe300000000d0,%[req],%[fib]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib) - : : "cc"); - *status = req >> 24 & 0xff; - return cc; -} - -static inline int mpcifc_instr(u64 req, struct zpci_fib *fib) -{ - u8 cc, status; - - do { - cc = __mpcifc(req, fib, &status); - if (cc == 2) - msleep(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - - if (cc) - printk_once(KERN_ERR "%s: error cc: %d status: %d\n", - __func__, cc, status); - return (cc) ? 
-EIO : 0; -} - -/* Refresh PCI Translations */ -static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) -{ - register u64 __addr asm("2") = addr; - register u64 __range asm("3") = range; - u8 cc; - - asm volatile ( - " .insn rre,0xb9d30000,%[fn],%[addr]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [fn] "+d" (fn) - : [addr] "d" (__addr), "d" (__range) - : "cc"); - *status = fn >> 24 & 0xff; - return cc; -} - -static inline int rpcit_instr(u64 fn, u64 addr, u64 range) -{ - u8 cc, status; - - do { - cc = __rpcit(fn, addr, range, &status); - if (cc == 2) - udelay(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - - if (cc) - printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n", - __func__, cc, status, addr, range); - return (cc) ? -EIO : 0; -} - -/* Store PCI function controls */ -static inline u8 __stpcifc(u32 handle, u8 space, struct zpci_fib *fib, u8 *status) -{ - u64 fn = (u64) handle << 32 | space << 16; - u8 cc; - - asm volatile ( - " .insn rxy,0xe300000000d4,%[fn],%[fib]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [fn] "+d" (fn), [fib] "=m" (*fib) - : : "cc"); - *status = fn >> 24 & 0xff; - return cc; -} - -/* Set Interruption Controls */ -static inline void sic_instr(u16 ctl, char *unused, u8 isc) -{ - asm volatile ( - " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" - : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); -} - -/* PCI Load */ -static inline u8 __pcilg(u64 *data, u64 req, u64 offset, u8 *status) -{ - register u64 __req asm("2") = req; - register u64 __offset asm("3") = offset; - u64 __data; - u8 cc; - - asm volatile ( - " .insn rre,0xb9d20000,%[data],%[req]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [data] "=d" (__data), [req] "+d" (__req) - : "d" (__offset) - : "cc"); - *status = __req >> 24 & 0xff; - *data = __data; - return cc; -} - -static inline int pcilg_instr(u64 *data, u64 req, u64 offset) -{ - u8 cc, status; - - do { - cc = __pcilg(data, req, offset, 
&status); - if (cc == 2) - udelay(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - - if (cc) { - printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", - __func__, cc, status, req, offset); - /* TODO: on IO errors set data to 0xff... - * here or in users of pcilg (le conversion)? - */ - } - return (cc) ? -EIO : 0; -} - -/* PCI Store */ -static inline u8 __pcistg(u64 data, u64 req, u64 offset, u8 *status) -{ - register u64 __req asm("2") = req; - register u64 __offset asm("3") = offset; - u8 cc; - - asm volatile ( - " .insn rre,0xb9d00000,%[data],%[req]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [req] "+d" (__req) - : "d" (__offset), [data] "d" (data) - : "cc"); - *status = __req >> 24 & 0xff; - return cc; -} - -static inline int pcistg_instr(u64 data, u64 req, u64 offset) -{ - u8 cc, status; - - do { - cc = __pcistg(data, req, offset, &status); - if (cc == 2) - udelay(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - - if (cc) - printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", - __func__, cc, status, req, offset); - return (cc) ? -EIO : 0; -} - -/* PCI Store Block */ -static inline u8 __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) -{ - u8 cc; - - asm volatile ( - " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [req] "+d" (req) - : [offset] "d" (offset), [data] "Q" (*data) - : "cc"); - *status = req >> 24 & 0xff; - return cc; -} - -static inline int pcistb_instr(const u64 *data, u64 req, u64 offset) -{ - u8 cc, status; - - do { - cc = __pcistb(data, req, offset, &status); - if (cc == 2) - udelay(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - - if (cc) - printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", - __func__, cc, status, req, offset); - return (cc) ? 
-EIO : 0; -} +int mpcifc_instr(u64 req, struct zpci_fib *fib); +int rpcit_instr(u64 fn, u64 addr, u64 range); +void sic_instr(u16 ctl, char *unused, u8 isc); +int pcilg_instr(u64 *data, u64 req, u64 offset); +int pcistg_instr(u64 data, u64 req, u64 offset); +int pcistb_instr(const u64 *data, u64 req, u64 offset); #endif diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index f0f426a113ce..086a2e37935d 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -2,5 +2,5 @@ # Makefile for the s390 PCI subsystem. # -obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o \ - pci_sysfs.o pci_event.o pci_debug.o +obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \ + pci_event.o pci_debug.o pci_insn.o diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c new file mode 100644 index 000000000000..c55962529f4f --- /dev/null +++ b/arch/s390/pci/pci_insn.c @@ -0,0 +1,194 @@ +/* + * s390 specific pci instructions + * + * Copyright IBM Corp. 2013 + */ + +#include +#include +#include +#include + +#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */ + +/* Modify PCI Function Controls */ +static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) +{ + u8 cc; + + asm volatile ( + " .insn rxy,0xe300000000d0,%[req],%[fib]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib) + : : "cc"); + *status = req >> 24 & 0xff; + return cc; +} + +int mpcifc_instr(u64 req, struct zpci_fib *fib) +{ + u8 cc, status; + + do { + cc = __mpcifc(req, fib, &status); + if (cc == 2) + msleep(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d\n", + __func__, cc, status); + return (cc) ? 
-EIO : 0; +} + +/* Refresh PCI Translations */ +static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) +{ + register u64 __addr asm("2") = addr; + register u64 __range asm("3") = range; + u8 cc; + + asm volatile ( + " .insn rre,0xb9d30000,%[fn],%[addr]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [fn] "+d" (fn) + : [addr] "d" (__addr), "d" (__range) + : "cc"); + *status = fn >> 24 & 0xff; + return cc; +} + +int rpcit_instr(u64 fn, u64 addr, u64 range) +{ + u8 cc, status; + + do { + cc = __rpcit(fn, addr, range, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n", + __func__, cc, status, addr, range); + return (cc) ? -EIO : 0; +} + +/* Set Interruption Controls */ +void sic_instr(u16 ctl, char *unused, u8 isc) +{ + asm volatile ( + " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" + : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); +} + +/* PCI Load */ +static inline u8 __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +{ + register u64 __req asm("2") = req; + register u64 __offset asm("3") = offset; + u64 __data; + u8 cc; + + asm volatile ( + " .insn rre,0xb9d20000,%[data],%[req]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [data] "=d" (__data), [req] "+d" (__req) + : "d" (__offset) + : "cc"); + *status = __req >> 24 & 0xff; + *data = __data; + return cc; +} + +int pcilg_instr(u64 *data, u64 req, u64 offset) +{ + u8 cc, status; + + do { + cc = __pcilg(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) { + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + /* TODO: on IO errors set data to 0xff... + * here or in users of pcilg (le conversion)? + */ + } + return (cc) ? 
-EIO : 0; +} +EXPORT_SYMBOL_GPL(pcilg_instr); + +/* PCI Store */ +static inline u8 __pcistg(u64 data, u64 req, u64 offset, u8 *status) +{ + register u64 __req asm("2") = req; + register u64 __offset asm("3") = offset; + u8 cc; + + asm volatile ( + " .insn rre,0xb9d00000,%[data],%[req]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [req] "+d" (__req) + : "d" (__offset), [data] "d" (data) + : "cc"); + *status = __req >> 24 & 0xff; + return cc; +} + +int pcistg_instr(u64 data, u64 req, u64 offset) +{ + u8 cc, status; + + do { + cc = __pcistg(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc) ? -EIO : 0; +} +EXPORT_SYMBOL_GPL(pcistg_instr); + +/* PCI Store Block */ +static inline u8 __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) +{ + u8 cc; + + asm volatile ( + " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [req] "+d" (req) + : [offset] "d" (offset), [data] "Q" (*data) + : "cc"); + *status = req >> 24 & 0xff; + return cc; +} + +int pcistb_instr(const u64 *data, u64 req, u64 offset) +{ + u8 cc, status; + + do { + cc = __pcistb(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc) ? -EIO : 0; +} +EXPORT_SYMBOL_GPL(pcistb_instr); From b2a9e87d2ce8fb2d0ce08ee49168805975c622da Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:15:42 +0200 Subject: [PATCH 37/59] s390/pci: rename instruction wrappers Use distinct (and hopefully sane) names for the pci instruction wrappers. 
Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci_insn.h | 12 ++++++------ arch/s390/include/asm/pci_io.h | 10 +++++----- arch/s390/pci/pci.c | 12 ++++++------ arch/s390/pci/pci_dma.c | 5 +++-- arch/s390/pci/pci_insn.c | 18 +++++++++--------- 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index c6649e72cf40..e6a2bdd4d705 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -79,11 +79,11 @@ struct zpci_fib { } __packed; -int mpcifc_instr(u64 req, struct zpci_fib *fib); -int rpcit_instr(u64 fn, u64 addr, u64 range); -void sic_instr(u16 ctl, char *unused, u8 isc); -int pcilg_instr(u64 *data, u64 req, u64 offset); -int pcistg_instr(u64 data, u64 req, u64 offset); -int pcistb_instr(const u64 *data, u64 req, u64 offset); +int s390pci_mod_fc(u64 req, struct zpci_fib *fib); +int s390pci_refresh_trans(u64 fn, u64 addr, u64 range); +int s390pci_load(u64 *data, u64 req, u64 offset); +int s390pci_store(u64 data, u64 req, u64 offset); +int s390pci_store_block(const u64 *data, u64 req, u64 offset); +void set_irq_ctrl(u16 ctl, char *unused, u8 isc); #endif diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 5fd81f31d6c7..a312c7e5a71e 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ u64 data; \ int rc; \ \ - rc = pcilg_instr(&data, req, ZPCI_OFFSET(addr)); \ + rc = s390pci_load(&data, req, ZPCI_OFFSET(addr)); \ if (rc) \ data = -1ULL; \ return (RETTYPE) data; \ @@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ u64 data = (VALTYPE) val; \ \ - pcistg_instr(data, req, ZPCI_OFFSET(addr)); \ + s390pci_store(data, req, ZPCI_OFFSET(addr)); \ } zpci_read(8, u64) 
@@ -83,7 +83,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len val = 0; /* let FW report error */ break; } - return pcistg_instr(val, req, offset); + return s390pci_store(val, req, offset); } static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) @@ -91,7 +91,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) u64 data; u8 cc; - cc = pcilg_instr(&data, req, offset); + cc = s390pci_load(&data, req, offset); switch (len) { case 1: *((u8 *) dst) = (u8) data; @@ -111,7 +111,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) { - return pcistb_instr(data, req, offset); + return s390pci_store_block(data, req, offset); } static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 01478f63c8a8..2b21749cc2b3 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -176,7 +176,7 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, fib->aisb = (u64) bucket->aisb + aisb / 8; fib->aisbo = aisb & ZPCI_MSI_MASK; - rc = mpcifc_instr(req, fib); + rc = s390pci_mod_fc(req, fib); pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi); free_page((unsigned long) fib); @@ -206,7 +206,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args fib->iota = args->iota; fib->fmb_addr = args->fmb_addr; - rc = mpcifc_instr(req, fib); + rc = s390pci_mod_fc(req, fib); free_page((unsigned long) fib); return rc; } @@ -280,7 +280,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) u64 data; int rc; - rc = pcilg_instr(&data, req, offset); + rc = s390pci_load(&data, req, offset); data = data << ((8 - len) * 8); data = le64_to_cpu(data); if (!rc) @@ -298,7 +298,7 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) data = 
cpu_to_le64(data); data = data >> ((8 - len) * 8); - rc = pcistg_instr(data, req, offset); + rc = s390pci_store(data, req, offset); return rc; } @@ -470,7 +470,7 @@ scan: } /* enable interrupts again */ - sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); /* check again to not lose initiative */ rmb(); @@ -785,7 +785,7 @@ static int __init zpci_irq_init(void) spin_lock_init(&bucket->lock); /* set summary to 1 to be called every time for the ISC */ *zpci_irq_si = 1; - sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; out_ai: diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index a547419907c3..60e4999e6b67 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -169,8 +169,9 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, * needs to be redone! */ goto no_refresh; - rc = rpcit_instr((u64) zdev->fh << 32, start_dma_addr, - nr_pages * PAGE_SIZE); + + rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, + nr_pages * PAGE_SIZE); no_refresh: spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index c55962529f4f..d9b573bf4eb7 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -26,7 +26,7 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) return cc; } -int mpcifc_instr(u64 req, struct zpci_fib *fib) +int s390pci_mod_fc(u64 req, struct zpci_fib *fib) { u8 cc, status; @@ -60,7 +60,7 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) return cc; } -int rpcit_instr(u64 fn, u64 addr, u64 range) +int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) { u8 cc, status; @@ -77,7 +77,7 @@ int rpcit_instr(u64 fn, u64 addr, u64 range) } /* Set Interruption Controls */ -void sic_instr(u16 ctl, char *unused, u8 isc) +void set_irq_ctrl(u16 ctl, char *unused, u8 isc) { asm volatile ( " .insn 
rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" @@ -104,7 +104,7 @@ static inline u8 __pcilg(u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int pcilg_instr(u64 *data, u64 req, u64 offset) +int s390pci_load(u64 *data, u64 req, u64 offset) { u8 cc, status; @@ -123,7 +123,7 @@ int pcilg_instr(u64 *data, u64 req, u64 offset) } return (cc) ? -EIO : 0; } -EXPORT_SYMBOL_GPL(pcilg_instr); +EXPORT_SYMBOL_GPL(s390pci_load); /* PCI Store */ static inline u8 __pcistg(u64 data, u64 req, u64 offset, u8 *status) @@ -143,7 +143,7 @@ static inline u8 __pcistg(u64 data, u64 req, u64 offset, u8 *status) return cc; } -int pcistg_instr(u64 data, u64 req, u64 offset) +int s390pci_store(u64 data, u64 req, u64 offset) { u8 cc, status; @@ -158,7 +158,7 @@ int pcistg_instr(u64 data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc) ? -EIO : 0; } -EXPORT_SYMBOL_GPL(pcistg_instr); +EXPORT_SYMBOL_GPL(s390pci_store); /* PCI Store Block */ static inline u8 __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) @@ -176,7 +176,7 @@ static inline u8 __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int pcistb_instr(const u64 *data, u64 req, u64 offset) +int s390pci_store_block(const u64 *data, u64 req, u64 offset) { u8 cc, status; @@ -191,4 +191,4 @@ int pcistb_instr(const u64 *data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc) ? -EIO : 0; } -EXPORT_SYMBOL_GPL(pcistb_instr); +EXPORT_SYMBOL_GPL(s390pci_store_block); From f0bacb7fc4f7defb15a6575d92f8ea4342f8f09e Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:16:14 +0200 Subject: [PATCH 38/59] s390/pci: add exception table to load/store instructions Don't let pci_load and friends crash the kernel when called with e.g. an invalid offset. Return -ENXIO instead. 
Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci_io.h | 2 +- arch/s390/pci/pci_insn.c | 52 +++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index a312c7e5a71e..0e0bec9a3fb7 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -89,7 +89,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) { u64 data; - u8 cc; + int cc; cc = s390pci_load(&data, req, offset); switch (len) { diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index d9b573bf4eb7..4bc32f368f7d 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -8,6 +8,7 @@ #include #include #include +#include #define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */ @@ -85,18 +86,20 @@ void set_irq_ctrl(u16 ctl, char *unused, u8 isc) } /* PCI Load */ -static inline u8 __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) { register u64 __req asm("2") = req; register u64 __offset asm("3") = offset; + int cc = -ENXIO; u64 __data; - u8 cc; asm volatile ( " .insn rre,0xb9d20000,%[data],%[req]\n" - " ipm %[cc]\n" + "0: ipm %[cc]\n" " srl %[cc],28\n" - : [cc] "=d" (cc), [data] "=d" (__data), [req] "+d" (__req) + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req) : "d" (__offset) : "cc"); *status = __req >> 24 & 0xff; @@ -106,7 +109,8 @@ static inline u8 __pcilg(u64 *data, u64 req, u64 offset, u8 *status) int s390pci_load(u64 *data, u64 req, u64 offset) { - u8 cc, status; + u8 status; + int cc; do { cc = __pcilg(data, req, offset, &status); @@ -114,29 +118,27 @@ int s390pci_load(u64 *data, u64 req, u64 offset) udelay(ZPCI_INSN_BUSY_DELAY); } while (cc == 2); - if (cc) { + if (cc) 
printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", __func__, cc, status, req, offset); - /* TODO: on IO errors set data to 0xff... - * here or in users of pcilg (le conversion)? - */ - } - return (cc) ? -EIO : 0; + return (cc > 0) ? -EIO : cc; } EXPORT_SYMBOL_GPL(s390pci_load); /* PCI Store */ -static inline u8 __pcistg(u64 data, u64 req, u64 offset, u8 *status) +static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) { register u64 __req asm("2") = req; register u64 __offset asm("3") = offset; - u8 cc; + int cc = -ENXIO; asm volatile ( " .insn rre,0xb9d00000,%[data],%[req]\n" - " ipm %[cc]\n" + "0: ipm %[cc]\n" " srl %[cc],28\n" - : [cc] "=d" (cc), [req] "+d" (__req) + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [req] "+d" (__req) : "d" (__offset), [data] "d" (data) : "cc"); *status = __req >> 24 & 0xff; @@ -145,7 +147,8 @@ static inline u8 __pcistg(u64 data, u64 req, u64 offset, u8 *status) int s390pci_store(u64 data, u64 req, u64 offset) { - u8 cc, status; + u8 status; + int cc; do { cc = __pcistg(data, req, offset, &status); @@ -156,20 +159,22 @@ int s390pci_store(u64 data, u64 req, u64 offset) if (cc) printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", __func__, cc, status, req, offset); - return (cc) ? -EIO : 0; + return (cc > 0) ? 
-EIO : cc; } EXPORT_SYMBOL_GPL(s390pci_store); /* PCI Store Block */ -static inline u8 __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) +static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) { - u8 cc; + int cc = -ENXIO; asm volatile ( " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" - " ipm %[cc]\n" + "0: ipm %[cc]\n" " srl %[cc],28\n" - : [cc] "=d" (cc), [req] "+d" (req) + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [req] "+d" (req) : [offset] "d" (offset), [data] "Q" (*data) : "cc"); *status = req >> 24 & 0xff; @@ -178,7 +183,8 @@ static inline u8 __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) int s390pci_store_block(const u64 *data, u64 req, u64 offset) { - u8 cc, status; + u8 status; + int cc; do { cc = __pcistb(data, req, offset, &status); @@ -189,6 +195,6 @@ int s390pci_store_block(const u64 *data, u64 req, u64 offset) if (cc) printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", __func__, cc, status, req, offset); - return (cc) ? -EIO : 0; + return (cc > 0) ? -EIO : cc; } EXPORT_SYMBOL_GPL(s390pci_store_block); From b170bad40dab1a1684d629b37cb65a5281d35bd8 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:17:15 +0200 Subject: [PATCH 39/59] s390/pci: do not read data after failed load If a pci load instruction fails the content of the register where the data is stored is possibly unchanged. Fix the inline assembly wrapper __pcilg to not return stale data. Additionally fix the callers of this function who access uninitialized variables. 
Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci_io.h | 4 ++++ arch/s390/pci/pci.c | 8 ++++---- arch/s390/pci/pci_insn.c | 4 +++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 0e0bec9a3fb7..83a9caa6ae53 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -92,6 +92,9 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) int cc; cc = s390pci_load(&data, req, offset); + if (cc) + goto out; + switch (len) { case 1: *((u8 *) dst) = (u8) data; @@ -106,6 +109,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) *((u64 *) dst) = (u64) data; break; } +out: return cc; } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 2b21749cc2b3..51d16f1fb5ea 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -281,11 +281,11 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) int rc; rc = s390pci_load(&data, req, offset); - data = data << ((8 - len) * 8); - data = le64_to_cpu(data); - if (!rc) + if (!rc) { + data = data << ((8 - len) * 8); + data = le64_to_cpu(data); *val = (u32) data; - else + } else *val = 0xffffffff; return rc; } diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 4bc32f368f7d..22eeb9d7ffeb 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -103,7 +103,9 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) : "d" (__offset) : "cc"); *status = __req >> 24 & 0xff; - *data = __data; + if (!cc) + *data = __data; + return cc; } From 2c3700bbb2c9c9c1d10f930d400f573d55f8e750 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:18:41 +0200 Subject: [PATCH 40/59] s390/pci: return error after failed pci ops Access to pci config space via pci_ops should not fail silently. 
Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 51d16f1fb5ea..6a054bf83eb0 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -405,20 +405,28 @@ static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { struct zpci_dev *zdev = get_zdev_by_bus(bus); + int ret; if (!zdev || devfn != ZPCI_DEVFN) - return 0; - return zpci_cfg_load(zdev, where, val, size); + ret = -ENODEV; + else + ret = zpci_cfg_load(zdev, where, val, size); + + return ret; } static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { struct zpci_dev *zdev = get_zdev_by_bus(bus); + int ret; if (!zdev || devfn != ZPCI_DEVFN) - return 0; - return zpci_cfg_store(zdev, where, val, size); + ret = -ENODEV; + else + ret = zpci_cfg_store(zdev, where, val, size); + + return ret; } static struct pci_ops pci_root_ops = { From 89b0dc958b7ac08ecf23ca25df98f7effe897ed9 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:19:22 +0200 Subject: [PATCH 41/59] s390/pci: disable per default Disable pci on s390. Enable with pci=on. 
Suggested-by: Heiko Carstens Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 6a054bf83eb0..dd8e13ef506c 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -1106,13 +1106,13 @@ void zpci_deregister_hp_ops(void) } EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops); -unsigned int s390_pci_probe = 1; +unsigned int s390_pci_probe; EXPORT_SYMBOL_GPL(s390_pci_probe); char * __init pcibios_setup(char *str) { - if (!strcmp(str, "off")) { - s390_pci_probe = 0; + if (!strcmp(str, "on")) { + s390_pci_probe = 1; return NULL; } return str; From 4e4d035a928340e828f633059b735901584c67a7 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 16 Apr 2013 14:20:15 +0200 Subject: [PATCH 42/59] s390/pci: remove disable_device implementation pci_disable_device is called by a driver after it stops using the pci function - e.g. during the removal of the driver. The current implementation removes the architecture specific information of this function such that even after a call to pci_enable_device the pci function is no longer usable. Just remove pcibios_disable_device. 
Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index dd8e13ef506c..48de2be7b46a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -600,19 +600,6 @@ static void zpci_map_resources(struct zpci_dev *zdev) } }; -static void zpci_unmap_resources(struct pci_dev *pdev) -{ - resource_size_t len; - int i; - - for (i = 0; i < PCI_BAR_COUNT; i++) { - len = pci_resource_len(pdev, i); - if (!len) - continue; - pci_iounmap(pdev, (void *) pdev->resource[i].start); - } -}; - struct zpci_dev *zpci_alloc_device(void) { struct zpci_dev *zdev; @@ -640,21 +627,6 @@ void zpci_free_device(struct zpci_dev *zdev) kfree(zdev); } -/* Called on removal of pci_dev, leaves zpci and bus device */ -static void zpci_remove_device(struct pci_dev *pdev) -{ - struct zpci_dev *zdev = get_zdev(pdev); - - dev_info(&pdev->dev, "Removing device %u\n", zdev->domain); - zdev->state = ZPCI_FN_STATE_CONFIGURED; - zpci_dma_exit_device(zdev); - zpci_fmb_disable_device(zdev); - zpci_sysfs_remove_device(&pdev->dev); - zpci_unmap_resources(pdev); - list_del(&zdev->entry); /* can be called from init */ - zdev->pdev = NULL; -} - static void zpci_scan_devices(void) { struct zpci_dev *zdev; @@ -692,12 +664,6 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask) return 0; } -void pcibios_disable_device(struct pci_dev *pdev) -{ - zpci_remove_device(pdev); - pdev->sysdata = NULL; -} - int pcibios_add_platform_entries(struct pci_dev *pdev) { return zpci_sysfs_add_device(&pdev->dev); From c5034945ce59abacdd02c5eff29f4f54df197880 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 10 Sep 2012 16:14:33 +0200 Subject: [PATCH 43/59] s390/mm,gmap: implement gmap_translate() Implement gmap_translate() function which translates a guest absolute address to a user space process address without establishing the guest 
page table entries. This is useful for kvm guest address translations where no memory access is expected to happen soon (e.g. tprot exception handler). Signed-off-by: Heiko Carstens Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 2 + arch/s390/mm/pgtable.c | 107 +++++++++++++++++++++++++------- 2 files changed, 87 insertions(+), 22 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4a5443118cfb..fda82d6c7c83 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -760,6 +760,8 @@ void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long length); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_translate(unsigned long address, struct gmap *); +unsigned long gmap_translate(unsigned long address, struct gmap *); unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); void gmap_discard(unsigned long from, unsigned long to, struct gmap *); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ae44d2a34313..2accf7113d13 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -379,45 +379,108 @@ out_unmap: } EXPORT_SYMBOL_GPL(gmap_map_segment); +static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) +{ + unsigned long *table; + + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return ERR_PTR(-EFAULT); + 
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + return table; +} + +/** + * __gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long *segment_ptr, vmaddr, segment; + struct gmap_pgtable *mp; + struct page *page; + + current->thread.gmap_addr = address; + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return PTR_ERR(segment_ptr); + /* Convert the gmap address to an mm address. */ + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INV)) { + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } else if (segment & _SEGMENT_ENTRY_RO) { + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + return vmaddr | (address & ~PMD_MASK); + } + return -EFAULT; +} +EXPORT_SYMBOL_GPL(__gmap_translate); + +/** + * gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. 
+ */ +unsigned long gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_translate(address, gmap); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_translate); + /* * this function is assumed to be called with mmap_sem held */ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) { - unsigned long *table, vmaddr, segment; - struct mm_struct *mm; + unsigned long *segment_ptr, vmaddr, segment; + struct vm_area_struct *vma; struct gmap_pgtable *mp; struct gmap_rmap *rmap; - struct vm_area_struct *vma; + struct mm_struct *mm; struct page *page; pgd_t *pgd; pud_t *pud; pmd_t *pmd; current->thread.gmap_addr = address; - mm = gmap->mm; - /* Walk the gmap address space page table */ - table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) return -EFAULT; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) - return -EFAULT; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) - return -EFAULT; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 20) & 0x7ff); - /* Convert the gmap address to an mm address. 
*/ - segment = *table; - if (likely(!(segment & _SEGMENT_ENTRY_INV))) { + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INV)) { page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); } else if (segment & _SEGMENT_ENTRY_RO) { + mm = gmap->mm; vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; vma = find_vma(mm, vmaddr); if (!vma || vma->vm_start > vmaddr) @@ -441,12 +504,12 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) /* Link gmap segment table entry location to page table. */ page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; - rmap->entry = table; + rmap->entry = segment_ptr; spin_lock(&mm->page_table_lock); list_add(&rmap->list, &mp->mapper); spin_unlock(&mm->page_table_lock); /* Set gmap segment table entry to page table. */ - *table = pmd_val(*pmd) & PAGE_MASK; + *segment_ptr = pmd_val(*pmd) & PAGE_MASK; return vmaddr | (address & ~PMD_MASK); } return -EFAULT; From ab8e5235868f99dfc779e4eaff28f53d63714ce4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 16 Apr 2013 13:37:46 +0200 Subject: [PATCH 44/59] s390/mm,gmap: segment mapping race The gmap_map_segment function creates a special invalid segment table entry with the address of the requested target location in the process address space. The first access will create the connection between the gmap segment table and the target page table of the main process. If two threads do this concurrently both will walk the page tables and allocate a gmap_rmap structure for the same segment table entry. To avoid the race, recheck the segment table entry after taking the page table lock.
Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 160 +++++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 69 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 2accf7113d13..bd954e96f51c 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -454,12 +454,11 @@ unsigned long gmap_translate(unsigned long address, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_translate); -/* - * this function is assumed to be called with mmap_sem held - */ -unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +static int gmap_connect_pgtable(unsigned long segment, + unsigned long *segment_ptr, + struct gmap *gmap) { - unsigned long *segment_ptr, vmaddr, segment; + unsigned long vmaddr; struct vm_area_struct *vma; struct gmap_pgtable *mp; struct gmap_rmap *rmap; @@ -469,48 +468,94 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) pud_t *pud; pmd_t *pmd; + mm = gmap->mm; + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + vma = find_vma(mm, vmaddr); + if (!vma || vma->vm_start > vmaddr) + return -EFAULT; + /* Walk the parent mm page table */ + pgd = pgd_offset(mm, vmaddr); + pud = pud_alloc(mm, pgd, vmaddr); + if (!pud) + return -ENOMEM; + pmd = pmd_alloc(mm, pud, vmaddr); + if (!pmd) + return -ENOMEM; + if (!pmd_present(*pmd) && + __pte_alloc(mm, vma, pmd, vmaddr)) + return -ENOMEM; + /* pmd now points to a valid segment table entry. */ + rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); + if (!rmap) + return -ENOMEM; + /* Link gmap segment table entry location to page table. */ + page = pmd_page(*pmd); + mp = (struct gmap_pgtable *) page->index; + rmap->entry = segment_ptr; + spin_lock(&mm->page_table_lock); + if (*segment_ptr == segment) { + list_add(&rmap->list, &mp->mapper); + /* Set gmap segment table entry to page table. 
*/ + *segment_ptr = pmd_val(*pmd) & PAGE_MASK; + rmap = NULL; + } + spin_unlock(&mm->page_table_lock); + kfree(rmap); + return 0; +} + +static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) +{ + struct gmap_rmap *rmap, *next; + struct gmap_pgtable *mp; + struct page *page; + int flush; + + flush = 0; + spin_lock(&mm->page_table_lock); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry_safe(rmap, next, &mp->mapper, list) { + *rmap->entry = + _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + list_del(&rmap->list); + kfree(rmap); + flush = 1; + } + spin_unlock(&mm->page_table_lock); + if (flush) + __tlb_flush_global(); +} + +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long *segment_ptr, segment; + struct gmap_pgtable *mp; + struct page *page; + int rc; + current->thread.gmap_addr = address; segment_ptr = gmap_table_walk(address, gmap); if (IS_ERR(segment_ptr)) return -EFAULT; /* Convert the gmap address to an mm address. */ - segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INV)) { - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - return mp->vmaddr | (address & ~PMD_MASK); - } else if (segment & _SEGMENT_ENTRY_RO) { - mm = gmap->mm; - vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - vma = find_vma(mm, vmaddr); - if (!vma || vma->vm_start > vmaddr) - return -EFAULT; - - /* Walk the parent mm page table */ - pgd = pgd_offset(mm, vmaddr); - pud = pud_alloc(mm, pgd, vmaddr); - if (!pud) - return -ENOMEM; - pmd = pmd_alloc(mm, pud, vmaddr); - if (!pmd) - return -ENOMEM; - if (!pmd_present(*pmd) && - __pte_alloc(mm, vma, pmd, vmaddr)) - return -ENOMEM; - /* pmd now points to a valid segment table entry. 
*/ - rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); - if (!rmap) - return -ENOMEM; - /* Link gmap segment table entry location to page table. */ - page = pmd_page(*pmd); - mp = (struct gmap_pgtable *) page->index; - rmap->entry = segment_ptr; - spin_lock(&mm->page_table_lock); - list_add(&rmap->list, &mp->mapper); - spin_unlock(&mm->page_table_lock); - /* Set gmap segment table entry to page table. */ - *segment_ptr = pmd_val(*pmd) & PAGE_MASK; - return vmaddr | (address & ~PMD_MASK); + while (1) { + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INV)) { + /* Page table is present */ + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } + if (!(segment & _SEGMENT_ENTRY_RO)) + /* Nothing mapped in the gmap address space. */ + break; + rc = gmap_connect_pgtable(segment, segment_ptr, gmap); + if (rc) + return rc; } return -EFAULT; } @@ -574,29 +619,6 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_discard); -void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) -{ - struct gmap_rmap *rmap, *next; - struct gmap_pgtable *mp; - struct page *page; - int flush; - - flush = 0; - spin_lock(&mm->page_table_lock); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - list_for_each_entry_safe(rmap, next, &mp->mapper, list) { - *rmap->entry = - _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; - list_del(&rmap->list); - kfree(rmap); - flush = 1; - } - spin_unlock(&mm->page_table_lock); - if (flush) - __tlb_flush_global(); -} - static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { @@ -649,8 +671,8 @@ static inline void page_table_free_pgste(unsigned long *table) { } -static inline void gmap_unmap_notifier(struct mm_struct *mm, - unsigned long *table) +static inline void gmap_disconnect_pgtable(struct mm_struct *mm, + 
unsigned long *table) { } @@ -716,7 +738,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) unsigned int bit, mask; if (mm_has_pgste(mm)) { - gmap_unmap_notifier(mm, table); + gmap_disconnect_pgtable(mm, table); return page_table_free_pgste(table); } /* Free 1K/2K page table fragment of a 4K page */ @@ -759,7 +781,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) mm = tlb->mm; if (mm_has_pgste(mm)) { - gmap_unmap_notifier(mm, table); + gmap_disconnect_pgtable(mm, table); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; From 241fd9bcbc10c144531e88b5e3a62bc11090e5e4 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Fri, 19 Apr 2013 18:03:02 +0200 Subject: [PATCH 45/59] s390/zcore: Fix HSA copy length for last block Currently always one page is copied to a user buffer for the last HSA block in memcpy_hsa(). Now the correct length is used. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- drivers/s390/char/zcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 1d61a01576d2..22820610022c 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -127,7 +127,7 @@ static int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode) } if (mode == TO_USER) { if (copy_to_user((__force __user void*) dest + offs, buf, - PAGE_SIZE)) + count - offs)) return -EFAULT; } else memcpy(dest + offs, buf, count - offs); From a2aec0d3e22f3f940a165181ef339ac16deefa7c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 20 Apr 2013 13:01:19 +0200 Subject: [PATCH 46/59] s390/compat: fix compat_sys_statfs() memory corruption The f_spare field within struct compat_statfs is four bytes larger than within the native 31 bit struct statfs. 
compat_sys_statfs() clears the f_spare field in user space which means that in compat mode four bytes that are behind the user space supplied struct compat_statfs will be corrupted (zeroed). According to Thomas Gleixner's Linux 2.6 history tree this bug is present since v2.5.74 87880da124 "[PATCH] s390: 31 bit compat.". So it gets fixed shortly before its 10th anniversary. Tough luck. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index f8c6df6cd1f0..d967ac8d55d0 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -135,7 +135,7 @@ struct compat_statfs { s32 f_namelen; s32 f_frsize; s32 f_flags; - s32 f_spare[5]; + s32 f_spare[4]; }; #define COMPAT_RLIM_OLD_INFINITY 0x7fffffff From 0f58104c8c62e40c8734477b6a9308c77b5d9355 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 20 Apr 2013 14:25:42 +0200 Subject: [PATCH 47/59] s390/compat: fix compile error for !COMPAT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this one for !COMPAT: compat.h: In function ‘arch_compat_alloc_user_space’: compat.h:292:2: error: implicit declaration of function ‘is_compat_task’ Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/compat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index d967ac8d55d0..1f1a4490cdeb 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -248,8 +248,6 @@ static inline int is_compat_task(void) return is_32bit_task(); } -#endif - static inline void __user *arch_compat_alloc_user_space(long len) { unsigned long stack; @@ -260,6 +258,8 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *) (stack - len); }
+#endif + struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; From e4371f602e2fce650cb3f7d1ca5c7cd6fca78dda Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 20 Apr 2013 14:05:42 +0200 Subject: [PATCH 48/59] s390/compat: remove ptrace compat definitions from uapi header file The compat definitions are not part of the uapi. So move them to s390's private compat header file. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/compat.h | 16 ++++++++++++++++ arch/s390/include/asm/elf.h | 1 + arch/s390/include/uapi/asm/ptrace.h | 20 -------------------- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 1f1a4490cdeb..da3df9f6eded 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -70,6 +70,22 @@ typedef u32 compat_ulong_t; typedef u64 compat_u64; typedef u32 compat_uptr_t; +typedef struct { + u32 mask; + u32 addr; +} __aligned(8) psw_compat_t; + +typedef struct { + psw_compat_t psw; + u32 gprs[NUM_GPRS]; + u32 acrs[NUM_ACRS]; + u32 orig_gpr2; +} s390_compat_regs; + +typedef struct { + u32 gprs_high[NUM_GPRS]; +} s390_compat_regs_high; + struct compat_timespec { compat_time_t tv_sec; s32 tv_nsec; diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 1bfdf24b85a2..27ec2c3f95ac 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -119,6 +119,7 @@ */ #include +#include #include typedef s390_fp_regs elf_fpregset_t; diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index a5ca214b34fd..3aa9f1ec5b29 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -215,12 +215,6 @@ typedef struct unsigned long addr; } __attribute__ ((aligned(8))) psw_t; -typedef struct -{ - __u32 mask; - __u32 addr; -} __attribute__ ((aligned(8))) psw_compat_t; - #ifndef __s390x__ #define PSW_MASK_PER 
0x40000000UL @@ -295,20 +289,6 @@ typedef struct unsigned long orig_gpr2; } s390_regs; -typedef struct -{ - psw_compat_t psw; - __u32 gprs[NUM_GPRS]; - __u32 acrs[NUM_ACRS]; - __u32 orig_gpr2; -} s390_compat_regs; - -typedef struct -{ - __u32 gprs_high[NUM_GPRS]; -} s390_compat_regs_high; - - /* * Now for the user space program event recording (trace) definitions. * The following structures are used only for the ptrace interface, don't From 63dd9b44ac926d3250c1e8dfcb309c37c870fe21 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 20 Apr 2013 14:07:29 +0200 Subject: [PATCH 49/59] s390/ptrace: remove empty ifdefs Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ptrace.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 3ee5da3bc10c..559512a455da 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -9,9 +9,7 @@ #include #ifndef __ASSEMBLY__ -#ifndef __s390x__ -#else /* __s390x__ */ -#endif /* __s390x__ */ + extern long psw_kernel_bits; extern long psw_user_bits; @@ -77,8 +75,6 @@ struct per_struct_kernel { #define PER_CONTROL_SUSPENSION 0x00400000UL #define PER_CONTROL_ALTERATION 0x00200000UL -#ifdef __s390x__ -#endif /* __s390x__ */ /* * These are defined as per linux/ptrace.h, which see. */ From 186f50fa568a221fdabd6753341c2de4abd24b78 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 22 Apr 2013 19:27:17 +0200 Subject: [PATCH 50/59] s390/pci: return correct dma address for offset > PAGE_SIZE For offset > PAGE_SIZE, s390_dma_map_pages() will issue a warning and return a wrong dma address. This patch removes the warning and fixes the dma return address calculation. 
Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 60e4999e6b67..f8e69d5bc0a9 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -269,8 +269,6 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, int flags = ZPCI_PTE_VALID; dma_addr_t dma_addr; - WARN_ON_ONCE(offset > PAGE_SIZE); - /* This rounds up number of pages based on size and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); iommu_page_index = dma_alloc_iommu(zdev, nr_pages); @@ -292,7 +290,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) { atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages); - return dma_addr + offset; + return dma_addr + (offset & ~PAGE_MASK); } out_free: From b8668fd0a7e1b59ff4fd33b65e7f6d46b2d3cf1c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Apr 2013 10:41:27 +0200 Subject: [PATCH 51/59] s390/uapi: change struct statfs[64] member types to unsigned values Kay Sievers reported that coreutils' stat tool has a problem with s390's statfs[64] definition: > The definition of struct statfs::f_type needs a fix. s390 is the only > architecture in the kernel that uses an int and expects magic > constants lager than INT_MAX to fit into. > > A fix is needed to make Fedora boot on s390, it currently fails to do > so. Userspace does not want to add code to paper-over this issue. [...] > Even coreutils cannot handle it: > #define RAMFS_MAGIC 0x858458f6 > # stat -f -c%t / > ffffffff858458f6 > > #define BTRFS_SUPER_MAGIC 0x9123683E > # stat -f -c%t /mnt > ffffffff9123683e The bug is caused by an implicit sign extension within the stat tool: out_uint_x (pformat, prefix_len, statfsbuf->f_type); where the format finally will be "%lx". 
A similar problem can be found in the 'tail' tool. s390 is the only architecture which has an int type f_type member in struct statfs[64]. Other architectures have either unsigned ints or long values, so that the problem doesn't occur there. Therefore change the type of the f_type member to unsigned int, so that we get zero extension instead of sign extension when assignment to a long value happens. This patch changes the s390 uapi struct statfs[64] definition in the kernel to contain only unsigned values. This was true for 32 bit builds anyway, since we use the generic uapi header file in that case. So let's not conditionally include the generic uapi header file but have the s390 implementation completely independent. Also fix the types of struct compat_statfs to match reality and move the definition of struct compat_statfs64 to asm/compat.h since it is not part of the api. Reported-by: Kay Sievers Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/compat.h | 37 ++++++++++++----- arch/s390/include/uapi/asm/statfs.h | 63 ++++++++++------------------- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index da3df9f6eded..c1e7c646727c 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -140,18 +140,33 @@ struct compat_flock64 { }; struct compat_statfs { - s32 f_type; - s32 f_bsize; - s32 f_blocks; - s32 f_bfree; - s32 f_bavail; - s32 f_files; - s32 f_ffree; + u32 f_type; + u32 f_bsize; + u32 f_blocks; + u32 f_bfree; + u32 f_bavail; + u32 f_files; + u32 f_ffree; compat_fsid_t f_fsid; - s32 f_namelen; - s32 f_frsize; - s32 f_flags; - s32 f_spare[4]; + u32 f_namelen; + u32 f_frsize; + u32 f_flags; + u32 f_spare[4]; +}; + +struct compat_statfs64 { + u32 f_type; + u32 f_bsize; + u64 f_blocks; + u64 f_bfree; + u64 f_bavail; + u64 f_files; + u64 f_ffree; + compat_fsid_t f_fsid; + u32 f_namelen; + u32 f_frsize; + u32 f_flags; + u32
f_spare[4]; }; #define COMPAT_RLIM_OLD_INFINITY 0x7fffffff diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h index 5acca0a34c20..a61d538756f2 100644 --- a/arch/s390/include/uapi/asm/statfs.h +++ b/arch/s390/include/uapi/asm/statfs.h @@ -7,9 +7,6 @@ #ifndef _S390_STATFS_H #define _S390_STATFS_H -#ifndef __s390x__ -#include -#else /* * We can't use because in 64-bit mode * we mix ints of different sizes in our struct statfs. @@ -21,49 +18,33 @@ typedef __kernel_fsid_t fsid_t; #endif struct statfs { - int f_type; - int f_bsize; - long f_blocks; - long f_bfree; - long f_bavail; - long f_files; - long f_ffree; + unsigned int f_type; + unsigned int f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; __kernel_fsid_t f_fsid; - int f_namelen; - int f_frsize; - int f_flags; - int f_spare[4]; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; }; struct statfs64 { - int f_type; - int f_bsize; - long f_blocks; - long f_bfree; - long f_bavail; - long f_files; - long f_ffree; + unsigned int f_type; + unsigned int f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; __kernel_fsid_t f_fsid; - int f_namelen; - int f_frsize; - int f_flags; - int f_spare[4]; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; }; -struct compat_statfs64 { - __u32 f_type; - __u32 f_bsize; - __u64 f_blocks; - __u64 f_bfree; - __u64 f_bavail; - __u64 f_files; - __u64 f_ffree; - __kernel_fsid_t f_fsid; - __u32 f_namelen; - __u32 f_frsize; - __u32 f_flags; - __u32 f_spare[4]; -}; - -#endif /* __s390x__ */ #endif From dc7ee00d4771b3218b10e09e1071ee6eb176d381 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 24 Apr 2013 10:20:43 +0200 Subject: [PATCH 52/59] s390: lowcore stack pointer offsets Store 
the stack pointers in the lowcore for the kernel stack, the async stack and the panic stack with the offset required for the first user. This avoids an unnecessary add instruction on the system call path. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 36 ++++++++++++++++-------------------- arch/s390/kernel/entry64.S | 34 ++++++++++++++-------------------- arch/s390/kernel/setup.c | 9 ++++++--- arch/s390/kernel/smp.c | 15 ++++++++++----- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 94feff7d6132..17d5cc057893 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -45,6 +45,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE #define BASED(name) name-system_call(%r13) @@ -97,10 +98,10 @@ STACK_SIZE = 1 << STACK_SHIFT sra %r14,\shift jnz 1f CHECK_STACK 1<<\shift,\savearea + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: l %r15,\stack # load target stack -2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - la %r11,STACK_FRAME_OVERHEAD(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro ADD64 high,low,timer @@ -150,7 +151,7 @@ ENTRY(__switch_to) l %r4,__THREAD_info(%r2) # get thread_info of prev l %r5,__THREAD_info(%r3) # get thread_info of next lr %r15,%r5 - ahi %r15,STACK_SIZE # end of kernel stack of next + ahi %r15,STACK_INIT # end of kernel stack of next st %r3,__LC_CURRENT # store task struct of next st %r5,__LC_THREAD_INFO # store thread info of next st %r15,__LC_KERNEL_STACK # store end of kernel stack @@ -178,7 +179,6 @@ sysc_stm: l %r13,__LC_SVC_NEW_PSW+4 sysc_per: l %r15,__LC_KERNEL_STACK - ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER @@ -359,11 +359,11 @@ 
ENTRY(pgm_check_handler) tm __LC_PGM_ILC+3,0x80 # check for per exception jnz pgm_svcper # -> single stepped svc 0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER l %r15,__LC_KERNEL_STACK -2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - la %r11,STACK_FRAME_OVERHEAD(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC stm %r8,%r9,__PT_PSW(%r11) @@ -485,7 +485,6 @@ io_work: # io_work_user: l %r1,__LC_KERNEL_STACK - ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) @@ -646,7 +645,6 @@ mcck_skip: tm __PT_PSW+1(%r11),0x01 # returning to user ? jno mcck_return l %r1,__LC_KERNEL_STACK # switch to kernel stack - ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r15) @@ -674,6 +672,7 @@ mcck_panic: sra %r14,PAGE_SHIFT jz 0f l %r15,__LC_PANIC_STACK + j mcck_skip 0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j mcck_skip @@ -714,12 +713,10 @@ ENTRY(restart_int_handler) */ stack_overflow: l %r15,__LC_PANIC_STACK # change to panic stack - ahi %r15,-__PT_SIZE # create pt_regs - stm %r0,%r7,__PT_R0(%r15) - stm %r8,%r9,__PT_PSW(%r15) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + stm %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(32,%r11),0(%r14) - lr %r15,%r11 - ahi %r15,-STACK_FRAME_OVERHEAD l %r1,BASED(1f) xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) lr %r2,%r11 # pass pointer to pt_regs @@ -799,15 +796,14 @@ cleanup_system_call: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER # set up saved register 11 l %r15,__LC_KERNEL_STACK - ahi %r15,-__PT_SIZE - st %r15,12(%r11) # r11 pt_regs pointer + la %r9,STACK_FRAME_OVERHEAD(%r15) + st %r9,12(%r11) # r11 pt_regs pointer # fill 
pt_regs - mvc __PT_R8(32,%r15),__LC_SAVE_AREA_SYNC - stm %r0,%r7,__PT_R0(%r15) - mvc __PT_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + mvc __PT_R8(32,%r9),__LC_SAVE_AREA_SYNC + stm %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC # setup saved register 15 - ahi %r15,-STACK_FRAME_OVERHEAD st %r15,28(%r11) # r15 stack pointer # set new psw address and exit l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 2e6d60c55f90..72f230baf5d1 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -39,6 +39,7 @@ __PT_R15 = __PT_GPRS + 120 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING | _TIF_PER_TRAP ) @@ -124,10 +125,10 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) srag %r14,%r14,\shift jnz 1f CHECK_STACK 1<<\shift,\savearea + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: lg %r15,\stack # load target stack -2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - la %r11,STACK_FRAME_OVERHEAD(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME scratch,enter_timer @@ -177,7 +178,7 @@ ENTRY(__switch_to) lg %r4,__THREAD_info(%r2) # get thread_info of prev lg %r5,__THREAD_info(%r3) # get thread_info of next lgr %r15,%r5 - aghi %r15,STACK_SIZE # end of kernel stack of next + aghi %r15,STACK_INIT # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r5,__LC_THREAD_INFO # store thread info of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack @@ -203,10 +204,8 @@ sysc_stmg: stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO - larl %r13,system_call sysc_per: lg %r15,__LC_KERNEL_STACK - aghi 
%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER @@ -389,6 +388,7 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+3,0x80 # check for per exception jnz pgm_svcper # -> single stepped svc 0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER LAST_BREAK %r14 @@ -398,8 +398,7 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+2,0x02 # check for transaction abort jz 2f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - la %r11,STACK_FRAME_OVERHEAD(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) @@ -526,7 +525,6 @@ io_work: # io_work_user: lg %r1,__LC_KERNEL_STACK - aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) @@ -688,7 +686,6 @@ mcck_skip: tm __PT_PSW+1(%r11),0x01 # returning to user ? jno mcck_return lg %r1,__LC_KERNEL_STACK # switch to kernel stack - aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) @@ -755,14 +752,12 @@ ENTRY(restart_int_handler) * Setup a pt_regs so that show_trace can provide a good call trace. 
*/ stack_overflow: - lg %r11,__LC_PANIC_STACK # change to panic stack - aghi %r11,-__PT_SIZE # create pt_regs + lg %r15,__LC_PANIC_STACK # change to panic stack + la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 - lgr %r15,%r11 - aghi %r15,-STACK_FRAME_OVERHEAD xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow @@ -846,15 +841,14 @@ cleanup_system_call: mvc __TI_last_break(8,%r12),16(%r11) 0: # set up saved register r11 lg %r15,__LC_KERNEL_STACK - aghi %r15,-__PT_SIZE - stg %r15,24(%r11) # r11 pt_regs pointer + la %r9,STACK_FRAME_OVERHEAD(%r15) + stg %r9,24(%r11) # r11 pt_regs pointer # fill pt_regs - mvc __PT_R8(64,%r15),__LC_SAVE_AREA_SYNC - stmg %r0,%r7,__PT_R0(%r15) - mvc __PT_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC + stmg %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC # setup saved register r15 - aghi %r15,-STACK_FRAME_OVERHEAD stg %r15,56(%r11) # r15 stack pointer # set new psw address and exit larl %r9,sysc_do_svc diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 29268859d8ee..0f419c5765c8 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -377,11 +377,14 @@ static void __init setup_lowcore(void) PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; - lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; + lc->kernel_stack = ((unsigned long) &init_thread_union) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->async_stack = (unsigned long) - __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; + __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + + ASYNC_SIZE - STACK_FRAME_OVERHEAD - 
sizeof(struct pt_regs); lc->panic_stack = (unsigned long) - __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; + __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; lc->machine_flags = S390_lowcore.machine_flags; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 549c9d173c0f..8bde89eafd88 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -181,8 +181,10 @@ static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); - lc->async_stack = pcpu->async_stack + ASYNC_SIZE; - lc->panic_stack = pcpu->panic_stack + PAGE_SIZE; + lc->async_stack = pcpu->async_stack + ASYNC_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->panic_stack = pcpu->panic_stack + PAGE_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->cpu_nr = cpu; #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { @@ -253,7 +255,8 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) struct _lowcore *lc = pcpu->lowcore; struct thread_info *ti = task_thread_info(tsk); - lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE; + lc->kernel_stack = (unsigned long) task_stack_page(tsk) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->thread_info = (unsigned long) task_thread_info(tsk); lc->current_task = (unsigned long) tsk; lc->user_timer = ti->user_timer; @@ -810,8 +813,10 @@ void __init smp_prepare_boot_cpu(void) pcpu->state = CPU_STATE_CONFIGURED; pcpu->address = boot_cpu_address; pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); - pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE; - pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE; + pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE + + 
STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE + + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); set_cpu_present(0, true); From 616498813b11ffefe1ed36b9f2e4fd2cdbd22f15 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 24 Apr 2013 12:58:39 +0200 Subject: [PATCH 53/59] s390: system call path micro optimization Add a pointer to the system call table to the thread_info structure. The TIF_31BIT bit is set or cleared by SET_PERSONALITY exactly once for the lifetime of a process. With the pointer to the correct system call table in thread_info the system call path in entry64.S can drop the check for TIF_31BIT which saves a couple of instructions. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 22 ++++++++++++++++++---- arch/s390/include/asm/syscall.h | 1 + arch/s390/include/asm/thread_info.h | 1 + arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/entry.S | 3 +-- arch/s390/kernel/entry64.S | 9 ++------- 6 files changed, 24 insertions(+), 13 deletions(-) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 27ec2c3f95ac..78f4f8711d58 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -120,6 +120,7 @@ #include #include +#include #include typedef s390_fp_regs elf_fpregset_t; @@ -181,18 +182,31 @@ extern unsigned long elf_hwcap; extern char elf_platform[]; #define ELF_PLATFORM (elf_platform) -#ifdef CONFIG_64BIT +#ifndef CONFIG_COMPAT +#define SET_PERSONALITY(ex) \ +do { \ + set_personality(PER_LINUX | \ + (current->personality & (~PER_MASK))); \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table; \ +} while (0) +#else /* CONFIG_COMPAT */ #define SET_PERSONALITY(ex) \ do { \ if (personality(current->personality) != PER_LINUX32) \ set_personality(PER_LINUX | \ (current->personality & ~PER_MASK)); \ - if 
((ex).e_ident[EI_CLASS] == ELFCLASS32) \ + if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \ set_thread_flag(TIF_31BIT); \ - else \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table_emu; \ + } else { \ clear_thread_flag(TIF_31BIT); \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table; \ + } \ } while (0) -#endif /* CONFIG_64BIT */ +#endif /* CONFIG_COMPAT */ #define STACK_RND_MASK 0x7ffUL diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index fe7b99759e12..cd29d2f4e4f3 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -23,6 +23,7 @@ * type here is what we want [need] for both 32 bit and 64 bit systems. */ extern const unsigned int sys_call_table[]; +extern const unsigned int sys_call_table_emu[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 9e2cfe0349c3..51035e5d86cb 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -41,6 +41,7 @@ struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ + unsigned long sys_call_table; /* System call table address */ unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index fface87056eb..7a82f9f70100 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -35,6 +35,7 @@ int main(void) DEFINE(__TI_task, offsetof(struct thread_info, task)); DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); DEFINE(__TI_flags, offsetof(struct thread_info, flags)); + DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table)); DEFINE(__TI_cpu, 
offsetof(struct thread_info, cpu)); DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 17d5cc057893..4d5e6f8a7978 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -188,6 +188,7 @@ sysc_vtime: mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC sysc_do_svc: oi __TI_flags+3(%r12),_TIF_SYSCALL + l %r10,__TI_sysc_table(%r12) # 31 bit system call table lh %r8,__PT_INT_CODE+2(%r11) sla %r8,2 # shift and test for svc0 jnz sysc_nr_ok @@ -198,7 +199,6 @@ sysc_do_svc: lr %r8,%r1 sla %r8,2 sysc_nr_ok: - l %r10,BASED(.Lsys_call_table) # 31 bit system call table xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) st %r2,__PT_ORIG_GPR2(%r11) st %r7,STACK_FRAME_OVERHEAD(%r15) @@ -906,7 +906,6 @@ cleanup_idle_wait: .Ltrace_enter: .long do_syscall_trace_enter .Ltrace_exit: .long do_syscall_trace_exit .Lschedule_tail: .long schedule_tail -.Lsys_call_table: .long sys_call_table .Lsysc_per: .long sysc_per + 0x80000000 #ifdef CONFIG_TRACE_IRQFLAGS .Lhardirqs_on: .long trace_hardirqs_on_caller diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 72f230baf5d1..4c17eece707e 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -216,6 +216,7 @@ sysc_vtime: mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC sysc_do_svc: oi __TI_flags+7(%r12),_TIF_SYSCALL + lg %r10,__TI_sysc_table(%r12) # address of system call table llgh %r8,__PT_INT_CODE+2(%r11) slag %r8,%r8,2 # shift and test for svc 0 jnz sysc_nr_ok @@ -226,13 +227,6 @@ sysc_do_svc: sth %r1,__PT_INT_CODE+2(%r11) slag %r8,%r1,2 sysc_nr_ok: - larl %r10,sys_call_table # 64 bit system call table -#ifdef CONFIG_COMPAT - tm __TI_flags+5(%r12),(_TIF_31BIT>>16) - jno sysc_noemu - larl %r10,sys_call_table_emu # 31 bit system call table -sysc_noemu: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stg %r2,__PT_ORIG_GPR2(%r11) stg 
%r7,STACK_FRAME_OVERHEAD(%r15) @@ -1005,6 +999,7 @@ sys_call_table: #ifdef CONFIG_COMPAT #define SYSCALL(esa,esame,emu) .long emu + .globl sys_call_table_emu sys_call_table_emu: #include "syscalls.S" #undef SYSCALL From 581618f226e83eb0fbe81ba2a623c6a55cbb7487 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 25 Apr 2013 09:11:54 +0200 Subject: [PATCH 54/59] s390: remove small stack config option We've seen repeatedly that 8KB stack size on 64 bit kernels is not sufficient. So simply remove the config option. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 13 ------------- arch/s390/Makefile | 10 ---------- arch/s390/include/asm/thread_info.h | 5 ----- 3 files changed, 28 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index eb8fb629f00b..bda6ba6f3cf5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -375,19 +375,6 @@ config PACK_STACK Say Y if you are unsure. -config SMALL_STACK - def_bool n - prompt "Use 8kb for kernel stack instead of 16kb" - depends on PACK_STACK && 64BIT && !LOCKDEP - help - If you say Y here and the compiler supports the -mkernel-backchain - option the kernel will use a smaller kernel stack size. The reduced - size is 8kb instead of 16kb. This allows to run more threads on a - system and reduces the pressure on the memory management for higher - order page allocations. - - Say N if you are unsure. 
- config CHECK_STACK def_bool y prompt "Detect kernel stack overflow" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 7e3ce78d4290..a7d68a467ce8 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -55,22 +55,12 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls ifeq ($(call cc-option-yn,-mkernel-backchain),y) cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -ifdef CONFIG_SMALL_STACK -STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) -endif endif # new style option for packed stacks ifeq ($(call cc-option-yn,-mpacked-stack),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -ifdef CONFIG_SMALL_STACK -STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) -endif endif ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 51035e5d86cb..eb5f64d26d06 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -14,13 +14,8 @@ #define THREAD_ORDER 1 #define ASYNC_ORDER 1 #else /* CONFIG_64BIT */ -#ifndef __SMALL_STACK #define THREAD_ORDER 2 #define ASYNC_ORDER 2 -#else -#define THREAD_ORDER 1 -#define ASYNC_ORDER 1 -#endif #endif /* CONFIG_64BIT */ #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) From 94c163663fc1dcfc067a5fb3cc1446b9469975ce Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 25 Apr 2013 10:03:15 +0200 Subject: [PATCH 55/59] s390/memory hotplug: prevent offline of active memory increments In case a machine supports memory hotplug all active memory increments present at IPL time have been initialized with a "usecount" of 1. 
This is wrong if the memory increment size is larger than the memory section size of the memory hotplug code. If that is the case the usecount must be initialized with the number of memory sections that fit into one memory increment. Otherwise it is possible to put a memory increment into standby state even if there are still active sections. Afterwards addressing exceptions might happen which cause the kernel to panic. However even worse, if a memory increment was put into standby state and afterwards into active state again, its contents would have been zeroed, leading to memory corruption. This was only an issue for machines that support standby memory and have at least 256GB memory. This is broken since commit fdb1bb15 "[S390] sclp/memory hotplug: fix initial usecount of increments". Signed-off-by: Heiko Carstens Reviewed-by: Gerald Schaefer Cc: stable@vger.kernel.org # 2.6.39+ Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_cmd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index cd798386b622..178836ec252b 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -561,6 +561,8 @@ static void __init sclp_add_standby_memory(void) add_memory_merged(0); } +#define MEM_SCT_SIZE (1UL << SECTION_SIZE_BITS) + static void __init insert_increment(u16 rn, int standby, int assigned) { struct memory_increment *incr, *new_incr; @@ -573,7 +575,7 @@ static void __init insert_increment(u16 rn, int standby, int assigned) new_incr->rn = rn; new_incr->standby = standby; if (!standby) - new_incr->usecount = 1; + new_incr->usecount = rzm > MEM_SCT_SIZE ? 
rzm/MEM_SCT_SIZE : 1; last_rn = 0; prev = &sclp_mem_list; list_for_each_entry(incr, &sclp_mem_list, list) { From 3ad19b8fb07497f975aa6aba187198ce11191dd2 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 25 Apr 2013 13:01:39 +0200 Subject: [PATCH 56/59] s390/cio: fix irq statistics When we fetch an interrupt on the CCW console using tsch (via ccw_device_wait_idle formerly known as wait_cons_dev) we increment the irq count for the affected interruption class but it is not accounted as an IO interrupt. This is broken since commit b603d258a43b4e7339660bdd3b5c25eacd603e54 "s390: remove superfluous tpi from wait_cons_dev" Fix it so that the sum of the individual interrupts per class matches the number of IO interrupts again. Reported-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/cio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index af5fd716449f..935d80b4e9ce 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -647,6 +647,7 @@ void cio_tsch(struct subchannel *sch) local_bh_disable(); irq_enter(); } + kstat_incr_irqs_this_cpu(IO_INTERRUPT, NULL); if (sch->driver && sch->driver->irq) sch->driver->irq(sch); else From bd86055fc938493259a15dc00ee77435b4d2e83f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 25 Apr 2013 13:02:39 +0200 Subject: [PATCH 57/59] s390/scm_blk: allow more cluster size values Allow 0 and powers of 2 between 2 and 128 for write_cluster_size. 
Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/block/scm_blk_cluster.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c index f4bb61b0cea1..c0d102e3a48b 100644 --- a/drivers/s390/block/scm_blk_cluster.c +++ b/drivers/s390/block/scm_blk_cluster.c @@ -223,6 +223,8 @@ void scm_cluster_request_irq(struct scm_request *scmrq) bool scm_cluster_size_valid(void) { - return write_cluster_size == 0 || write_cluster_size == 32 || - write_cluster_size == 64 || write_cluster_size == 128; + if (write_cluster_size == 1 || write_cluster_size > 128) + return false; + + return !(write_cluster_size & (write_cluster_size - 1)); } From fff60fabc71c41bd1bd4beb4fdd735bb8e01096c Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 25 Apr 2013 13:03:18 +0200 Subject: [PATCH 58/59] s390/scm_blk: fix memleak in init function If the allocation of a single request fails the already allocated requests will not be freed. 
Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/block/scm_blk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index e9b9c8392832..b303cab76a7f 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -465,7 +465,7 @@ static int __init scm_blk_init(void) scm_major = ret; ret = scm_alloc_rqs(nr_requests); if (ret) - goto out_unreg; + goto out_free; scm_debug = debug_register("scm_log", 16, 1, 16); if (!scm_debug) { @@ -486,7 +486,6 @@ out_dbf: debug_unregister(scm_debug); out_free: scm_free_rqs(); -out_unreg: unregister_blkdev(scm_major, "scm"); out: return ret; From 1c21351b722c9101bacdb961f5b5711669c882a0 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 25 Apr 2013 14:49:48 +0200 Subject: [PATCH 59/59] s390/pci: use pci_scan_root_bus The pci config space accessors on s390 are (now) smart enough to figure out if a pci function is available. So instead of calling pci_create_root_bus and then pci_scan_single_device for each available function just call pci_scan_root_bus and let the pci core do the scanning (via config reads on all possible functions) and device creation. 
Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 50 +++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 48de2be7b46a..e6f15b5d8b7d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -627,17 +627,6 @@ void zpci_free_device(struct zpci_dev *zdev) kfree(zdev); } -static void zpci_scan_devices(void) -{ - struct zpci_dev *zdev; - - mutex_lock(&zpci_list_lock); - list_for_each_entry(zdev, &zpci_list, entry) - if (zdev->state == ZPCI_FN_STATE_CONFIGURED) - zpci_scan_device(zdev); - mutex_unlock(&zpci_list_lock); -} - /* * Too late for any s390 specific setup, since interrupts must be set up * already which requires DMA setup too and the pci scan will access the @@ -846,6 +835,7 @@ int pcibios_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); + zdev->pdev = pdev; zpci_debug_init_device(zdev); zpci_fmb_enable_device(zdev); zpci_map_resources(zdev); @@ -853,7 +843,7 @@ int pcibios_add_device(struct pci_dev *pdev) return 0; } -static int zpci_create_device_bus(struct zpci_dev *zdev) +static int zpci_scan_bus(struct zpci_dev *zdev) { struct resource *res; LIST_HEAD(resources); @@ -890,8 +880,8 @@ static int zpci_create_device_bus(struct zpci_dev *zdev) pci_add_resource(&resources, res); } - zdev->bus = pci_create_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops, - zdev, &resources); + zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops, + zdev, &resources); if (!zdev->bus) return -EIO; @@ -955,9 +945,16 @@ int zpci_create_device(struct zpci_dev *zdev) if (rc) goto out; - rc = zpci_create_device_bus(zdev); + if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { + rc = zpci_enable_device(zdev); + if (rc) + goto out_free; + + zdev->state = ZPCI_FN_STATE_ONLINE; + } + rc = zpci_scan_bus(zdev); if (rc) - goto out_bus; + goto out_disable; mutex_lock(&zpci_list_lock); 
list_add_tail(&zdev->entry, &zpci_list); @@ -965,21 +962,12 @@ int zpci_create_device(struct zpci_dev *zdev) hotplug_ops->create_slot(zdev); mutex_unlock(&zpci_list_lock); - if (zdev->state == ZPCI_FN_STATE_STANDBY) - return 0; - - rc = zpci_enable_device(zdev); - if (rc) - goto out_start; return 0; -out_start: - mutex_lock(&zpci_list_lock); - list_del(&zdev->entry); - if (hotplug_ops) - hotplug_ops->remove_slot(zdev); - mutex_unlock(&zpci_list_lock); -out_bus: +out_disable: + if (zdev->state == ZPCI_FN_STATE_ONLINE) + zpci_disable_device(zdev); +out_free: zpci_free_domain(zdev); out: return rc; @@ -1006,10 +994,7 @@ int zpci_scan_device(struct zpci_dev *zdev) pci_bus_add_devices(zdev->bus); - /* now that pdev was added to the bus mark it as used */ - zdev->state = ZPCI_FN_STATE_ONLINE; return 0; - out: zpci_dma_exit_device(zdev); clp_disable_fh(zdev); @@ -1123,7 +1108,6 @@ static int __init pci_base_init(void) if (rc) goto out_find; - zpci_scan_devices(); return 0; out_find: