From a106025015c8d24af6518aba3ac19c4dc9098b7c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Aug 2010 14:51:10 +0200 Subject: [PATCH 001/104] HID: picolcd: testing the wrong variable "ref_cnt" is a point to the reference count and it's non-null. We really want to test the reference count itself. Signed-off-by: Dan Carpenter Signed-off-by: Jiri Kosina --- drivers/hid/hid-picolcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c index 346f0e34987e..c0bdebac5672 100644 --- a/drivers/hid/hid-picolcd.c +++ b/drivers/hid/hid-picolcd.c @@ -547,7 +547,7 @@ static void picolcd_fb_destroy(struct fb_info *info) ref_cnt--; mutex_lock(&info->lock); (*ref_cnt)--; - may_release = !ref_cnt; + may_release = !*ref_cnt; mutex_unlock(&info->lock); if (may_release) { framebuffer_release(info); From 1778ca298b06ec86af5fc9603447c379cbfb477b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= Date: Fri, 6 Aug 2010 10:08:04 +0200 Subject: [PATCH 002/104] HID: picolcd: correct ordering of framebuffer freeing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the free() ordering (which was never reached due to wrong check). Signed-off-by: Bruno Prémont Signed-off-by: Jiri Kosina --- drivers/hid/hid-picolcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c index c0bdebac5672..bc2e07740628 100644 --- a/drivers/hid/hid-picolcd.c +++ b/drivers/hid/hid-picolcd.c @@ -550,8 +550,8 @@ static void picolcd_fb_destroy(struct fb_info *info) may_release = !*ref_cnt; mutex_unlock(&info->lock); if (may_release) { - framebuffer_release(info); vfree((u8 *)info->fix.smem_start); + framebuffer_release(info); } } From 575570f02761bd680ba5731c1dfd4701062e7fb2 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 27 Jul 2010 16:06:34 +0800 Subject: [PATCH 003/104] tracing: Fix an unallocated memory access in function_graph With CONFIG_DEBUG_PAGEALLOC, I observed an unallocated memory access in function_graph trace. It appears we find a small size entry in ring buffer, but we access it as a big size entry. The access overflows the page size and touches an unallocated page. Cc: Signed-off-by: Shaohua Li LKML-Reference: <1280217994.32400.76.camel@sli10-desk.sh.intel.com> [ Added a comment to explain the problem - SDR ] Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 79f4bac99a94..b4c179ae4e45 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -507,7 +507,15 @@ get_return_for_leaf(struct trace_iterator *iter, * if the output fails. */ data->ent = *curr; - data->ret = *next; + /* + * If the next event is not a return type, then + * we only care about what type it is. Otherwise we can + * safely copy the entire event. + */ + if (next->ent.type == TRACE_GRAPH_RET) + data->ret = *next; + else + data->ret.ent.type = next->ent.type; } } From 18fab912d4fa70133df164d2dcf3310be0c38c34 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 28 Jul 2010 14:14:01 +0800 Subject: [PATCH 004/104] tracing: Fix ring_buffer_read_page reading out of page boundary With the configuration: CONFIG_DEBUG_PAGEALLOC=y and Shaohua's patch: [PATCH]x86: make spurious_fault check correct pte bit Function call graph trace with the following will trigger a page fault. # cd /sys/kernel/debug/tracing/ # echo function_graph > current_tracer # cat per_cpu/cpu1/trace_pipe_raw > /dev/null BUG: unable to handle kernel paging request at ffff880006e99000 IP: [] rb_event_length+0x1/0x3f PGD 1b19063 PUD 1b1d063 PMD 3f067 PTE 6e99160 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC last sysfs file: /sys/devices/virtual/net/lo/operstate CPU 1 Modules linked in: Pid: 1982, comm: cat Not tainted 2.6.35-rc6-aes+ #300 /Bochs RIP: 0010:[] [] rb_event_length+0x1/0x3f RSP: 0018:ffff880006475e38 EFLAGS: 00010006 RAX: 0000000000000ff0 RBX: ffff88000786c630 RCX: 000000000000001d RDX: ffff880006e98000 RSI: 0000000000000ff0 RDI: ffff880006e99000 RBP: ffff880006475eb8 R08: 000000145d7008bd R09: 0000000000000000 R10: 0000000000008000 R11: ffffffff815d9336 R12: ffff880006d08000 R13: ffff880006e605d8 R14: 0000000000000000 R15: 0000000000000018 FS: 00007f2b83e456f0(0000) GS:ffff880002100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff880006e99000 CR3: 00000000064a8000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process cat (pid: 1982, threadinfo ffff880006474000, task ffff880006e40770) Stack: ffff880006475eb8 ffffffff8108730f 0000000000000ff0 000000145d7008bd <0> ffff880006e98010 ffff880006d08010 0000000000000296 ffff88000786c640 <0> ffffffff81002956 0000000000000000 ffff8800071f4680 ffff8800071f4680 Call Trace: [] ? ring_buffer_read_page+0x15a/0x24a [] ? return_to_handler+0x15/0x2f [] tracing_buffers_read+0xb9/0x164 [] vfs_read+0xaf/0x150 [] return_to_handler+0x0/0x2f [] __bad_area_nosemaphore+0x17e/0x1a1 [] return_to_handler+0x0/0x2f [] bad_area_nosemaphore+0x13/0x15 Code: 80 25 b2 16 b3 00 fe c9 c3 55 48 89 e5 f0 80 0d a4 16 b3 00 02 c9 c3 55 31 c0 48 89 e5 48 83 3d 94 16 b3 00 01 c9 0f 94 c0 c3 55 <8a> 0f 48 89 e5 83 e1 1f b8 08 00 00 00 0f b6 d1 83 fa 1e 74 27 RIP [] rb_event_length+0x1/0x3f RSP CR2: ffff880006e99000 ---[ end trace a6877bb92ccb36bb ]--- The root cause is that ring_buffer_read_page() may read out of page boundary, because the boundary checking is done after reading. This is fixed via doing boundary checking before reading. Reported-by: Shaohua Li Cc: Signed-off-by: Huang Ying LKML-Reference: <1280297641.2771.307.camel@yhuang-dev> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1da7b6ea8b85..5ec8f1d1480e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3868,6 +3868,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, rpos = reader->read; pos += size; + if (rpos >= commit) + break; + event = rb_reader_event(cpu_buffer); size = rb_event_length(event); } while (len > size); From 669502ff31d7dba1849aec7ee2450a3c61f57d39 Mon Sep 17 00:00:00 2001 From: Andy Chittenden Date: Tue, 10 Aug 2010 10:19:53 -0400 Subject: [PATCH 005/104] SUNRPC: fix NFS client over TCP hangs due to packet loss (Bug 16494) When reusing a TCP connection, ensure that it's aborted if a previous shutdown attempt has been made on that connection so that the RPC over TCP recovery mechanism succeeds. Signed-off-by: Andy Chittenden Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7ca65c7005ea..66d136984d57 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1305,10 +1305,11 @@ static void xs_tcp_state_change(struct sock *sk) if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); - dprintk("RPC: state %x conn %d dead %d zapped %d\n", + dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n", sk->sk_state, xprt_connected(xprt), sock_flag(sk, SOCK_DEAD), - sock_flag(sk, SOCK_ZAPPED)); + sock_flag(sk, SOCK_ZAPPED), + sk->sk_shutdown); switch (sk->sk_state) { case TCP_ESTABLISHED: @@ -1779,10 +1780,25 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra { unsigned int state = transport->inet->sk_state; - if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) - return; - if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) - return; + if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: TCP_CLOSEd and sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } + if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: ESTABLISHED/SYN_SENT " + "sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } xs_abort_connection(xprt, transport); } From f5a73672d1811f2fb1dcb62ca90ceb12b2050ae7 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Tue, 10 Aug 2010 10:20:05 -0400 Subject: [PATCH 006/104] NFS: allow close-to-open cache semantics to apply to root of NFS filesystem To obey NFS cache semantics, the client must verify the cached attributes when a file is opened. In most cases this is done by a call to d_validate as one of the last steps in path_walk. However for the root of a filesystem, d_validate is only ever called on the mounted-on filesystem (except when the path ends '.' or '..'). So NFS has no chance to validate the attributes. So, in nfs_opendir, we revalidate the attributes if the opened directory is the mountpoint. This may cause double-validation for "." and ".." lookups, but that is better than missing regular /path/name lookups completely. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 29539ceeb745..bd91b2778315 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -140,6 +140,13 @@ nfs_opendir(struct inode *inode, struct file *filp) /* Call generic open code in order to cache credentials */ res = nfs_open(inode, filp); + if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { + /* This is a mountpoint, so d_revalidate will never + * have been called, so we need to refresh the + * inode (for close-open consistency) ourselves. + */ + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + } return res; } From 9b00c64318cc337846a7a08a5678f5f19aeff188 Mon Sep 17 00:00:00 2001 From: "Patrick J. LoPresti" Date: Tue, 10 Aug 2010 17:28:01 -0400 Subject: [PATCH 007/104] nfs: Add "lookupcache" to displayed mount options Running "cat /proc/mounts" fails to display the "lookupcache" option. This oversight cost me a bunch of wasted time recently. The following simple patch fixes it. CC: stable Signed-off-by: Patrick LoPresti Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f1ae39f6cb02..3d0d63c00304 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -655,6 +655,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, ",fsc"); + + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) + seq_printf(m, ",lookupcache=none"); + else + seq_printf(m, ",lookupcache=pos"); + } } /* From 5d7ca35a182a626f8ed5596023ad42eb219a332e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 11 Aug 2010 12:42:15 -0400 Subject: [PATCH 008/104] nfs: Remove redundant NULL check upon kfree() Signed-off-by: Davidlohr Bueso Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ffbb98ddec3..6b44bbfb7d8a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2273,8 +2273,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct out: if (page) __free_page(page); - if (locations) - kfree(locations); + kfree(locations); return status; } From 7a8b80eb38b248cfdf84048dad12073d5cfba3e6 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 11 Aug 2010 12:47:08 -0400 Subject: [PATCH 009/104] xprtrdma: Do not truncate iova_start values in frmr registrations. A bad cast causes the iova_start, which in this case is a 64b DMA bus address, to be truncated on 32b systems. This breaks frmrs on 32b systems. No cast is needed. Signed-off-by: Steve Wise Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 27015c6d8eb5..3bdbd9f3b88f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1490,7 +1490,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, memset(&frmr_wr, 0, sizeof frmr_wr); frmr_wr.opcode = IB_WR_FAST_REG_MR; frmr_wr.send_flags = 0; /* unsignaled */ - frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; + frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; frmr_wr.wr.fast_reg.page_list_len = i; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; From 15cdc644b268a9a9ce73ce0b153129222c254b7b Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Wed, 11 Aug 2010 12:47:24 -0400 Subject: [PATCH 010/104] rpcrdma: Fix SQ size calculation when memreg is FRMR This patch updates the computation to include the worst case situation where three FRMR are required to map a single RPC REQ. Signed-off-by: Tom Tucker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/rpc_rdma.c | 2 ++ net/sunrpc/xprtrdma/verbs.c | 20 ++++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e5e28d1946a4..2ac3f6e8adff 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -249,6 +249,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, req->rl_nchunks = nchunks; BUG_ON(nchunks == 0); + BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) + && (nchunks > 3)); /* * finish off header. If write, marshal discrim and nchunks. diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3bdbd9f3b88f..5f4c7b3bc711 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -650,10 +650,22 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: - /* Add room for frmr register and invalidate WRs */ - ep->rep_attr.cap.max_send_wr *= 3; - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) - return -EINVAL; + /* Add room for frmr register and invalidate WRs. + * 1. FRMR reg WR for head + * 2. FRMR invalidate WR for head + * 3. FRMR reg WR for pagelist + * 4. FRMR invalidate WR for pagelist + * 5. FRMR reg WR for tail + * 6. FRMR invalidate WR for tail + * 7. The RDMA_SEND WR + */ + ep->rep_attr.cap.max_send_wr *= 7; + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { + cdata->max_requests = devattr.max_qp_wr / 7; + if (!cdata->max_requests) + return -EINVAL; + ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; + } break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: From 0702099bd86c33c2dcdbd3963433a61f3f503901 Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Wed, 11 Aug 2010 13:10:16 -0400 Subject: [PATCH 011/104] NFS: fix the return value of nfs_file_fsync() By the commit af7fa16 2010-08-03 NFS: Fix up the fsync code close(2) became returning the non-zero value even if it went well. nfs_file_fsync() should return 0 when "status" is positive. Signed-off-by: J. R. Okajima Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2d141a74ae82..eb51bd6201da 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -323,7 +323,7 @@ nfs_file_fsync(struct file *file, int datasync) have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); if (have_error) ret = xchg(&ctx->error, 0); - if (!ret) + if (!ret && status < 0) ret = status; return ret; } From 2f81b47135a971a22ccad9f3cc8c68a583b93ea4 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 11 Aug 2010 16:11:00 -0400 Subject: [PATCH 012/104] ipw2100: register pm_qos request before registering pci driver It is necessary to call pm_qos_add_request prior to calling pm_qos_update_request. It was revealed that ipw2100 has been doing this wrong since "pm_qos: Get rid of the allocation in pm_qos_add_request()" (commit 82f682514a5df89ffb3890627eebf0897b7a84ec) added a WARN that results in the following backtrace: WARNING: at kernel/pm_qos_params.c:264 pm_qos_update_request+0x5e/0x70() pm_qos_update_request() called for unknown object Call Trace: [] ? warn_slowpath_common+0x78/0xb0 [] ? pm_qos_update_request+0x5e/0x70 [] ? pm_qos_update_request+0x5e/0x70 [] ? warn_slowpath_fmt+0x33/0x40 [] ? pm_qos_update_request+0x5e/0x70 [] ? ipw2100_up+0x3f/0xf10 [ipw2100] [] ? vsnprintf+0xc9/0x530 [] ? ipw2100_net_init+0x2c/0x1c0 [ipw2100] [] ? register_netdevice+0x7d/0x3c0 [] ? ipw2100_irq_tasklet+0x910/0x9a0 [ipw2100] [] ? register_netdev+0x2f/0x40 [] ? ipw2100_pci_init_one+0xd21/0x1060 [ipw2100] [] ? local_pci_probe+0xb/0x10 [] ? pci_device_probe+0x69/0x90 [] ? driver_probe_device+0x74/0x180 [] ? sysfs_create_dir+0x6a/0xb0 [] ? __driver_attach+0x79/0x80 [] ? __driver_attach+0x0/0x80 [] ? bus_for_each_dev+0x52/0x80 [] ? driver_attach+0x16/0x20 [] ? __driver_attach+0x0/0x80 [] ? bus_add_driver+0x17f/0x250 [] ? pci_device_shutdown+0x0/0x20 [] ? pci_device_remove+0x0/0x40 [] ? driver_register+0x63/0x120 [] ? __pci_register_driver+0x36/0xa0 [] ? ipw2100_init+0x48/0x67 [ipw2100] [] ? do_one_initcall+0x32/0x170 [] ? __vunmap+0xb8/0xf0 [] ? ipw2100_init+0x0/0x67 [ipw2100] [] ? sys_init_module+0x161/0x1000 [] ? sys_close+0x67/0xe0 [] ? syscall_call+0x7/0xb This patch moves pm_qos_add_request prior to pci_register_driver in ipw2100 in order to avoid this problem. Reported-by: Christoph Fritz Signed-off-by: John W. Linville --- drivers/net/wireless/ipw2x00/ipw2100.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 16bbfa3189a5..1189dbb6e2a6 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -6665,12 +6665,13 @@ static int __init ipw2100_init(void) printk(KERN_INFO DRV_NAME ": %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); printk(KERN_INFO DRV_NAME ": %s\n", DRV_COPYRIGHT); + pm_qos_add_request(&ipw2100_pm_qos_req, PM_QOS_CPU_DMA_LATENCY, + PM_QOS_DEFAULT_VALUE); + ret = pci_register_driver(&ipw2100_pci_driver); if (ret) goto out; - pm_qos_add_request(&ipw2100_pm_qos_req, PM_QOS_CPU_DMA_LATENCY, - PM_QOS_DEFAULT_VALUE); #ifdef CONFIG_IPW2100_DEBUG ipw2100_debug_level = debug; ret = driver_create_file(&ipw2100_pci_driver.driver, From da93f10684bfba2983a70c10b5d417232b6a5245 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 11 Aug 2010 20:27:43 +0530 Subject: [PATCH 013/104] ath9k_htc: fix panic on packet injection using airbase-ng tool. This should fix the oops which occurs during the packet injection on monitor interface. EIP is at ath9k_htc_tx_start+0x69/0x220 [ath9k_htc] [] ? invoke_tx_handlers+0xa5a/0xee0 [mac80211] [] ? ath9k_htc_tx+0x44/0xe0 [ath9k_htc] [] ? __ieee80211_tx+0xf8/0x190 [mac80211] [] ? ieee80211_tx+0x9d/0x1a0 [mac80211] [] ? ieee80211_xmit+0x9c/0x1c0 [mac80211] [] ? ieee80211_monitor_start_xmit+0x85/0xb0 [mac80211] [] ? dev_hard_start_xmit+0x1ad/0x210 [] ? __alloc_skb+0x52/0x130 [] ? sch_direct_xmit+0x105/0x170 [] ? dev_queue_xmit+0x37f/0x4b0 [] ? packet_snd+0x21e/0x250 [] ? packet_sendmsg+0x32/0x40 [] ? sock_aio_write+0x113/0x130 [] ? do_sync_write+0xc4/0x100 [] ? autoremove_wake_function+0x0/0x50 [] ? security_file_permission+0x14/0x20 [] ? rw_verify_area+0x64/0xe0 [] ? handle_mm_fault+0x338/0x390 [] ? vfs_write+0x185/0x1a0 [] ? do_page_fault+0x160/0x3a0 [] ? sys_write+0x42/0x70 [] ? syscall_call+0x7/0xb Signed-off-by: Rajkumar Manoharan Cc: stable@kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index bd0b4acc3ece..2a6e45a293a9 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -78,18 +78,23 @@ int ath9k_htc_tx_start(struct ath9k_htc_priv *priv, struct sk_buff *skb) struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ieee80211_sta *sta = tx_info->control.sta; struct ath9k_htc_sta *ista; - struct ath9k_htc_vif *avp; struct ath9k_htc_tx_ctl tx_ctl; enum htc_endpoint_id epid; u16 qnum; __le16 fc; u8 *tx_fhdr; - u8 sta_idx; + u8 sta_idx, vif_idx; hdr = (struct ieee80211_hdr *) skb->data; fc = hdr->frame_control; - avp = (struct ath9k_htc_vif *) tx_info->control.vif->drv_priv; + if (tx_info->control.vif && + (struct ath9k_htc_vif *) tx_info->control.vif->drv_priv) + vif_idx = ((struct ath9k_htc_vif *) + tx_info->control.vif->drv_priv)->index; + else + vif_idx = priv->nvifs; + if (sta) { ista = (struct ath9k_htc_sta *) sta->drv_priv; sta_idx = ista->index; @@ -106,7 +111,7 @@ int ath9k_htc_tx_start(struct ath9k_htc_priv *priv, struct sk_buff *skb) memset(&tx_hdr, 0, sizeof(struct tx_frame_hdr)); tx_hdr.node_idx = sta_idx; - tx_hdr.vif_idx = avp->index; + tx_hdr.vif_idx = vif_idx; if (tx_info->flags & IEEE80211_TX_CTL_AMPDU) { tx_ctl.type = ATH9K_HTC_AMPDU; @@ -169,7 +174,7 @@ int ath9k_htc_tx_start(struct ath9k_htc_priv *priv, struct sk_buff *skb) tx_ctl.type = ATH9K_HTC_NORMAL; mgmt_hdr.node_idx = sta_idx; - mgmt_hdr.vif_idx = avp->index; + mgmt_hdr.vif_idx = vif_idx; mgmt_hdr.tidno = 0; mgmt_hdr.flags = 0; From 465c6cca2668a2db2a4ffce3dca5714017873f2b Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 6 Aug 2010 03:26:24 -0400 Subject: [PATCH 014/104] tracing: Extend recordmcount to better support Blackfin mcount The mcount call on Blackfin systems includes some stack manipulation around the actual call site, so extend the build time perl script to support this. This way we can avoid doing the calculation at runtime. Signed-off-by: Mike Frysinger LKML-Reference: <1281079584-21205-1-git-send-email-vapier@gentoo.org> Signed-off-by: Steven Rostedt --- scripts/recordmcount.pl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index f3c9c0a90b98..2c56539e5da2 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -159,6 +159,7 @@ my $section_regex; # Find the start of a section my $function_regex; # Find the name of a function # (return offset and func name) my $mcount_regex; # Find the call site to mcount (return offset) +my $mcount_adjust; # Address adjustment to mcount offset my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command my $can_use_local = 0; # If we can use local function references @@ -213,6 +214,7 @@ $section_regex = "Disassembly of section\\s+(\\S+):"; $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$"; $section_type = '@progbits'; +$mcount_adjust = 0; $type = ".long"; if ($arch eq "x86_64") { @@ -351,6 +353,9 @@ if ($arch eq "x86_64") { } elsif ($arch eq "microblaze") { # Microblaze calls '_mcount' instead of plain 'mcount'. $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$"; +} elsif ($arch eq "blackfin") { + $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s__mcount\$"; + $mcount_adjust = -4; } else { die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; } @@ -511,7 +516,7 @@ while () { } # is this a call site to mcount? If so, record it to print later if ($text_found && /$mcount_regex/) { - push(@offsets, hex $1); + push(@offsets, (hex $1) + $mcount_adjust); } } From 2a37a3df57c44e947271758a1aa4bea7bff9feab Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Jun 2010 15:21:34 -0400 Subject: [PATCH 015/104] tracing/events: Convert format output to seq_file Two new events were added that broke the current format output. Both from the SCSI system: scsi_dispatch_cmd_done and scsi_dispatch_cmd_timeout The reason is that their print_fmt exceeded a page size. Since the output of the format used simple_read_from_buffer and trace_seq, it was limited to a page size in output. This patch converts the printing of the format of an event into seq_file, which allows greater than a page size to be shown. I diffed all event formats comparing the output with and without this patch. All matched except for the above two, which showed just: FORMAT TOO BIG without this patch, but now properly displays the output with this patch. v2: Remove updating *pos in seq start function. [ Thanks to Li Zefan for pointing that out ] Reviewed-by: Li Zefan Cc: Martin K. Petersen Cc: Kei Tokunaga Cc: James Bottomley Cc: Tomohiro Kusumi Cc: Xiao Guangrong Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 206 ++++++++++++++++++++++++------------ 1 file changed, 140 insertions(+), 66 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 53cffc0b0801..45a8968707aa 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -29,6 +29,8 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); +#define COMMON_FIELD_COUNT 5 + struct list_head * trace_get_fields(struct ftrace_event_call *event_call) { @@ -544,85 +546,155 @@ out: return ret; } -static ssize_t -event_format_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) +enum { + FORMAT_HEADER = 1, + FORMAT_PRINTFMT = 2, +}; + +static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = filp->private_data; + struct ftrace_event_call *call = m->private; struct ftrace_event_field *field; struct list_head *head; - struct trace_seq *s; - int common_field_count = 5; - char *buf; - int r = 0; + loff_t index = *pos; - if (*ppos) - return 0; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - - trace_seq_init(s); - - trace_seq_printf(s, "name: %s\n", call->name); - trace_seq_printf(s, "ID: %d\n", call->event.type); - trace_seq_printf(s, "format:\n"); + (*pos)++; head = trace_get_fields(call); - list_for_each_entry_reverse(field, head, link) { - /* - * Smartly shows the array type(except dynamic array). - * Normal: - * field:TYPE VAR - * If TYPE := TYPE[LEN], it is shown: - * field:TYPE VAR[LEN] - */ - const char *array_descriptor = strchr(field->type, '['); - if (!strncmp(field->type, "__data_loc", 10)) - array_descriptor = NULL; + switch ((unsigned long)v) { + case FORMAT_HEADER: - if (!array_descriptor) { - r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" - "\tsize:%u;\tsigned:%d;\n", - field->type, field->name, field->offset, - field->size, !!field->is_signed); - } else { - r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" - "\tsize:%u;\tsigned:%d;\n", - (int)(array_descriptor - field->type), - field->type, field->name, - array_descriptor, field->offset, - field->size, !!field->is_signed); - } + if (unlikely(list_empty(head))) + return NULL; - if (--common_field_count == 0) - r = trace_seq_printf(s, "\n"); + field = list_entry(head->prev, struct ftrace_event_field, link); + return field; - if (!r) - break; + case FORMAT_PRINTFMT: + /* all done */ + return NULL; } - if (r) - r = trace_seq_printf(s, "\nprint fmt: %s\n", - call->print_fmt); + /* + * To separate common fields from event fields, the + * LSB is set on the first event field. Clear it in case. + */ + v = (void *)((unsigned long)v & ~1L); - if (!r) { - /* - * ug! The format output is bigger than a PAGE!! - */ - buf = "FORMAT TOO BIG\n"; - r = simple_read_from_buffer(ubuf, cnt, ppos, - buf, strlen(buf)); - goto out; + field = v; + if (field->link.prev == head) + return (void *)FORMAT_PRINTFMT; + + field = list_entry(field->link.prev, struct ftrace_event_field, link); + + /* Set the LSB to notify f_show to print an extra newline */ + if (index == COMMON_FIELD_COUNT) + field = (struct ftrace_event_field *) + ((unsigned long)field | 1); + + return field; +} + +static void *f_start(struct seq_file *m, loff_t *pos) +{ + loff_t l = 0; + void *p; + + /* Start by showing the header */ + if (!*pos) + return (void *)FORMAT_HEADER; + + p = (void *)FORMAT_HEADER; + do { + p = f_next(m, p, &l); + } while (p && l < *pos); + + return p; +} + +static int f_show(struct seq_file *m, void *v) +{ + struct ftrace_event_call *call = m->private; + struct ftrace_event_field *field; + const char *array_descriptor; + + switch ((unsigned long)v) { + case FORMAT_HEADER: + seq_printf(m, "name: %s\n", call->name); + seq_printf(m, "ID: %d\n", call->event.type); + seq_printf(m, "format:\n"); + return 0; + + case FORMAT_PRINTFMT: + seq_printf(m, "\nprint fmt: %s\n", + call->print_fmt); + return 0; } - r = simple_read_from_buffer(ubuf, cnt, ppos, - s->buffer, s->len); - out: - kfree(s); - return r; + /* + * To separate common fields from event fields, the + * LSB is set on the first event field. Clear it and + * print a newline if it is set. + */ + if ((unsigned long)v & 1) { + seq_putc(m, '\n'); + v = (void *)((unsigned long)v & ~1L); + } + + field = v; + + /* + * Smartly shows the array type(except dynamic array). + * Normal: + * field:TYPE VAR + * If TYPE := TYPE[LEN], it is shown: + * field:TYPE VAR[LEN] + */ + array_descriptor = strchr(field->type, '['); + + if (!strncmp(field->type, "__data_loc", 10)) + array_descriptor = NULL; + + if (!array_descriptor) + seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", + field->type, field->name, field->offset, + field->size, !!field->is_signed); + else + seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", + (int)(array_descriptor - field->type), + field->type, field->name, + array_descriptor, field->offset, + field->size, !!field->is_signed); + + return 0; +} + +static void f_stop(struct seq_file *m, void *p) +{ +} + +static const struct seq_operations trace_format_seq_ops = { + .start = f_start, + .next = f_next, + .stop = f_stop, + .show = f_show, +}; + +static int trace_format_open(struct inode *inode, struct file *file) +{ + struct ftrace_event_call *call = inode->i_private; + struct seq_file *m; + int ret; + + ret = seq_open(file, &trace_format_seq_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = call; + + return 0; } static ssize_t @@ -820,8 +892,10 @@ static const struct file_operations ftrace_enable_fops = { }; static const struct file_operations ftrace_event_format_fops = { - .open = tracing_open_generic, - .read = event_format_read, + .open = trace_format_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, }; static const struct file_operations ftrace_event_id_fops = { From 7032269e87ade34cc12891675371fa2ac150a620 Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Thu, 12 Aug 2010 19:07:40 -0400 Subject: [PATCH 016/104] HID: hiddev: protect against disconnect/NULL-dereference race One of our users reports consistently hitting a NULL dereference that resolves to the "hid_to_usb_dev(hid);" call in hiddev_ioctl(), when disconnecting a Lego WeDo USB HID device from an OLPC XO running Scratch software. There's a FIXME comment and a guard against the dereference, but that happens farther down the function than the initial dereference does. This patch moves the call to be below the guard, and the user reports that it fixes the problem for him. OLPC bug report: http://dev.laptop.org/ticket/10174 Signed-off-by: Chris Ball Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hiddev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 254a003af048..f2850171a69b 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -587,7 +587,7 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct hiddev_list *list = file->private_data; struct hiddev *hiddev = list->hiddev; struct hid_device *hid = hiddev->hid; - struct usb_device *dev = hid_to_usb_dev(hid); + struct usb_device *dev; struct hiddev_collection_info cinfo; struct hiddev_report_info rinfo; struct hiddev_field_info finfo; @@ -601,9 +601,11 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) /* Called without BKL by compat methods so no BKL taken */ /* FIXME: Who or what stop this racing with a disconnect ?? */ - if (!hiddev->exist) + if (!hiddev->exist || !hid) return -EIO; + dev = hid_to_usb_dev(hid); + switch (cmd) { case HIDIOCGVERSION: From 9c9e54a8df0be48aa359744f412377cc55c3b7d2 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 13 Aug 2010 12:19:45 +0200 Subject: [PATCH 017/104] HID: hiddev: fix memory corruption due to invalid intfdata Commit bd25f4dd6972755579d0 ("HID: hiddev: use usb_find_interface, get rid of BKL") introduced using of private intfdata in hiddev for purpose of storing hiddev pointer. This is a problem, because intf pointer is already being set to struct hid_device pointer by HID core. This obviously lead to memory corruptions at device disconnect time, such as WARNING: at lib/kobject.c:595 kobject_put+0x37/0x4b() kobject: '(null)' (ffff88011e9cd898): is not initialized, yet kobject_put() is being called. Convert hiddev into accessing hiddev through struct hid_device which is in intfdata already. Reported-and-tested-by: Markus Trippelsdorf Reported-and-tested-by: Heinz Diehl Reported-and-tested-by: Alan Ott Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hiddev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index f2850171a69b..0a29c51114aa 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -266,13 +266,15 @@ static int hiddev_open(struct inode *inode, struct file *file) { struct hiddev_list *list; struct usb_interface *intf; + struct hid_device *hid; struct hiddev *hiddev; int res; intf = usb_find_interface(&hiddev_driver, iminor(inode)); if (!intf) return -ENODEV; - hiddev = usb_get_intfdata(intf); + hid = usb_get_intfdata(intf); + hiddev = hid->hiddev; if (!(list = kzalloc(sizeof(struct hiddev_list), GFP_KERNEL))) return -ENOMEM; @@ -890,7 +892,6 @@ int hiddev_connect(struct hid_device *hid, unsigned int force) hid->hiddev = hiddev; hiddev->hid = hid; hiddev->exist = 1; - usb_set_intfdata(usbhid->intf, usbhid); retval = usb_register_dev(usbhid->intf, &hiddev_class); if (retval) { err_hid("Not able to get a minor for this device."); From 71ba186c123630ddab17667ec9ecf7e2ef211295 Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Thu, 12 Aug 2010 14:23:28 +0530 Subject: [PATCH 018/104] ath9k_htc: Fix disconnect issue in HT40 mode. Some APs advertise that they may be HT40 capable in the capabilites but the current operating channel configuration may be only HT20. This causes disconnection as ath9k_htc sets WLAN_RC_40_FLAG despite the AP operating in HT20 mode. Hence set this flag only if the current channel configuration is HT40 enabled. Cc: stable@kernel.org Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index ebed9d1691a5..7d09b4b17bbd 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -366,7 +366,8 @@ static void ath9k_htc_setup_rate(struct ath9k_htc_priv *priv, caps = WLAN_RC_HT_FLAG; if (sta->ht_cap.mcs.rx_mask[1]) caps |= WLAN_RC_DS_FLAG; - if (sta->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) + if ((sta->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) && + (conf_is_ht40(&priv->hw->conf))) caps |= WLAN_RC_40_FLAG; if (conf_is_ht40(&priv->hw->conf) && (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40)) From fe0dbcc9d2e941328b3269dab102b94ad697ade5 Mon Sep 17 00:00:00 2001 From: Yuri Kululin Date: Fri, 13 Aug 2010 13:46:12 +0400 Subject: [PATCH 019/104] wl1251: fix trigger scan timeout usage Use appropriate command (CMD_TRIGGER_SCAN_TO) instead of scan command (CMD_SCAN) to configure trigger scan timeout. This was broken in commit 3a98c30f3e8bb1f32b5bcb74a39647b3670de275. This fix address the bug reported here: https://bugzilla.kernel.org/show_bug.cgi?id=16554 Cc: stable@kernel.org Signed-off-by: Yuri Ershov Signed-off-by: Yuri Kululin Acked-by: Kalle Valo Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/wl1251_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/wl12xx/wl1251_cmd.c b/drivers/net/wireless/wl12xx/wl1251_cmd.c index a37b30cef489..ce3722f4c3e3 100644 --- a/drivers/net/wireless/wl12xx/wl1251_cmd.c +++ b/drivers/net/wireless/wl12xx/wl1251_cmd.c @@ -484,7 +484,7 @@ int wl1251_cmd_trigger_scan_to(struct wl1251 *wl, u32 timeout) cmd->timeout = timeout; - ret = wl1251_cmd_send(wl, CMD_SCAN, cmd, sizeof(*cmd)); + ret = wl1251_cmd_send(wl, CMD_TRIGGER_SCAN_TO, cmd, sizeof(*cmd)); if (ret < 0) { wl1251_error("cmd trigger scan to failed: %d", ret); goto out; From ca6cff1f80f30cc6313a943339361ad6f9e76548 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 13 Aug 2010 18:36:40 +0530 Subject: [PATCH 020/104] ath9k_htc: load proper firmware for device ID 7015 This patch handles the firmware loading properly for device ID 7015. Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/eeprom.h | 1 + drivers/net/wireless/ath/ath9k/eeprom_9287.c | 7 ++++++- drivers/net/wireless/ath/ath9k/hif_usb.c | 8 ++------ drivers/net/wireless/ath/ath9k/htc_drv_init.c | 1 + drivers/net/wireless/ath/ath9k/reg.h | 1 + 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/eeprom.h b/drivers/net/wireless/ath/ath9k/eeprom.h index 8750c558c221..7f48df1e2903 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom.h +++ b/drivers/net/wireless/ath/ath9k/eeprom.h @@ -191,6 +191,7 @@ #define AR9287_EEP_NO_BACK_VER AR9287_EEP_MINOR_VER_1 #define AR9287_EEP_START_LOC 128 +#define AR9287_HTC_EEP_START_LOC 256 #define AR9287_NUM_2G_CAL_PIERS 3 #define AR9287_NUM_2G_CCK_TARGET_POWERS 3 #define AR9287_NUM_2G_20_TARGET_POWERS 3 diff --git a/drivers/net/wireless/ath/ath9k/eeprom_9287.c b/drivers/net/wireless/ath/ath9k/eeprom_9287.c index 4a52cf03808b..dff2da777312 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom_9287.c +++ b/drivers/net/wireless/ath/ath9k/eeprom_9287.c @@ -34,9 +34,14 @@ static bool ath9k_hw_ar9287_fill_eeprom(struct ath_hw *ah) struct ar9287_eeprom *eep = &ah->eeprom.map9287; struct ath_common *common = ath9k_hw_common(ah); u16 *eep_data; - int addr, eep_start_loc = AR9287_EEP_START_LOC; + int addr, eep_start_loc; eep_data = (u16 *)eep; + if (ah->hw_version.devid == 0x7015) + eep_start_loc = AR9287_HTC_EEP_START_LOC; + else + eep_start_loc = AR9287_EEP_START_LOC; + if (!ath9k_hw_use_flash(ah)) { ath_print(common, ATH_DBG_EEPROM, "Reading from EEPROM, not flash\n"); diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index 61c1bee3f26a..17e7a9a367e7 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -799,7 +799,7 @@ static int ath9k_hif_usb_download_fw(struct hif_device_usb *hif_dev) } kfree(buf); - if (hif_dev->device_id == 0x7010) + if ((hif_dev->device_id == 0x7010) || (hif_dev->device_id == 0x7015)) firm_offset = AR7010_FIRMWARE_TEXT; else firm_offset = AR9271_FIRMWARE_TEXT; @@ -901,6 +901,7 @@ static int ath9k_hif_usb_probe(struct usb_interface *interface, switch(hif_dev->device_id) { case 0x7010: + case 0x7015: case 0x9018: if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x0202) hif_dev->fw_name = FIRMWARE_AR7010_1_1; @@ -912,11 +913,6 @@ static int ath9k_hif_usb_probe(struct usb_interface *interface, break; } - if (!hif_dev->fw_name) { - dev_err(&udev->dev, "Can't determine firmware !\n"); - goto err_htc_hw_alloc; - } - ret = ath9k_hif_usb_dev_init(hif_dev); if (ret) { ret = -EINVAL; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index 148b43317fdb..2d4279191d7a 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -245,6 +245,7 @@ static int ath9k_init_htc_services(struct ath9k_htc_priv *priv, u16 devid) switch(devid) { case 0x7010: + case 0x7015: case 0x9018: priv->htc->credits = 45; break; diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h index 633e3d949ec0..d01c4adab8d6 100644 --- a/drivers/net/wireless/ath/ath9k/reg.h +++ b/drivers/net/wireless/ath/ath9k/reg.h @@ -899,6 +899,7 @@ #define AR_DEVID_7010(_ah) \ (((_ah)->hw_version.devid == 0x7010) || \ + ((_ah)->hw_version.devid == 0x7015) || \ ((_ah)->hw_version.devid == 0x9018)) #define AR_RADIO_SREV_MAJOR 0xf0 From 6ccf15a1a76d2ff915cdef6ae4d12d0170087118 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Fri, 13 Aug 2010 11:27:28 -0400 Subject: [PATCH 021/104] ath5k: disable ASPM L0s for all cards Atheros PCIe wireless cards handled by ath5k do require L0s disabled. For distributions shipping with CONFIG_PCIEASPM (this will be enabled by default in the future in 2.6.36) this will also mean both L1 and L0s will be disabled when a pre 1.1 PCIe device is detected. We do know L1 works correctly even for all ath5k pre 1.1 PCIe devices though but cannot currently undue the effect of a blacklist, for details you can read pcie_aspm_sanity_check() and see how it adjusts the device link capability. It may be possible in the future to implement some PCI API to allow drivers to override blacklists for pre 1.1 PCIe but for now it is best to accept that both L0s and L1 will be disabled completely for distributions shipping with CONFIG_PCIEASPM rather than having this issue present. Motivation for adding this new API will be to help with power consumption for some of these devices. Example of issues you'd see: - On the Acer Aspire One (AOA150, Atheros Communications Inc. AR5001 Wireless Network Adapter [168c:001c] (rev 01)) doesn't work well with ASPM enabled, the card will eventually stall on heavy traffic with often 'unsupported jumbo' warnings appearing. Disabling ASPM L0s in ath5k fixes these problems. - On the same card you would see a storm of RXORN interrupts even though medium is idle. Credit for root causing and fixing the bug goes to Jussi Kivilinna. Cc: David Quan Cc: Matthew Garrett Cc: Tim Gardner Cc: Jussi Kivilinna Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: Maxim Levitsky Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath5k/base.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 0d5de2574dd1..373dcfec689c 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -476,6 +477,26 @@ ath5k_pci_probe(struct pci_dev *pdev, int ret; u8 csz; + /* + * L0s needs to be disabled on all ath5k cards. + * + * For distributions shipping with CONFIG_PCIEASPM (this will be enabled + * by default in the future in 2.6.36) this will also mean both L1 and + * L0s will be disabled when a pre 1.1 PCIe device is detected. We do + * know L1 works correctly even for all ath5k pre 1.1 PCIe devices + * though but cannot currently undue the effect of a blacklist, for + * details you can read pcie_aspm_sanity_check() and see how it adjusts + * the device link capability. + * + * It may be possible in the future to implement some PCI API to allow + * drivers to override blacklists for pre 1.1 PCIe but for now it is + * best to accept that both L0s and L1 will be disabled completely for + * distributions shipping with CONFIG_PCIEASPM rather than having this + * issue present. Motivation for adding this new API will be to help + * with power consumption for some of these devices. + */ + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); + ret = pci_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "can't enable device\n"); From 1aa54bca6ee0d07ebcafb8ca8074b624d80724aa Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Wed, 28 Jul 2010 01:18:01 +0200 Subject: [PATCH 022/104] tracing: Sanitize value returned from write(trace_marker, "...", len) When userspace code writes non-new-line-terminated string to trace_marker file, write handler appends new-line and returns number of bytes written to trace buffer, so write(fd, "abc", 3) will return 4 That's unexpected and unfortunately it confuses glibc's fprintf function. Example: int main() { fprintf(stderr, "abc"); return 0; } $ gcc test.c -o test $ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer $ ./test 2>/sys/kernel/debug/tracing/trace_marker results in infinite loop: write(fd, "abc", 3) = 4 write(fd, "", 1) = 0 write(fd, "", 1) = 0 write(fd, "", 1) = 0 write(fd, "", 1) = 0 write(fd, "", 1) = 0 write(fd, "", 1) = 0 write(fd, "", 1) = 0 (...) ...and kernel trace buffer full of empty markers. Fix it by sanitizing write return value. Signed-off-by: Marcin Slusarz LKML-Reference: <20100727231801.GB2826@joi.lan> Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 086d36316805..88b42d14d32d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3498,6 +3498,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { char *buf; + size_t written; if (tracing_disabled) return -EINVAL; @@ -3519,11 +3520,15 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, } else buf[cnt] = '\0'; - cnt = mark_printk("%s", buf); + written = mark_printk("%s", buf); kfree(buf); - *fpos += cnt; + *fpos += written; - return cnt; + /* don't tell userspace we wrote more - it might confuse them */ + if (written > cnt) + written = cnt; + + return written; } static int tracing_clock_show(struct seq_file *m, void *v) From 2f174847b0fdd4eb9f482030a284db24aef7a97f Mon Sep 17 00:00:00 2001 From: eric miao Date: Thu, 12 Aug 2010 16:43:00 +0100 Subject: [PATCH 023/104] ARM: 6326/1: kgdb: fix GDB_MAX_REGS no longer used According to commit 22eeef4bb2a7fd225089c0044060ed1fbf091958 kgdb,arm: Individual register get/set for arm It's now replaced by DBG_MAX_REG_NUM. Cc: Jason Wessel Signed-off-by: Eric Miao Signed-off-by: Russell King --- arch/arm/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 778c2f7024ff..d6e8b4d2e60d 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -79,7 +79,7 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) return; /* Initialize to zero */ - for (regno = 0; regno < GDB_MAX_REGS; regno++) + for (regno = 0; regno < DBG_MAX_REG_NUM; regno++) gdb_regs[regno] = 0; /* Otherwise, we have only some registers from switch_to() */ From bd365591dfb942d1447b8043e21e8ff112630db4 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Sat, 14 Aug 2010 23:56:21 +0100 Subject: [PATCH 024/104] ARM: 6328/1: Build with -fno-dwarf2-cfi-asm Commit d0679c7 restricted this workaround to powerpc only, but it turns out that ARM needs it as well. Fixes https://bugzilla.kernel.org/show_bug.cgi?id=16310 . Reported-and-Tested-by: Robert Nelson Acked-by: Mikael Pettersson Signed-off-by: Michal Marek Signed-off-by: Russell King --- arch/arm/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 99b8200138d2..59c1ce858fc8 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -21,6 +21,9 @@ GZFLAGS :=-9 # Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: KBUILD_CFLAGS +=$(call cc-option,-marm,) +# Never generate .eh_frame +KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) + # Do not use arch/arm/defconfig - it's always outdated. # Select a platform tht is kept up-to-date KBUILD_DEFCONFIG := versatile_defconfig From 2f09a4d5daaa36690d506fafda9c24f2be866f6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Aug 2010 22:38:09 -0700 Subject: [PATCH 025/104] xfrm: Use GFP_ATOMIC in xfrm_compile_policy As xfrm_compile_policy runs within a read_lock, we cannot use GFP_KERNEL for memory allocations. Reported-by: Luca Tettamanti Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ba59983aaffe..b14ed4b1f27c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2504,7 +2504,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, if (p->dir > XFRM_POLICY_OUT) return NULL; - xp = xfrm_policy_alloc(net, GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; From f3d3f616e35db2ceeb11564eafd50759bb5bca8a Mon Sep 17 00:00:00 2001 From: Min Zhang Date: Sat, 14 Aug 2010 22:42:51 -0700 Subject: [PATCH 026/104] ipv6: remove sysctl jiffies conversion on gc_elasticity and min_adv_mss sysctl output ipv6 gc_elasticity and min_adv_mss as values divided by HZ. However, they are not in unit of jiffies, since ip6_rt_min_advmss refers to packet size and ip6_rt_fc_elasticity is used as scaler as in expire>>ip6_rt_gc_elasticity, so replace the jiffies conversion handler will regular handler for them. This has impact on scripts that are currently working assuming the divide by HZ, will yield different results with this patch in place. Signed-off-by: Min Zhang Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8f2d0400cf8a..d126365ac046 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2580,7 +2580,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, }, { .procname = "mtu_expires", @@ -2594,7 +2594,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, }, { .procname = "gc_min_interval_ms", From 21d93e2e29722d7832f61cc56d73fb953ee6578e Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Sun, 15 Aug 2010 10:47:23 +0100 Subject: [PATCH 027/104] ARM: 6329/1: wire up sys_accept4() on ARM sys_accept4() was added in kernel 2.6.28, but ARM was not updated to include it. The number and types of parameters is such that no ARM-specific processing is needed, so wiring up sys_accept4() just requires defining __NR_accept4 and adding a direct call in the syscall entry table. Tested with an EABI 2.6.35 kernel and Ulrich Drepper's original accept4() test program, modified to define __NR_accept4 for ARM. Using the updated unistd.h also eliminates a warning then building glibc (2.10.2 and newer) about accept4() being unimplemented. Signed-off-by: Mikael Pettersson Signed-off-by: Russell King --- arch/arm/include/asm/unistd.h | 1 + arch/arm/kernel/calls.S | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index dd2bf53000fe..d02cfb683487 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -392,6 +392,7 @@ #define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363) #define __NR_perf_event_open (__NR_SYSCALL_BASE+364) #define __NR_recvmmsg (__NR_SYSCALL_BASE+365) +#define __NR_accept4 (__NR_SYSCALL_BASE+366) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 37ae301cc47c..afeb71fa72cb 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -375,6 +375,7 @@ CALL(sys_rt_tgsigqueueinfo) CALL(sys_perf_event_open) /* 365 */ CALL(sys_recvmmsg) + CALL(sys_accept4) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted From 41e2e8fd34fff909a0e40129f6ac4233ecfa67a9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 13 Aug 2010 23:33:46 +0100 Subject: [PATCH 028/104] ARM: Tighten check for allowable CPSR values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Arve Hjønnevåg Acked-by: Dima Zavin Signed-off-by: Russell King --- arch/arm/include/asm/ptrace.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index c974be8913a7..7ce15eb15f72 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -158,15 +158,24 @@ struct pt_regs { */ static inline int valid_user_regs(struct pt_regs *regs) { - if (user_mode(regs) && (regs->ARM_cpsr & PSR_I_BIT) == 0) { - regs->ARM_cpsr &= ~(PSR_F_BIT | PSR_A_BIT); - return 1; + unsigned long mode = regs->ARM_cpsr & MODE_MASK; + + /* + * Always clear the F (FIQ) and A (delayed abort) bits + */ + regs->ARM_cpsr &= ~(PSR_F_BIT | PSR_A_BIT); + + if ((regs->ARM_cpsr & PSR_I_BIT) == 0) { + if (mode == USR_MODE) + return 1; + if (elf_hwcap & HWCAP_26BIT && mode == USR26_MODE) + return 1; } /* * Force CPSR to something logical... */ - regs->ARM_cpsr &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | PSR_T_BIT | MODE32_BIT; + regs->ARM_cpsr &= PSR_f | PSR_s | PSR_x | PSR_T_BIT | MODE32_BIT; if (!(elf_hwcap & HWCAP_26BIT)) regs->ARM_cpsr |= USR_MODE; From af4e36318edb848fcc0a8d5f75000ca00cdc7595 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Aug 2010 12:42:24 +0900 Subject: [PATCH 029/104] nilfs2: fix list corruption after ifile creation failure If nilfs_attach_checkpoint() gets a memory allocation failure during creation of ifile, it will return without removing nilfs_sb_info struct from ns_supers list. When a concurrently mounted snapshot is unmounted or another new snapshot is mounted after that, this causes kernel oops as below: > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: [] nilfs_find_sbinfo+0x74/0xa4 [nilfs2] > *pde = 00000000 > Oops: 0000 [#1] SMP > Call Trace: > [] ? nilfs_get_sb+0x165/0x532 [nilfs2] > [] ? ida_get_new_above+0x16d/0x187 > [] ? alloc_vfsmnt+0x7e/0x10a > [] ? kstrdup+0x2c/0x40 > [] ? vfs_kern_mount+0x96/0x14e > [] ? do_kern_mount+0x32/0xbd > [] ? do_mount+0x642/0x6a1 > [] ? do_page_fault+0x0/0x2d1 > [] ? copy_mount_options+0x80/0xe2 > [] ? strndup_user+0x48/0x67 > [] ? sys_mount+0x61/0x90 > [] ? sysenter_do_call+0x12/0x22 This fixes the problem. Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: stable@kernel.org --- fs/nilfs2/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9df73b..bee60c04109a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -400,9 +400,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) list_add(&sbi->s_list, &nilfs->ns_supers); up_write(&nilfs->ns_super_sem); + err = -ENOMEM; sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); if (!sbi->s_ifile) - return -ENOMEM; + goto delist; down_read(&nilfs->ns_segctor_sem); err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, @@ -433,6 +434,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; + delist: down_write(&nilfs->ns_super_sem); list_del_init(&sbi->s_list); up_write(&nilfs->ns_super_sem); From ea1a16f7168ac19d974ac51b47593b92280e7992 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 15 Aug 2010 20:16:11 +0900 Subject: [PATCH 030/104] nilfs2: fix false warning saying one of two super blocks is broken After applying commit b2ac86e1, the following message got appeared after unclean shutdown: > NILFS warning: broken superblock. using spare superblock. This turns out to be a false message due to the change which updates two super blocks alternately. The secondary super block now can be selected if it's newer than the primary one. This kills the false warning by suppressing it if another super block is not actually broken. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/the_nilfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 37de1f062d81..6af1c0073e9e 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -608,11 +608,11 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, return -EINVAL; } - if (swp) { + if (!valid[!swp]) printk(KERN_WARNING "NILFS warning: broken superblock. " "using spare superblock.\n"); + if (swp) nilfs_swap_super_block(nilfs); - } nilfs->ns_sbwcount = 0; nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); From a5ba6beb839cfa288960c92cd2668a2601c24dda Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Aug 2010 08:08:48 +0200 Subject: [PATCH 031/104] ALSA: riptide - Fix detection / load of firmware files The detection and loading of firmeware on riptide driver has been broken due to rewrite of some codes, checking the presense wrongly. This patch fixes the logic again. Reference: kernel bug 16596 https://bugzilla.kernel.org/show_bug.cgi?id=16596 Cc: Signed-off-by: Takashi Iwai --- sound/pci/riptide/riptide.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index f64fb7d988cb..ad5202efd7a9 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -1224,15 +1224,14 @@ static int try_to_load_firmware(struct cmdif *cif, struct snd_riptide *chip) firmware.firmware.ASIC, firmware.firmware.CODEC, firmware.firmware.AUXDSP, firmware.firmware.PROG); + if (!chip) + return 1; + for (i = 0; i < FIRMWARE_VERSIONS; i++) { if (!memcmp(&firmware_versions[i], &firmware, sizeof(firmware))) - break; - } - if (i >= FIRMWARE_VERSIONS) - return 0; /* no match */ + return 1; /* OK */ - if (!chip) - return 1; /* OK */ + } snd_printdd("Writing Firmware\n"); if (!chip->fw_entry) { From daa3766e705c2605bd7f63b80c7742014ef9e04e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 15 Aug 2010 23:21:50 -0700 Subject: [PATCH 032/104] Revert "netlink: netlink_recvmsg() fix" This reverts commit 1235f504aaba2ebeabc863fdb3ceac764a317d47. It causes regressions worse than the problem it was trying to fix. Eric will try to solve the problem another way. Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2cbf380377d5..8648a9922aab 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1406,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb; + struct sk_buff *skb, *frag __maybe_unused = NULL; int err; if (flags&MSG_OOB) @@ -1441,21 +1441,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, kfree_skb(skb); skb = compskb; } else { - /* - * Before setting frag_list to NULL, we must get a - * private copy of skb if shared (because of MSG_PEEK) - */ - if (skb_shared(skb)) { - struct sk_buff *nskb; - - nskb = pskb_copy(skb, GFP_KERNEL); - kfree_skb(skb); - skb = nskb; - err = -ENOMEM; - if (!skb) - goto out; - } - kfree_skb(skb_shinfo(skb)->frag_list); + frag = skb_shinfo(skb)->frag_list; skb_shinfo(skb)->frag_list = NULL; } } @@ -1492,6 +1478,10 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (flags & MSG_TRUNC) copied = skb->len; +#ifdef CONFIG_COMPAT_NETLINK_MESSAGES + skb_shinfo(skb)->frag_list = frag; +#endif + skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) From c3e68fad88143fd1fe8fe640207fb19c0f087dbc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Aug 2010 10:15:57 +0200 Subject: [PATCH 033/104] ALSA: hda - Add quirk for Dell Vostro 1220 model=dell-vostro is needed for Dell Vostro 1220 with Coexnat 5067. Reference: Novell bnc#631066 https://bugzilla.novell.com/show_bug.cgi?id=631066 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 31b5d9eeba68..c424952a734e 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3049,6 +3049,7 @@ static struct snd_pci_quirk cxt5066_cfg_tbl[] = { SND_PCI_QUIRK(0x1028, 0x02f5, "Dell", CXT5066_DELL_LAPTOP), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT5066_OLPC_XO_1_5), + SND_PCI_QUIRK(0x1028, 0x02d8, "Dell Vostro", CXT5066_DELL_VOSTO), SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTO), SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD), SND_PCI_QUIRK(0x1179, 0xff50, "Toshiba Satellite P500-PSPGSC-01800T", CXT5066_OLPC_XO_1_5), From b2c1e07b81a126e5846dfc3d36f559d861df59f4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Aug 2010 11:46:57 +0100 Subject: [PATCH 034/104] ASoC: Remove DSP mode support for WM8776 This is not supported by current hardware revisions. Signed-off-by: Mark Brown Acked-by: Liam Girdwood Cc: stable@kernel.org --- sound/soc/codecs/wm8776.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sound/soc/codecs/wm8776.c b/sound/soc/codecs/wm8776.c index 4e212ed62ea6..f8154e661524 100644 --- a/sound/soc/codecs/wm8776.c +++ b/sound/soc/codecs/wm8776.c @@ -178,13 +178,6 @@ static int wm8776_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) case SND_SOC_DAIFMT_LEFT_J: iface |= 0x0001; break; - /* FIXME: CHECK A/B */ - case SND_SOC_DAIFMT_DSP_A: - iface |= 0x0003; - break; - case SND_SOC_DAIFMT_DSP_B: - iface |= 0x0007; - break; default: return -EINVAL; } From e91846213241e3c46da8cbe992bceb1697de8d78 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Aug 2010 10:43:54 -0300 Subject: [PATCH 035/104] perf annotate tui: Fix exit and RIGHT keys handling As part of ongoing effort to reduce the coupling with libnewt, browsers are being changed to return the exit key. The annotate browser is not returning it as expected by builtin-annotate when annotating multiple symbols (when 'perf annotate' is called without specifying a symbol name). Fix it by returning the exit key and also adding the RIGHT key as a exit key so that going to the next symbol in the TUI can work again. Cc: Frederic Weisbecker Cc: Peter Zijlstra LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ui/browsers/annotate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 55ff792459ac..a90273e63f4f 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -146,6 +146,7 @@ static int annotate_browser__run(struct annotate_browser *self, return -1; newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); + newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); nd = self->curr_hot; if (nd) { @@ -178,7 +179,7 @@ static int annotate_browser__run(struct annotate_browser *self, } out: ui_browser__hide(&self->b); - return 0; + return es->u.key; } int hist_entry__tui_annotate(struct hist_entry *self) From 83e4491818040ae6b2d9fc60434616304a101d39 Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Mon, 16 Aug 2010 16:01:28 +0200 Subject: [PATCH 036/104] USB HID: Add ID for eGalax Multitouch used in JooJoo tablet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The JooJoo tablet (http://thejoojoo.com/) contains an "eGalax Inc. USB TouchController", and this patch hooks it up to the egalax-touch driver. Without the patch we don't get any cursor motion, since it comes through Z/RX rather than X/Y. (The egalax-touch driver does not yet generate a correct event sequence for the "serial" protocol used by this device, though -- see the note added to the code, which comes from research by Stéphane Chatty.) Cc: Jiri Kosina Cc: Stéphane Chatty Signed-off-by: Chris Ball Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-egalax.c | 9 +++++++++ drivers/hid/hid-ids.h | 1 + 3 files changed, 11 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e635199a0cd2..0c52899be964 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1299,6 +1299,7 @@ static const struct hid_device_id hid_blacklist[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) }, { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006) }, { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH1) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, { HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) }, { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) }, diff --git a/drivers/hid/hid-egalax.c b/drivers/hid/hid-egalax.c index f44bdc084cb2..8ca7f65cf2f8 100644 --- a/drivers/hid/hid-egalax.c +++ b/drivers/hid/hid-egalax.c @@ -159,6 +159,13 @@ static int egalax_event(struct hid_device *hid, struct hid_field *field, { struct egalax_data *td = hid_get_drvdata(hid); + /* Note, eGalax has two product lines: the first is resistive and + * uses a standard parallel multitouch protocol (product ID == + * 48xx). The second is capacitive and uses an unusual "serial" + * protocol with a different message for each multitouch finger + * (product ID == 72xx). We do not yet generate a correct event + * sequence for the capacitive/serial protocol. + */ if (hid->claimed & HID_CLAIMED_INPUT) { struct input_dev *input = field->hidinput->input; @@ -246,6 +253,8 @@ static void egalax_remove(struct hid_device *hdev) static const struct hid_device_id egalax_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, + USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH1) }, { } }; MODULE_DEVICE_TABLE(hid, egalax_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d3fc13ae094d..85c6d13c9ffa 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -188,6 +188,7 @@ #define USB_VENDOR_ID_DWAV 0x0eef #define USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER 0x0001 #define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH 0x480d +#define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH1 0x720c #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 From 5629236b31239dbaa182cb7eb39aad4d62278f7c Mon Sep 17 00:00:00 2001 From: Kulikov Vasiliy Date: Tue, 3 Aug 2010 19:44:16 +0400 Subject: [PATCH 037/104] x86: intel_ips: do not use PCI resources before pci_enable_device() IRQ and resource[] may not have correct values until after PCI hotplug setup occurs at pci_enable_device() time. The semantic match that finds this problem is as follows: // @@ identifier x; identifier request ~= "pci_request.*|pci_resource.*"; @@ ( * x->irq | * x->resource | * request(x, ...) ) ... *pci_enable_device(x) // Signed-off-by: Kulikov Vasiliy Reviewed-by: Jesse Barnes Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_ips.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index afe82e50dfea..fad59b69a69c 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -1432,6 +1432,12 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) spin_lock_init(&ips->turbo_status_lock); + ret = pci_enable_device(dev); + if (ret) { + dev_err(&dev->dev, "can't enable PCI device, aborting\n"); + goto error_free; + } + if (!pci_resource_start(dev, 0)) { dev_err(&dev->dev, "TBAR not assigned, aborting\n"); ret = -ENXIO; @@ -1444,11 +1450,6 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) goto error_free; } - ret = pci_enable_device(dev); - if (ret) { - dev_err(&dev->dev, "can't enable PCI device, aborting\n"); - goto error_free; - } ips->regmap = ioremap(pci_resource_start(dev, 0), pci_resource_len(dev, 0)); From 2e0ee69c214a0197e1b081ffec9c409ab2a5f094 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 5 Aug 2010 22:24:12 +0200 Subject: [PATCH 038/104] drivers/platform/x86: Adjust confusing if indentation The assignment of ret to -EIO appears to only make sense if the branch that it is aligned with is executed, so move it into that branch. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r disable braces4@ position p1,p2; statement S1,S2; @@ ( if (...) { ... } | if (...) S1@p1 S2@p2 ) @script:python@ p1 << r.p1; p2 << r.p2; @@ if (p1[0].column == p2[0].column): cocci.print_main("branch",p1) cocci.print_secs("after",p2) // Signed-off-by: Julia Lawall Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus_acpi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c index e058c2ba2a15..ca05aefd03bf 100644 --- a/drivers/platform/x86/asus_acpi.c +++ b/drivers/platform/x86/asus_acpi.c @@ -938,10 +938,11 @@ static int set_brightness(int value) /* SPLV laptop */ if (hotk->methods->brightness_set) { if (!write_acpi_int(hotk->handle, hotk->methods->brightness_set, - value, NULL)) + value, NULL)) { printk(KERN_WARNING "Asus ACPI: Error changing brightness\n"); ret = -EIO; + } goto out; } @@ -953,10 +954,11 @@ static int set_brightness(int value) hotk->methods->brightness_down, NULL, NULL); (value > 0) ? value-- : value++; - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { printk(KERN_WARNING "Asus ACPI: Error changing brightness\n"); ret = -EIO; + } } out: return ret; From 52d7ee558d3babb4918eed6769f593adc1b6616e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 8 Aug 2010 00:01:12 +0200 Subject: [PATCH 039/104] intel_ips: potential null dereference There is a potential NULL dereference of "limits." We can just return NULL earlier to avoid it. The caller already handles NULL returns. Signed-off-by: Dan Carpenter Reviewed-by: Jesse Barnes Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_ips.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index fad59b69a69c..9024480a8228 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -1342,8 +1342,10 @@ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips) limits = &ips_lv_limits; else if (strstr(boot_cpu_data.x86_model_id, "CPU U")) limits = &ips_ulv_limits; - else + else { dev_info(&ips->dev->dev, "No CPUID match found.\n"); + goto out; + } rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power); tdp = turbo_power & TURBO_TDP_MASK; From 122f26726b5e16174bf8a707df14be1d93c49d62 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Mon, 9 Aug 2010 23:48:18 -0300 Subject: [PATCH 040/104] thinkpad-acpi: find ACPI video device by synthetic HID The Linux ACPI core locates the ACPI video devices for us and marks them with ACPI_VIDEO_HID. Use that information to locate the video device instead of a half-baked hunt for _BCL. This uncouples the detection of the number of backlight brightness levels on ThinkPads from the ACPI paths in vid_handle. With this change, the driver should be able to always detect whether the ThinkPad uses a 8-level or 16-level brightness scale even on newer models for which the vid_handle paths have not been updated yet. It will skip deactivated devices in the ACPI device tree, which is a change in behaviour. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Matthew Garrett --- drivers/platform/x86/thinkpad_acpi.c | 52 +++++++--------------------- 1 file changed, 12 insertions(+), 40 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 5d6119bed00c..9d6fc4c7c08e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -6080,13 +6080,18 @@ static struct backlight_ops ibm_backlight_data = { /* --------------------------------------------------------------------- */ +/* + * Call _BCL method of video device. On some ThinkPads this will + * switch the firmware to the ACPI brightness control mode. + */ + static int __init tpacpi_query_bcl_levels(acpi_handle handle) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; int rc; - if (ACPI_SUCCESS(acpi_evaluate_object(handle, NULL, NULL, &buffer))) { + if (ACPI_SUCCESS(acpi_evaluate_object(handle, "_BCL", NULL, &buffer))) { obj = (union acpi_object *)buffer.pointer; if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) { printk(TPACPI_ERR "Unknown _BCL data, " @@ -6103,55 +6108,22 @@ static int __init tpacpi_query_bcl_levels(acpi_handle handle) return rc; } -static acpi_status __init tpacpi_acpi_walk_find_bcl(acpi_handle handle, - u32 lvl, void *context, void **rv) -{ - char name[ACPI_PATH_SEGMENT_LENGTH]; - struct acpi_buffer buffer = { sizeof(name), &name }; - - if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) && - !strncmp("_BCL", name, sizeof(name) - 1)) { - BUG_ON(!rv || !*rv); - **(int **)rv = tpacpi_query_bcl_levels(handle); - return AE_CTRL_TERMINATE; - } else { - return AE_OK; - } -} /* * Returns 0 (no ACPI _BCL or _BCL invalid), or size of brightness map */ static unsigned int __init tpacpi_check_std_acpi_brightness_support(void) { - int status; + acpi_handle video_device; int bcl_levels = 0; - void *bcl_ptr = &bcl_levels; - if (!vid_handle) - TPACPI_ACPIHANDLE_INIT(vid); + tpacpi_acpi_handle_locate("video", ACPI_VIDEO_HID, &video_device); + if (video_device) + bcl_levels = tpacpi_query_bcl_levels(video_device); - if (!vid_handle) - return 0; + tp_features.bright_acpimode = (bcl_levels > 0); - /* - * Search for a _BCL method, and execute it. This is safe on all - * ThinkPads, and as a side-effect, _BCL will place a Lenovo Vista - * BIOS in ACPI backlight control mode. We do NOT have to care - * about calling the _BCL method in an enabled video device, any - * will do for our purposes. - */ - - status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3, - tpacpi_acpi_walk_find_bcl, NULL, NULL, - &bcl_ptr); - - if (ACPI_SUCCESS(status) && bcl_levels > 2) { - tp_features.bright_acpimode = 1; - return bcl_levels - 2; - } - - return 0; + return (bcl_levels > 2) ? (bcl_levels - 2) : 0; } /* From 217f09631a420295a9688e18aa4dbad1b385e56c Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Mon, 9 Aug 2010 23:48:19 -0300 Subject: [PATCH 041/104] thinkpad-acpi: untangle ACPI/vendor backlight selection acpi_video_backlight_support() already tells us if ACPI is handling backlight control through the generic ACPI handle. It is better to just trust it. While at it, adjust down a printk priority, and test earlier for brightness_enable=0. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Matthew Garrett --- Documentation/laptops/thinkpad-acpi.txt | 4 +++ drivers/platform/x86/thinkpad_acpi.c | 42 ++++++++++++------------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index f6f80257addb..1565eefd6fd5 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -1024,6 +1024,10 @@ ThinkPad-specific interface. The driver will disable its native backlight brightness control interface if it detects that the standard ACPI interface is available in the ThinkPad. +If you want to use the thinkpad-acpi backlight brightness control +instead of the generic ACPI video backlight brightness control for some +reason, you should use the acpi_backlight=vendor kernel parameter. + The brightness_enable module parameter can be used to control whether the LCD brightness control feature will be enabled when available. brightness_enable=0 forces it to be disabled. brightness_enable=1 diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 9d6fc4c7c08e..b9430e887e95 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -6216,28 +6216,6 @@ static int __init brightness_init(struct ibm_init_struct *iibm) if (tp_features.bright_unkfw) return 1; - if (tp_features.bright_acpimode) { - if (acpi_video_backlight_support()) { - if (brightness_enable > 1) { - printk(TPACPI_NOTICE - "Standard ACPI backlight interface " - "available, not loading native one.\n"); - return 1; - } else if (brightness_enable == 1) { - printk(TPACPI_NOTICE - "Backlight control force enabled, even if standard " - "ACPI backlight interface is available\n"); - } - } else { - if (brightness_enable > 1) { - printk(TPACPI_NOTICE - "Standard ACPI backlight interface not " - "available, thinkpad_acpi native " - "brightness control enabled\n"); - } - } - } - if (!brightness_enable) { dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT, "brightness support disabled by " @@ -6245,6 +6223,26 @@ static int __init brightness_init(struct ibm_init_struct *iibm) return 1; } + if (acpi_video_backlight_support()) { + if (brightness_enable > 1) { + printk(TPACPI_INFO + "Standard ACPI backlight interface " + "available, not loading native one.\n"); + return 1; + } else if (brightness_enable == 1) { + printk(TPACPI_WARN + "Cannot enable backlight brightness support, " + "ACPI is already handling it. Refer to the " + "acpi_backlight kernel parameter\n"); + return 1; + } + } else if (tp_features.bright_acpimode && brightness_enable > 1) { + printk(TPACPI_NOTICE + "Standard ACPI backlight interface not " + "available, thinkpad_acpi native " + "brightness control enabled\n"); + } + /* * Check for module parameter bogosity, note that we * init brightness_mode to TPACPI_BRGHT_MODE_MAX in order to be From 34a656d22f5539f613b93e7a1d14b4bd53592505 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Mon, 9 Aug 2010 23:48:20 -0300 Subject: [PATCH 042/104] thinkpad-acpi: lock down size of hotkey keymap Use a safer coding style for the hotkey keymap. This does not fix any problems, as the current code is correct. But it might help avoid mistakes in the future. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Matthew Garrett --- drivers/platform/x86/thinkpad_acpi.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index b9430e887e95..b0b66fbd796c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1911,6 +1911,17 @@ enum { /* hot key scan codes (derived from ACPI DSDT) */ TP_ACPI_HOTKEYSCAN_VOLUMEDOWN, TP_ACPI_HOTKEYSCAN_MUTE, TP_ACPI_HOTKEYSCAN_THINKPAD, + TP_ACPI_HOTKEYSCAN_UNK1, + TP_ACPI_HOTKEYSCAN_UNK2, + TP_ACPI_HOTKEYSCAN_UNK3, + TP_ACPI_HOTKEYSCAN_UNK4, + TP_ACPI_HOTKEYSCAN_UNK5, + TP_ACPI_HOTKEYSCAN_UNK6, + TP_ACPI_HOTKEYSCAN_UNK7, + TP_ACPI_HOTKEYSCAN_UNK8, + + /* Hotkey keymap size */ + TPACPI_HOTKEY_MAP_LEN }; enum { /* Keys/events available through NVRAM polling */ @@ -3113,7 +3124,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) * If the above is too much to ask, don't change the keymap. * Ask the thinkpad-acpi maintainer to do it, instead. */ - static u16 ibm_keycode_map[] __initdata = { + static u16 ibm_keycode_map[TPACPI_HOTKEY_MAP_LEN] __initdata = { /* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */ KEY_FN_F1, KEY_FN_F2, KEY_COFFEE, KEY_SLEEP, KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8, @@ -3147,7 +3158,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, }; - static u16 lenovo_keycode_map[] __initdata = { + static u16 lenovo_keycode_map[TPACPI_HOTKEY_MAP_LEN] __initdata = { /* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */ KEY_FN_F1, KEY_COFFEE, KEY_BATTERY, KEY_SLEEP, KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8, @@ -3191,7 +3202,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, }; -#define TPACPI_HOTKEY_MAP_LEN ARRAY_SIZE(ibm_keycode_map) #define TPACPI_HOTKEY_MAP_SIZE sizeof(ibm_keycode_map) #define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(ibm_keycode_map[0]) @@ -3469,7 +3479,8 @@ static bool hotkey_notify_hotkey(const u32 hkey, *send_acpi_ev = true; *ignore_acpi_ev = false; - if (scancode > 0 && scancode < 0x21) { + /* HKEY event 0x1001 is scancode 0x00 */ + if (scancode > 0 && scancode <= TPACPI_HOTKEY_MAP_LEN) { scancode--; if (!(hotkey_source_mask & (1 << scancode))) { tpacpi_input_send_key_masked(scancode); From d1e14dca6a18aa40394316c872993ae3bc7e311a Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Mon, 9 Aug 2010 23:48:21 -0300 Subject: [PATCH 043/104] thinkpad-acpi: add support for model-specific keymaps Use the quirks engine to select model-specific keymaps, which makes it much easier to extend should we need it. Keycodes are based on the tables at http://www.thinkwiki.org/wiki/Default_meanings_of_special_keys. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Matthew Garrett --- drivers/platform/x86/thinkpad_acpi.c | 60 +++++++++++++++++++--------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index b0b66fbd796c..9ebe4f7d044e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -3093,6 +3093,8 @@ static const struct tpacpi_quirk tpacpi_hotkey_qtable[] __initconst = { TPACPI_Q_IBM('1', 'D', TPACPI_HK_Q_INIMASK), /* X22, X23, X24 */ }; +typedef u16 tpacpi_keymap_t[TPACPI_HOTKEY_MAP_LEN]; + static int __init hotkey_init(struct ibm_init_struct *iibm) { /* Requirements for changing the default keymaps: @@ -3124,9 +3126,17 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) * If the above is too much to ask, don't change the keymap. * Ask the thinkpad-acpi maintainer to do it, instead. */ - static u16 ibm_keycode_map[TPACPI_HOTKEY_MAP_LEN] __initdata = { + + enum keymap_index { + TPACPI_KEYMAP_IBM_GENERIC = 0, + TPACPI_KEYMAP_LENOVO_GENERIC, + }; + + static const tpacpi_keymap_t tpacpi_keymaps[] __initconst = { + /* Generic keymap for IBM ThinkPads */ + [TPACPI_KEYMAP_IBM_GENERIC] = { /* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */ - KEY_FN_F1, KEY_FN_F2, KEY_COFFEE, KEY_SLEEP, + KEY_FN_F1, KEY_BATTERY, KEY_COFFEE, KEY_SLEEP, KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8, KEY_FN_F9, KEY_FN_F10, KEY_FN_F11, KEY_SUSPEND, @@ -3157,8 +3167,10 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) /* (assignments unknown, please report if found) */ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, - }; - static u16 lenovo_keycode_map[TPACPI_HOTKEY_MAP_LEN] __initdata = { + }, + + /* Generic keymap for Lenovo ThinkPads */ + [TPACPI_KEYMAP_LENOVO_GENERIC] = { /* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */ KEY_FN_F1, KEY_COFFEE, KEY_BATTERY, KEY_SLEEP, KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8, @@ -3200,10 +3212,25 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) /* (assignments unknown, please report if found) */ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, + }, }; -#define TPACPI_HOTKEY_MAP_SIZE sizeof(ibm_keycode_map) -#define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(ibm_keycode_map[0]) + static const struct tpacpi_quirk tpacpi_keymap_qtable[] __initconst = { + /* Generic maps (fallback) */ + { + .vendor = PCI_VENDOR_ID_IBM, + .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY, + .quirks = TPACPI_KEYMAP_IBM_GENERIC, + }, + { + .vendor = PCI_VENDOR_ID_LENOVO, + .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY, + .quirks = TPACPI_KEYMAP_LENOVO_GENERIC, + }, + }; + +#define TPACPI_HOTKEY_MAP_SIZE sizeof(tpacpi_keymap_t) +#define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(tpacpi_keymap_t[0]) int res, i; int status; @@ -3212,6 +3239,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) bool tabletsw_state = false; unsigned long quirks; + unsigned long keymap_id; vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, "initializing hotkey subdriver\n"); @@ -3352,7 +3380,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) goto err_exit; /* Set up key map */ - hotkey_keycode_map = kmalloc(TPACPI_HOTKEY_MAP_SIZE, GFP_KERNEL); if (!hotkey_keycode_map) { @@ -3362,17 +3389,14 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) goto err_exit; } - if (tpacpi_is_lenovo()) { - dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, - "using Lenovo default hot key map\n"); - memcpy(hotkey_keycode_map, &lenovo_keycode_map, - TPACPI_HOTKEY_MAP_SIZE); - } else { - dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, - "using IBM default hot key map\n"); - memcpy(hotkey_keycode_map, &ibm_keycode_map, - TPACPI_HOTKEY_MAP_SIZE); - } + keymap_id = tpacpi_check_quirks(tpacpi_keymap_qtable, + ARRAY_SIZE(tpacpi_keymap_qtable)); + BUG_ON(keymap_id >= ARRAY_SIZE(tpacpi_keymaps)); + dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, + "using keymap number %lu\n", keymap_id); + + memcpy(hotkey_keycode_map, &tpacpi_keymaps[keymap_id], + TPACPI_HOTKEY_MAP_SIZE); input_set_capability(tpacpi_inputdev, EV_MSC, MSC_SCAN); tpacpi_inputdev->keycodesize = TPACPI_HOTKEY_MAP_TYPESIZE; From 2b75426282a8eb29d0a004ef0d289b0491c719be Mon Sep 17 00:00:00 2001 From: Jens Taprogge Date: Mon, 9 Aug 2010 23:48:22 -0300 Subject: [PATCH 044/104] thinkpad-acpi: Add KEY_CAMERA (Fn-F6) for Lenovo keyboards On the T410s and most likely other current models, Fn-F6 is labeled as Camera/Headphone key. Report key presses as KEY_CAMERA. Signed-off-by: Jens Taprogge Acked-by: Jerone Young Acked-by: Henrique de Moraes Holschuh Signed-off-by: Matthew Garrett --- drivers/platform/x86/thinkpad_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 9ebe4f7d044e..e35ed128bdef 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -3173,7 +3173,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) [TPACPI_KEYMAP_LENOVO_GENERIC] = { /* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */ KEY_FN_F1, KEY_COFFEE, KEY_BATTERY, KEY_SLEEP, - KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8, + KEY_WLAN, KEY_CAMERA, KEY_SWITCHVIDEOMODE, KEY_FN_F8, KEY_FN_F9, KEY_FN_F10, KEY_FN_F11, KEY_SUSPEND, /* Scan codes 0x0C to 0x1F: Other ACPI HKEY hot keys */ From c3f755e3842108c1cffe570fe9802239810352b6 Mon Sep 17 00:00:00 2001 From: Victor van den Elzen Date: Sun, 15 Aug 2010 01:19:33 +0200 Subject: [PATCH 045/104] platform/x86: move rfkill for Dell Mini 1012 to compal-laptop Like others in the Mini series, the Dell Mini 1012 does not support the smbios hook required by dell-laptop. Signed-off-by: Victor van den Elzen Cc: stable Signed-off-by: Matthew Garrett --- drivers/platform/x86/compal-laptop.c | 9 +++++++++ drivers/platform/x86/dell-laptop.c | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index d071ce056322..097083cac413 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -840,6 +840,14 @@ static struct dmi_system_id __initdata compal_dmi_table[] = { }, .callback = dmi_check_cb }, + { + .ident = "Dell Mini 1012", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), + }, + .callback = dmi_check_cb + }, { .ident = "Dell Inspiron 11z", .matches = { @@ -1092,5 +1100,6 @@ MODULE_ALIAS("dmi:*:rnJHL90:rvrREFERENCE:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron910:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1010:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1011:*"); +MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1012:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1110:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1210:*"); diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index b41ed5cab3e7..4413975912e0 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -121,6 +121,13 @@ static struct dmi_system_id __devinitdata dell_blacklist[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1011"), }, }, + { + .ident = "Dell Mini 1012", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), + }, + }, { .ident = "Dell Inspiron 11z", .matches = { From 0a492896ac07336c98f37ad7fab4a6387b6ada78 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 15 Aug 2010 00:26:14 -0700 Subject: [PATCH 046/104] sparc: Really fix "console=" for serial consoles. If a video head and keyboard are hooked up, specifying "console=ttyS0" or similar to use a serial console will not work properly. The key issue is that we must register all serial console capable devices with register_console(), otherwise the command line specified device won't be found. The sun serial drivers would only register themselves as console devices if the OpenFirmware specified console device node matched. To fix this part we now unconditionally get the serial console register by setting serial_drv->cons always. Secondarily we must not add_preferred_console() using the firmware provided console setting if the user gaven an override on the kernel command line using "console=" The "primary framebuffer" matching logic was always triggering o n openfirmware device node match, make it not when a command line override was given. Reported-by: Frans Pop Tested-by: Frans Pop Signed-off-by: David S. Miller --- arch/sparc/include/asm/fb.h | 4 ++++ drivers/serial/suncore.c | 15 +++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/sparc/include/asm/fb.h b/arch/sparc/include/asm/fb.h index e834880be204..2173432ad7f7 100644 --- a/arch/sparc/include/asm/fb.h +++ b/arch/sparc/include/asm/fb.h @@ -1,5 +1,6 @@ #ifndef _SPARC_FB_H_ #define _SPARC_FB_H_ +#include #include #include #include @@ -18,6 +19,9 @@ static inline int fb_is_primary_device(struct fb_info *info) struct device *dev = info->device; struct device_node *node; + if (console_set_on_cmdline) + return 0; + node = dev->of_node; if (node && node == of_console_device) diff --git a/drivers/serial/suncore.c b/drivers/serial/suncore.c index 544f2e25d0e5..6381a0282ee7 100644 --- a/drivers/serial/suncore.c +++ b/drivers/serial/suncore.c @@ -55,7 +55,12 @@ EXPORT_SYMBOL(sunserial_unregister_minors); int sunserial_console_match(struct console *con, struct device_node *dp, struct uart_driver *drv, int line, bool ignore_line) { - if (!con || of_console_device != dp) + if (!con) + return 0; + + drv->cons = con; + + if (of_console_device != dp) return 0; if (!ignore_line) { @@ -69,12 +74,10 @@ int sunserial_console_match(struct console *con, struct device_node *dp, return 0; } - con->index = line; - drv->cons = con; - - if (!console_set_on_cmdline) + if (!console_set_on_cmdline) { + con->index = line; add_preferred_console(con->name, line, NULL); - + } return 1; } EXPORT_SYMBOL(sunserial_console_match); From b9e0ba8114583b0462cfb18b4ca01fc82894c5c0 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 11 Aug 2010 20:56:03 +0400 Subject: [PATCH 047/104] booting-without-of: Remove nonexistent chapters from TOC, fix numbering Marvell and GPIO bindings live in their own files, so the TOC should not mention them. Also fix chapters numbering. Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely --- Documentation/powerpc/booting-without-of.txt | 31 ++------------------ 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 568fa08e82e5..302db5da49b3 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -49,40 +49,13 @@ Table of Contents f) MDIO on GPIOs g) SPI busses - VII - Marvell Discovery mv64[345]6x System Controller chips - 1) The /system-controller node - 2) Child nodes of /system-controller - a) Marvell Discovery MDIO bus - b) Marvell Discovery ethernet controller - c) Marvell Discovery PHY nodes - d) Marvell Discovery SDMA nodes - e) Marvell Discovery BRG nodes - f) Marvell Discovery CUNIT nodes - g) Marvell Discovery MPSCROUTING nodes - h) Marvell Discovery MPSCINTR nodes - i) Marvell Discovery MPSC nodes - j) Marvell Discovery Watch Dog Timer nodes - k) Marvell Discovery I2C nodes - l) Marvell Discovery PIC (Programmable Interrupt Controller) nodes - m) Marvell Discovery MPP (Multipurpose Pins) multiplexing nodes - n) Marvell Discovery GPP (General Purpose Pins) nodes - o) Marvell Discovery PCI host bridge node - p) Marvell Discovery CPU Error nodes - q) Marvell Discovery SRAM Controller nodes - r) Marvell Discovery PCI Error Handler nodes - s) Marvell Discovery Memory Controller nodes - - VIII - Specifying interrupt information for devices + VII - Specifying interrupt information for devices 1) interrupts property 2) interrupt-parent property 3) OpenPIC Interrupt Controllers 4) ISA Interrupt Controllers - IX - Specifying GPIO information for devices - 1) gpios property - 2) gpio-controller nodes - - X - Specifying device power management information (sleep property) + VIII - Specifying device power management information (sleep property) Appendix A - Sample SOC node for MPC8540 From a8dcb878b628203f917c50b0268bba7fd146d9c0 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 16 Aug 2010 10:31:53 +0200 Subject: [PATCH 048/104] microblaze: Fix of/address: Merge all of the bus translation code Commit dbbdee94734bf6f1db7af42008a53655e77cab8f removed of_irq_pci_swizzle but didn't use pci_swizzle_interrupt_pin instead. Signed-off-by: Michal Simek Signed-off-by: Grant Likely --- arch/microblaze/kernel/prom_parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c index d33ba17601fa..99d9b61cccb5 100644 --- a/arch/microblaze/kernel/prom_parse.c +++ b/arch/microblaze/kernel/prom_parse.c @@ -73,7 +73,7 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq) /* We can only get here if we hit a P2P bridge with no node, * let's do standard swizzling and try again */ - lspec = of_irq_pci_swizzle(PCI_SLOT(pdev->devfn), lspec); + lspec = pci_swizzle_interrupt_pin(pdev, lspec); pdev = ppdev; } From 088ab302f23bfc8abfaada32506d02d64a637b23 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 16 Aug 2010 10:31:54 +0200 Subject: [PATCH 049/104] microblaze: Fix of: eliminate of_device->node and dev_archdata->{of,prom}_node Commit 58f9b0b02414062eaff46716bc04b47d7e79add5 should contain this fix too. Signed-off-by: Michal Simek Signed-off-by: Grant Likely --- arch/microblaze/pci/pci-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 23be25fec4d6..6ca8531a539e 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -1077,7 +1077,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) struct dev_archdata *sd = &dev->dev.archdata; /* Setup OF node pointer in archdata */ - sd->of_node = pci_device_to_OF_node(dev); + dev->dev.of_node = pci_device_to_OF_node(dev); /* Fixup NUMA node as it may not be setup yet by the generic * code and is needed by the DMA init From b590cddfa6f40447158323b43a13cdae01d9a051 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 16 Aug 2010 15:58:29 -0500 Subject: [PATCH 050/104] kdb: fix compile error without CONFIG_KALLSYMS If CONFIG_KGDB_KDB is set and CONFIG_KALLSYMS is not set the kernel will fail to build with the error: kernel/built-in.o: In function `kallsyms_symbol_next': kernel/debug/kdb/kdb_support.c:237: undefined reference to `kdb_walk_kallsyms' kernel/built-in.o: In function `kallsyms_symbol_complete': kernel/debug/kdb/kdb_support.c:193: undefined reference to `kdb_walk_kallsyms' The kdb_walk_kallsyms needs a #ifdef proper header to match the C implementation. This patch also fixes the compiler warnings in kdb_support.c when compiling without CONFIG_KALLSYMS set. The compiler warnings are a result of the kallsyms_lookup() macro not initializing the two of the pass by reference variables. Signed-off-by: Jason Wessel Reported-by: Michal Simek --- kernel/debug/kdb/kdb_private.h | 7 +++++++ kernel/debug/kdb/kdb_support.c | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index c438f545a321..be775f7e81e0 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -255,7 +255,14 @@ extern void kdb_ps1(const struct task_struct *p); extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); extern void kdb_meminfo_proc_show(void); +#ifdef CONFIG_KALLSYMS extern const char *kdb_walk_kallsyms(loff_t *pos); +#else /* ! CONFIG_KALLSYMS */ +static inline const char *kdb_walk_kallsyms(loff_t *pos) +{ + return NULL; +} +#endif /* ! CONFIG_KALLSYMS */ extern char *kdb_getstr(char *, size_t, char *); /* Defines for kdb_symbol_print */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 45344d5c53dd..6b2485dcb050 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -82,8 +82,8 @@ static char *kdb_name_table[100]; /* arbitrary size */ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) { int ret = 0; - unsigned long symbolsize; - unsigned long offset; + unsigned long symbolsize = 0; + unsigned long offset = 0; #define knt1_size 128 /* must be >= kallsyms table size */ char *knt1 = NULL; From 8c8aefce934dc45de641fe78d48ff1b7722d826a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 7 Aug 2010 11:00:59 -0700 Subject: [PATCH 051/104] kgdb: add missing __percpu markup in arch/x86/kernel/kgdb.c breakinfo->pev is a pointer to percpu pointer but was missing __percpu markup. Add it. Signed-off-by: Namhyung Kim Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index ef10940e1af0..852b81967a37 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -194,7 +194,7 @@ static struct hw_breakpoint { unsigned long addr; int len; int type; - struct perf_event **pev; + struct perf_event * __percpu *pev; } breakinfo[HBP_NUM]; static unsigned long early_dr7; From 787ea1c5811b11124d28c8d85f483c4cd37f5c58 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Mon, 16 Aug 2010 15:58:30 -0500 Subject: [PATCH 052/104] arm,kgdb: fix GDB_MAX_REGS no longer used According to commit 22eeef4bb2a7fd225089c0044060ed1fbf091958 kgdb,arm: Individual register get/set for arm It's now replaced by DBG_MAX_REG_NUM. Signed-off-by: Eric Miao Signed-off-by: Jason Wessel --- arch/arm/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 778c2f7024ff..d6e8b4d2e60d 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -79,7 +79,7 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) return; /* Initialize to zero */ - for (regno = 0; regno < GDB_MAX_REGS; regno++) + for (regno = 0; regno < DBG_MAX_REG_NUM; regno++) gdb_regs[regno] = 0; /* Otherwise, we have only some registers from switch_to() */ From fed891c89bfd8ef898c2289b34e748969bb12798 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 16 Aug 2010 15:58:30 -0500 Subject: [PATCH 053/104] vt: fix regression warnings from KMS merge Fix the following new sparse warnings in vt.c introduced by the commit b45cfba4e9005d64d419718e7ff7f7cab44c1994 (vt,console,kdb: implement atomic console enter/leave functions): drivers/char/vt.c:197:5: warning: symbol 'saved_fg_console' was not declared. Should it be static? drivers/char/vt.c:198:5: warning: symbol 'saved_last_console' was not declared. Should it be static? drivers/char/vt.c:199:5: warning: symbol 'saved_want_console' was not declared. Should it be static? drivers/char/vt.c:200:5: warning: symbol 'saved_vc_mode' was not declared. Should it be static? Signed-off-by: Jason Wessel Reviewed-by: Jesse Barnes CC: Andrew Morton --- drivers/char/vt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/vt.c b/drivers/char/vt.c index c734f9b1263a..3af1a2229fec 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -194,10 +194,10 @@ static DECLARE_WORK(console_work, console_callback); int fg_console; int last_console; int want_console = -1; -int saved_fg_console; -int saved_last_console; -int saved_want_console; -int saved_vc_mode; +static int saved_fg_console; +static int saved_last_console; +static int saved_want_console; +static int saved_vc_mode; /* * For each existing display, we have a pointer to console currently visible From beed5336eba6a5a70c97e9f4bfff525915a25003 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 16 Aug 2010 15:58:31 -0500 Subject: [PATCH 054/104] vt,console,kdb: preserve console_blanked while in kdb Commit b45cfba4e9005d64d419718e7ff7f7cab44c1994 (vt,console,kdb: implement atomic console enter/leave functions) introduced the ability to atomically change the console mode with kernel mode setting but did not preserve the state of the console_blanked variable. The console_blanked variable must be restored when executing the con_debug_leave() or further kernel mode set changes (such as using chvt X) will fail to correctly set the state of console. Signed-off-by: Jason Wessel Reviewed-by: Jesse Barnes CC: Andrew Morton --- drivers/char/vt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 3af1a2229fec..50590c7f2c01 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -198,6 +198,7 @@ static int saved_fg_console; static int saved_last_console; static int saved_want_console; static int saved_vc_mode; +static int saved_console_blanked; /* * For each existing display, we have a pointer to console currently visible @@ -3449,6 +3450,7 @@ int con_debug_enter(struct vc_data *vc) saved_last_console = last_console; saved_want_console = want_console; saved_vc_mode = vc->vc_mode; + saved_console_blanked = console_blanked; vc->vc_mode = KD_TEXT; console_blanked = 0; if (vc->vc_sw->con_debug_enter) @@ -3492,6 +3494,7 @@ int con_debug_leave(void) fg_console = saved_fg_console; last_console = saved_last_console; want_console = saved_want_console; + console_blanked = saved_console_blanked; vc_cons[fg_console].d->vc_mode = saved_vc_mode; vc = vc_cons[fg_console].d; From 60652d07a028595df5c2582e915325d643a3800d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 16 Aug 2010 12:20:59 +1000 Subject: [PATCH 055/104] ata: update for of_device to platform_device replacement Signed-off-by: Stephen Rothwell Signed-off-by: Grant Likely --- drivers/ata/sata_dwc_460ex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index ea24c1e51be2..2673a3d14806 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -1588,7 +1588,7 @@ static const struct ata_port_info sata_dwc_port_info[] = { }, }; -static int sata_dwc_probe(struct of_device *ofdev, +static int sata_dwc_probe(struct platform_device *ofdev, const struct of_device_id *match) { struct sata_dwc_device *hsdev; @@ -1702,7 +1702,7 @@ error_out: return err; } -static int sata_dwc_remove(struct of_device *ofdev) +static int sata_dwc_remove(struct platform_device *ofdev) { struct device *dev = &ofdev->dev; struct ata_host *host = dev_get_drvdata(dev); From 8e8073a449b2e00641c095ad55bd56f43468daf9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Aug 2010 15:04:29 -0700 Subject: [PATCH 056/104] sparc: Hook up new fanotify and prlimit64 syscalls. The only tricky bit is the compat version of fanotify_mark, which which on 32-bit the 64-bit mark argument is passed in as "high32", "low32". Signed-off-by: David S. Miller --- arch/sparc/include/asm/unistd.h | 5 ++++- arch/sparc/kernel/sys32.S | 9 +++++++++ arch/sparc/kernel/systbls_32.S | 3 ++- arch/sparc/kernel/systbls_64.S | 6 ++++-- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index d0b3b01ac9d4..03eb5a8f6f93 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -397,8 +397,11 @@ #define __NR_rt_tgsigqueueinfo 326 #define __NR_perf_event_open 327 #define __NR_recvmmsg 328 +#define __NR_fanotify_init 329 +#define __NR_fanotify_mark 330 +#define __NR_prlimit64 331 -#define NR_syscalls 329 +#define NR_syscalls 332 #ifdef __32bit_syscall_numbers__ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index 46a76ba3fb4b..44e5faf1ad5f 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -330,6 +330,15 @@ do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */ nop nop + .globl sys32_fanotify_mark +sys32_fanotify_mark: + sethi %hi(sys_fanotify_mark), %g1 + sllx %o2, 32, %o2 + or %o2, %o3, %o2 + mov %o4, %o3 + jmpl %g1 + %lo(sys_fanotify_mark), %g0 + mov %o5, %o4 + .section __ex_table,"a" .align 4 .word 1b, __retl_efault, 2b, __retl_efault diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 801fc8e5a0e8..ec396e1916b9 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -82,5 +82,6 @@ sys_call_table: /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv -/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg +/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init +/*330*/ .long sys_fanotify_mark, sys_prlimit64 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 9db058dd039e..8cfcaa549580 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -83,7 +83,8 @@ sys_call_table32: /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv - .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init +/*330*/ .word sys32_fanotify_mark, sys_prlimit64 #endif /* CONFIG_COMPAT */ @@ -158,4 +159,5 @@ sys_call_table: /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv - .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg + .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init +/*330*/ .word sys_fanotify_mark, sys_prlimit64 From 7cb4dc9fc95f89587f57f287b47e091d7806255e Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 11 Aug 2010 11:28:02 +0200 Subject: [PATCH 057/104] AppArmor: fix task_setrlimit prototype After rlimits tree was merged we get the following errors: security/apparmor/lsm.c:663:2: warning: initialization from incompatible pointer type It is because AppArmor was merged in the meantime, but uses the old prototype. So fix it by adding struct task_struct as a first parameter of apparmor_task_setrlimit. NOTE that this is ONLY a compilation warning fix (and crashes caused by that). It needs proper handling in AppArmor depending on who is the 'task'. Signed-off-by: Jiri Slaby Signed-off-by: John Johansen Signed-off-by: James Morris --- security/apparmor/lsm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index d5666d3cc21b..f73e2c204218 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -607,8 +607,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name, return error; } -static int apparmor_task_setrlimit(unsigned int resource, - struct rlimit *new_rlim) +static int apparmor_task_setrlimit(struct task_struct *task, + unsigned int resource, struct rlimit *new_rlim) { struct aa_profile *profile = aa_current_profile(); int error = 0; From f1ca09b2b5c9dd3988c61818a7d621b1400e4f0c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 16 Aug 2010 23:44:49 -0600 Subject: [PATCH 058/104] of: Fix missing includes This patch fixes missing includes from a number of .c files because the code (wrongfully) depended on prom.h including them. The include of linux/of_address.h was removed in microblaze prom.h in commit "of/address: Clean up function declarations" (sha1 id 22ae782f8), but not fixed in some callers. This patch fixes them up. Signed-off-by: Grant Likely Tested-by: Michal Simek --- arch/microblaze/pci/pci-common.c | 3 ++- arch/microblaze/pci/xilinx_pci.c | 1 + drivers/char/xilinx_hwicap/xilinx_hwicap.c | 1 + drivers/serial/of_serial.c | 3 +-- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 6ca8531a539e..55ef532f32be 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -27,10 +27,11 @@ #include #include #include +#include +#include #include #include -#include #include #include diff --git a/arch/microblaze/pci/xilinx_pci.c b/arch/microblaze/pci/xilinx_pci.c index 7869a41b0f94..0687a42a5bd4 100644 --- a/arch/microblaze/pci/xilinx_pci.c +++ b/arch/microblaze/pci/xilinx_pci.c @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c index 0ed763cd2e77..b663d573aad9 100644 --- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c +++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c @@ -94,6 +94,7 @@ #ifdef CONFIG_OF /* For open firmware. */ +#include #include #include #endif diff --git a/drivers/serial/of_serial.c b/drivers/serial/of_serial.c index 659a695bdad6..2af8fd113123 100644 --- a/drivers/serial/of_serial.c +++ b/drivers/serial/of_serial.c @@ -14,11 +14,10 @@ #include #include #include +#include #include #include -#include - struct of_serial_info { int type; int line; From 625fdcaa6d2a3db6a922bbd833450424e906111c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 12 Aug 2010 12:31:21 -0700 Subject: [PATCH 059/104] latencytop: Fix kconfig dependency warnings warning: (LATENCYTOP && HAVE_LATENCYTOP_SUPPORT) selects SCHED_DEBUG which has unmet direct dependencies (DEBUG_KERNEL && PROC_FS) warning: (LATENCYTOP && HAVE_LATENCYTOP_SUPPORT) selects SCHEDSTATS which has unmet direct dependencies (DEBUG_KERNEL && PROC_FS) Add depends on STACKTRACE_SUPPORT for 'select STACKTRACE'. Add depends on PROC_FS since that is where the output goes. Signed-off-by: Randy Dunlap Cc: Arjan van de Ven LKML-Reference: <20100812123121.a7c99cde.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9e06b7f5ecf1..1b4afd2e6ca0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -994,13 +994,16 @@ config FAULT_INJECTION_STACKTRACE_FILTER config LATENCYTOP bool "Latency measuring infrastructure" + depends on HAVE_LATENCYTOP_SUPPORT + depends on DEBUG_KERNEL + depends on STACKTRACE_SUPPORT + depends on PROC_FS select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL select STACKTRACE select SCHEDSTATS select SCHED_DEBUG - depends on HAVE_LATENCYTOP_SUPPORT help Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. From c69aefabe004d24e6eedf83b6f253647f77dfc43 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 17 Aug 2010 10:39:22 +0200 Subject: [PATCH 060/104] ALSA: hda - Fix ALC680 base model capture - Fix capture mixer elements for ALC680 base model - Support auto change ADC for recording from MIC - Cancel capture source assigned in auto mode. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 176 +++++++++++++++++++++++++++------- 1 file changed, 144 insertions(+), 32 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2cd1ae809e46..a4dd04524e43 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -19030,6 +19030,7 @@ static int patch_alc888(struct hda_codec *codec) /* * ALC680 support */ +#define ALC680_DIGIN_NID ALC880_DIGIN_NID #define ALC680_DIGOUT_NID ALC880_DIGOUT_NID #define alc680_modes alc260_modes @@ -19044,23 +19045,93 @@ static hda_nid_t alc680_adc_nids[3] = { 0x07, 0x08, 0x09 }; +/* + * Analog capture ADC cgange + */ +static int alc680_capture_pcm_prepare(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + unsigned int stream_tag, + unsigned int format, + struct snd_pcm_substream *substream) +{ + struct alc_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + unsigned int pre_mic, pre_line; + + pre_mic = snd_hda_jack_detect(codec, cfg->input_pins[AUTO_PIN_MIC]); + pre_line = snd_hda_jack_detect(codec, cfg->input_pins[AUTO_PIN_LINE]); + + spec->cur_adc_stream_tag = stream_tag; + spec->cur_adc_format = format; + + if (pre_mic || pre_line) { + if (pre_mic) + snd_hda_codec_setup_stream(codec, 0x08, stream_tag, 0, + format); + else + snd_hda_codec_setup_stream(codec, 0x09, stream_tag, 0, + format); + } else + snd_hda_codec_setup_stream(codec, 0x07, stream_tag, 0, format); + return 0; +} + +static int alc680_capture_pcm_cleanup(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream) +{ + snd_hda_codec_cleanup_stream(codec, 0x07); + snd_hda_codec_cleanup_stream(codec, 0x08); + snd_hda_codec_cleanup_stream(codec, 0x09); + return 0; +} + +static struct hda_pcm_stream alc680_pcm_analog_auto_capture = { + .substreams = 1, /* can be overridden */ + .channels_min = 2, + .channels_max = 2, + /* NID is set in alc_build_pcms */ + .ops = { + .prepare = alc680_capture_pcm_prepare, + .cleanup = alc680_capture_pcm_cleanup + }, +}; + static struct snd_kcontrol_new alc680_base_mixer[] = { /* output mixer control */ HDA_CODEC_VOLUME("Front Playback Volume", 0x2, 0x0, HDA_OUTPUT), HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT), HDA_CODEC_VOLUME("Headphone Playback Volume", 0x4, 0x0, HDA_OUTPUT), HDA_CODEC_MUTE("Headphone Playback Switch", 0x16, 0x0, HDA_OUTPUT), + HDA_CODEC_VOLUME("Int Mic Boost", 0x12, 0, HDA_INPUT), HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT), + HDA_CODEC_VOLUME("Line In Boost", 0x19, 0, HDA_INPUT), { } }; -static struct snd_kcontrol_new alc680_capture_mixer[] = { - HDA_CODEC_VOLUME("Capture Volume", 0x07, 0x0, HDA_INPUT), - HDA_CODEC_MUTE("Capture Switch", 0x07, 0x0, HDA_INPUT), - HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x08, 0x0, HDA_INPUT), - HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x08, 0x0, HDA_INPUT), - HDA_CODEC_VOLUME_IDX("Capture Volume", 2, 0x09, 0x0, HDA_INPUT), - HDA_CODEC_MUTE_IDX("Capture Switch", 2, 0x09, 0x0, HDA_INPUT), +static struct hda_bind_ctls alc680_bind_cap_vol = { + .ops = &snd_hda_bind_vol, + .values = { + HDA_COMPOSE_AMP_VAL(0x07, 3, 0, HDA_INPUT), + HDA_COMPOSE_AMP_VAL(0x08, 3, 0, HDA_INPUT), + HDA_COMPOSE_AMP_VAL(0x09, 3, 0, HDA_INPUT), + 0 + }, +}; + +static struct hda_bind_ctls alc680_bind_cap_switch = { + .ops = &snd_hda_bind_sw, + .values = { + HDA_COMPOSE_AMP_VAL(0x07, 3, 0, HDA_INPUT), + HDA_COMPOSE_AMP_VAL(0x08, 3, 0, HDA_INPUT), + HDA_COMPOSE_AMP_VAL(0x09, 3, 0, HDA_INPUT), + 0 + }, +}; + +static struct snd_kcontrol_new alc680_master_capture_mixer[] = { + HDA_BIND_VOL("Capture Volume", &alc680_bind_cap_vol), + HDA_BIND_SW("Capture Switch", &alc680_bind_cap_switch), { } /* end */ }; @@ -19068,25 +19139,73 @@ static struct snd_kcontrol_new alc680_capture_mixer[] = { * generic initialization of ADC, input mixers and output mixers */ static struct hda_verb alc680_init_verbs[] = { - /* Unmute DAC0-1 and set vol = 0 */ - {0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO}, - {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO}, - {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO}, + {0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, - {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40}, - {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40}, - {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0}, - {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24}, - {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20}, + {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN}, + {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT}, + {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT}, + {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP}, + {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80}, + {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN}, {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, + + {0x16, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN}, + {0x18, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_MIC_EVENT | AC_USRSP_EN}, + { } }; +/* toggle speaker-output according to the hp-jack state */ +static void alc680_base_setup(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + spec->autocfg.hp_pins[0] = 0x16; + spec->autocfg.speaker_pins[0] = 0x14; + spec->autocfg.speaker_pins[1] = 0x15; + spec->autocfg.input_pins[AUTO_PIN_MIC] = 0x18; + spec->autocfg.input_pins[AUTO_PIN_LINE] = 0x19; +} + +static void alc680_rec_autoswitch(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + unsigned int present; + hda_nid_t new_adc; + + present = snd_hda_jack_detect(codec, cfg->input_pins[AUTO_PIN_MIC]); + + new_adc = present ? 0x8 : 0x7; + __snd_hda_codec_cleanup_stream(codec, !present ? 0x8 : 0x7, 1); + snd_hda_codec_setup_stream(codec, new_adc, + spec->cur_adc_stream_tag, 0, + spec->cur_adc_format); + +} + +static void alc680_unsol_event(struct hda_codec *codec, + unsigned int res) +{ + if ((res >> 26) == ALC880_HP_EVENT) + alc_automute_amp(codec); + if ((res >> 26) == ALC880_MIC_EVENT) + alc680_rec_autoswitch(codec); +} + +static void alc680_inithook(struct hda_codec *codec) +{ + alc_automute_amp(codec); + alc680_rec_autoswitch(codec); +} + /* create input playback/capture controls for the given pin */ static int alc680_new_analog_output(struct alc_spec *spec, hda_nid_t nid, const char *ctlname, int idx) @@ -19197,13 +19316,7 @@ static void alc680_auto_init_hp_out(struct hda_codec *codec) #define alc680_pcm_analog_capture alc880_pcm_analog_capture #define alc680_pcm_analog_alt_capture alc880_pcm_analog_alt_capture #define alc680_pcm_digital_playback alc880_pcm_digital_playback - -static struct hda_input_mux alc680_capture_source = { - .num_items = 1, - .items = { - { "Mic", 0x0 }, - }, -}; +#define alc680_pcm_digital_capture alc880_pcm_digital_capture /* * BIOS auto configuration @@ -19218,6 +19331,7 @@ static int alc680_parse_auto_config(struct hda_codec *codec) alc680_ignore); if (err < 0) return err; + if (!spec->autocfg.line_outs) { if (spec->autocfg.dig_outs || spec->autocfg.dig_in_pin) { spec->multiout.max_channels = 2; @@ -19239,8 +19353,6 @@ static int alc680_parse_auto_config(struct hda_codec *codec) add_mixer(spec, spec->kctls.list); add_verb(spec, alc680_init_verbs); - spec->num_mux_defs = 1; - spec->input_mux = &alc680_capture_source; err = alc_auto_add_mic_boost(codec); if (err < 0) @@ -19279,17 +19391,17 @@ static struct snd_pci_quirk alc680_cfg_tbl[] = { static struct alc_config_preset alc680_presets[] = { [ALC680_BASE] = { .mixers = { alc680_base_mixer }, - .cap_mixer = alc680_capture_mixer, + .cap_mixer = alc680_master_capture_mixer, .init_verbs = { alc680_init_verbs }, .num_dacs = ARRAY_SIZE(alc680_dac_nids), .dac_nids = alc680_dac_nids, - .num_adc_nids = ARRAY_SIZE(alc680_adc_nids), - .adc_nids = alc680_adc_nids, - .hp_nid = 0x04, .dig_out_nid = ALC680_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc680_modes), .channel_mode = alc680_modes, - .input_mux = &alc680_capture_source, + .unsol_event = alc680_unsol_event, + .setup = alc680_base_setup, + .init_hook = alc680_inithook, + }, }; @@ -19333,9 +19445,9 @@ static int patch_alc680(struct hda_codec *codec) setup_preset(codec, &alc680_presets[board_config]); spec->stream_analog_playback = &alc680_pcm_analog_playback; - spec->stream_analog_capture = &alc680_pcm_analog_capture; - spec->stream_analog_alt_capture = &alc680_pcm_analog_alt_capture; + spec->stream_analog_capture = &alc680_pcm_analog_auto_capture; spec->stream_digital_playback = &alc680_pcm_digital_playback; + spec->stream_digital_capture = &alc680_pcm_digital_capture; if (!spec->adc_nids) { spec->adc_nids = alc680_adc_nids; From 033a273f9836b592dd568abd0f655be469d66704 Mon Sep 17 00:00:00 2001 From: Bernd Petrovitsch Date: Tue, 17 Aug 2010 12:22:08 -0300 Subject: [PATCH 061/104] perf tools: Fix build on POSIX shells POSIX sh does not specify the brace expansion, so fix it by replacing the global $(shell ...) lines quite at the top creating the output directories with real rules. Cc: Ingo Molnar Cc: Kusanagi Kouichi Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1282046280.5822.4.camel@thorin> Signed-off-by: Bernd Petrovitsch Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 41abb90df50d..dcb9700b88d2 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -157,10 +157,6 @@ all:: # # Define NO_DWARF if you do not want debug-info analysis feature at all. -$(shell sh -c 'mkdir -p $(OUTPUT)scripts/{perl,python}/Perf-Trace-Util/' 2> /dev/null) -$(shell sh -c 'mkdir -p $(OUTPUT)util/{ui/browsers,scripting-engines}/' 2> /dev/null) -$(shell sh -c 'mkdir $(OUTPUT)bench' 2> /dev/null) - $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) -include $(OUTPUT)PERF-VERSION-FILE @@ -186,8 +182,6 @@ ifeq ($(ARCH),x86_64) ARCH := x86 endif -$(shell sh -c 'mkdir -p $(OUTPUT)arch/$(ARCH)/util/' 2> /dev/null) - # CFLAGS and LDFLAGS are for the users to override from the command line. # @@ -268,6 +262,7 @@ export prefix bindir sharedir sysconfdir CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar RM = rm -f +MKDIR = mkdir TAR = tar FIND = find INSTALL = install @@ -838,6 +833,7 @@ ifndef V QUIET_CC = @echo ' ' CC $@; QUIET_AR = @echo ' ' AR $@; QUIET_LINK = @echo ' ' LINK $@; + QUIET_MKDIR = @echo ' ' MKDIR $@; QUIET_BUILT_IN = @echo ' ' BUILTIN $@; QUIET_GEN = @echo ' ' GEN $@; QUIET_SUBDIR0 = +@subdir= @@ -1012,6 +1008,14 @@ $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) $(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) builtin-revert.o wt-status.o: wt-status.h +# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So +# we depend the various files onto their directories. +DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h +$(DIRECTORY_DEPS): $(sort $(dir $(DIRECTORY_DEPS))) +# In the second step, we make a rule to actually create these directories +$(sort $(dir $(DIRECTORY_DEPS))): + $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null + $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) From 7a50d06e242614f02004faed2972a8f2c9336d61 Mon Sep 17 00:00:00 2001 From: Graeme Smecher Date: Tue, 17 Aug 2010 10:13:44 -0700 Subject: [PATCH 062/104] of: fix missing headers for of_address_to_resource() in MTD and SysACE drivers The drivers for Xilinx' SystemACE and physically mapped MTDs were missing prototypes for of_address_to_resource(). This patch adds the necessary headers. Signed-off-by: Graeme Smecher Signed-off-by: Grant Likely --- drivers/block/xsysace.c | 1 + drivers/mtd/maps/physmap_of.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 2982b3ee9465..057413bb16e2 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -94,6 +94,7 @@ #include #include #if defined(CONFIG_OF) +#include #include #include #endif diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index 00af55d7afba..fe63f6bd663c 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include From 99c796df94afca5256860dd4760017f1dbb3480c Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Aug 2010 22:13:22 +0100 Subject: [PATCH 063/104] VIDEO: amba clcd: don't disable an already disabled clock Fix the clock enable/disable tracking in the AMBA CLCD driver so that the driver doesn't try to disable an already disabled clock, thereby causing the clock (if shared) to become unbalanced. This resolves a problem with CLCD on LPC32xx ARM platforms. Reported-by: Kevin Wells Signed-off-by: Russell King --- drivers/video/amba-clcd.c | 10 ++++++++-- include/linux/amba/clcd.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index afe21e6eb544..1c2c68356ea7 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -80,7 +80,10 @@ static void clcdfb_disable(struct clcd_fb *fb) /* * Disable CLCD clock source. */ - clk_disable(fb->clk); + if (fb->clk_enabled) { + fb->clk_enabled = false; + clk_disable(fb->clk); + } } static void clcdfb_enable(struct clcd_fb *fb, u32 cntl) @@ -88,7 +91,10 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl) /* * Enable the CLCD clock source. */ - clk_enable(fb->clk); + if (!fb->clk_enabled) { + fb->clk_enabled = true; + clk_enable(fb->clk); + } /* * Bring up by first enabling.. diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index ca16c3801a1e..be33b3affc8a 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -150,6 +150,7 @@ struct clcd_fb { u16 off_cntl; u32 clcd_cntl; u32 cmap[16]; + bool clk_enabled; }; static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) From df486a25900f4dba9cdc3886c4ac871951c6aef3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Aug 2010 17:42:45 -0400 Subject: [PATCH 064/104] NFS: Fix the selection of security flavours in Kconfig Randy Dunlap reports: ERROR: "svc_gss_principal" [fs/nfs/nfs.ko] undefined! because in fs/nfs/Kconfig, NFS_V4 selects RPCSEC_GSS_KRB5 and/or in fs/nfsd/Kconfig, NFSD_V4 selects RPCSEC_GSS_KRB5. RPCSEC_GSS_KRB5 does 5 selects, but none of these is enforced/followed by the fs/nfs[d]/Kconfig configs: select SUNRPC_GSS select CRYPTO select CRYPTO_MD5 select CRYPTO_DES select CRYPTO_CBC Reported-by: Randy Dunlap Cc: J. Bruce Fields Acked-by: Randy Dunlap Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 1 - fs/nfsd/Kconfig | 1 - net/sunrpc/Kconfig | 9 +++++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index cc1bb33b59b8..2ddc384ec042 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -63,7 +63,6 @@ config NFS_V3_ACL config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS - select RPCSEC_GSS_KRB5 help This option enables support for version 4 of the NFS protocol (RFC 3530) in the kernel's NFS client. diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 503b9da159a3..95932f523aef 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -69,7 +69,6 @@ config NFSD_V4 depends on NFSD && PROC_FS && EXPERIMENTAL select NFSD_V3 select FS_POSIX_ACL - select RPCSEC_GSS_KRB5 help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 443c161eb8bd..3376d7657185 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -18,10 +18,11 @@ config SUNRPC_XPRT_RDMA If unsure, say N. config RPCSEC_GSS_KRB5 - tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL + tristate + depends on SUNRPC && CRYPTO + prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4) + default y select SUNRPC_GSS - select CRYPTO select CRYPTO_MD5 select CRYPTO_DES select CRYPTO_CBC @@ -34,7 +35,7 @@ config RPCSEC_GSS_KRB5 available from http://linux-nfs.org/. In addition, user-space Kerberos support should be installed. - If unsure, say N. + If unsure, say Y. config RPCSEC_GSS_SPKM3 tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" From 001389b9581c13fe5fc357a0f89234f85af4215d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Aug 2010 10:22:10 +0000 Subject: [PATCH 065/104] netfilter: {ip,ip6,arp}_tables: avoid lockdep false positive After commit 24b36f019 (netfilter: {ip,ip6,arp}_tables: dont block bottom half more than necessary), lockdep can raise a warning because we attempt to lock a spinlock with BH enabled, while the same lock is usually locked by another cpu in a softirq context. Disable again BH to avoid these lockdep warnings. Reported-by: Linus Torvalds Diagnosed-by: David S. Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 2 ++ net/ipv4/netfilter/ip_tables.c | 2 ++ net/ipv6/netfilter/ip6_tables.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 6bccba31d132..51d6c3167975 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -735,6 +735,7 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -742,6 +743,7 @@ static void get_counters(const struct xt_table_info *t, ++i; } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c439721b165a..97b64b22c412 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -909,6 +909,7 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -916,6 +917,7 @@ get_counters(const struct xt_table_info *t, ++i; /* macro does multi eval of i */ } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 5359ef4daac5..29a7bca29e3f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -922,6 +922,7 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -929,6 +930,7 @@ get_counters(const struct xt_table_info *t, ++i; } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } From 1c40be12f7d8ca1d387510d39787b12e512a7ce8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Aug 2010 20:04:22 +0000 Subject: [PATCH 066/104] net sched: fix some kernel memory leaks We leak at least 32bits of kernel memory to user land in tc dump, because we dont init all fields (capab ?) of the dumped structure. Use C99 initializers so that holes and non explicit fields are zeroed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/act_gact.c | 21 ++++++++++++--------- net/sched/act_mirred.c | 15 ++++++++------- net/sched/act_nat.c | 22 +++++++++++----------- net/sched/act_simple.c | 11 ++++++----- net/sched/act_skbedit.c | 11 ++++++----- 5 files changed, 43 insertions(+), 37 deletions(-) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 8406c6654990..c2ed90a4c0b4 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -152,21 +152,24 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); - struct tc_gact opt; struct tcf_gact *gact = a->priv; + struct tc_gact opt = { + .index = gact->tcf_index, + .refcnt = gact->tcf_refcnt - ref, + .bindcnt = gact->tcf_bindcnt - bind, + .action = gact->tcf_action, + }; struct tcf_t t; - opt.index = gact->tcf_index; - opt.refcnt = gact->tcf_refcnt - ref; - opt.bindcnt = gact->tcf_bindcnt - bind; - opt.action = gact->tcf_action; NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); #ifdef CONFIG_GACT_PROB if (gact->tcfg_ptype) { - struct tc_gact_p p_opt; - p_opt.paction = gact->tcfg_paction; - p_opt.pval = gact->tcfg_pval; - p_opt.ptype = gact->tcfg_ptype; + struct tc_gact_p p_opt = { + .paction = gact->tcfg_paction, + .pval = gact->tcfg_pval, + .ptype = gact->tcfg_ptype, + }; + NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); } #endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 11f195af2da0..0c311be92827 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -219,15 +219,16 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = a->priv; - struct tc_mirred opt; + struct tc_mirred opt = { + .index = m->tcf_index, + .action = m->tcf_action, + .refcnt = m->tcf_refcnt - ref, + .bindcnt = m->tcf_bindcnt - bind, + .eaction = m->tcfm_eaction, + .ifindex = m->tcfm_ifindex, + }; struct tcf_t t; - opt.index = m->tcf_index; - opt.action = m->tcf_action; - opt.refcnt = m->tcf_refcnt - ref; - opt.bindcnt = m->tcf_bindcnt - bind; - opt.eaction = m->tcfm_eaction; - opt.ifindex = m->tcfm_ifindex; NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 509a2d53a99d..186eb837e600 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -272,19 +272,19 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_nat *p = a->priv; - struct tc_nat opt; + struct tc_nat opt = { + .old_addr = p->old_addr, + .new_addr = p->new_addr, + .mask = p->mask, + .flags = p->flags, + + .index = p->tcf_index, + .action = p->tcf_action, + .refcnt = p->tcf_refcnt - ref, + .bindcnt = p->tcf_bindcnt - bind, + }; struct tcf_t t; - opt.old_addr = p->old_addr; - opt.new_addr = p->new_addr; - opt.mask = p->mask; - opt.flags = p->flags; - - opt.index = p->tcf_index; - opt.action = p->tcf_action; - opt.refcnt = p->tcf_refcnt - ref; - opt.bindcnt = p->tcf_bindcnt - bind; - NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 4a1d640b0cf1..97e84f3ee775 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -164,13 +164,14 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_defact *d = a->priv; - struct tc_defact opt; + struct tc_defact opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; struct tcf_t t; - opt.index = d->tcf_index; - opt.refcnt = d->tcf_refcnt - ref; - opt.bindcnt = d->tcf_bindcnt - bind; - opt.action = d->tcf_action; NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata); t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e9607fe55b58..66cbf4eb8855 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -159,13 +159,14 @@ static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_skbedit *d = a->priv; - struct tc_skbedit opt; + struct tc_skbedit opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; struct tcf_t t; - opt.index = d->tcf_index; - opt.refcnt = d->tcf_refcnt - ref; - opt.bindcnt = d->tcf_bindcnt - bind; - opt.action = d->tcf_action; NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt); if (d->flags & SKBEDIT_F_PRIORITY) NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), From ef201bebe5afc91a2b99b45dacc8c6dd88ca9e58 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Aug 2010 17:09:53 -0700 Subject: [PATCH 067/104] sparc64: Fix rwsem constant bug leading to hangs. As noticed by Linus, it is critical that some of the rwsem constants be signed. Yet, hex constants are unsigned unless explicitly casted or negated. The most critical one is RWSEM_WAITING_BIAS. This bug was exacerbated by commit 424acaaeb3a3932d64a9b4bd59df6cf72c22d8f3 ("rwsem: wake queued readers when writer blocks on active read lock") Signed-off-by: David S. Miller --- arch/sparc/include/asm/rwsem-const.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/rwsem-const.h b/arch/sparc/include/asm/rwsem-const.h index a303c9d64d84..e4c61a18bb28 100644 --- a/arch/sparc/include/asm/rwsem-const.h +++ b/arch/sparc/include/asm/rwsem-const.h @@ -5,7 +5,7 @@ #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 #define RWSEM_ACTIVE_MASK 0x0000ffff -#define RWSEM_WAITING_BIAS 0xffff0000 +#define RWSEM_WAITING_BIAS (-0x00010000) #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) From 86fa04b8742ac681d470786f55e2403ada0075b2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Aug 2010 17:12:04 -0700 Subject: [PATCH 068/104] sparc64: Fix atomic64_t routine return values. Should return 'long' instead of 'int'. Thanks to Dimitris Michailidis and Tony Luck. Signed-off-by: David S. Miller --- arch/sparc/include/asm/atomic_64.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 2050ca02c423..f0c74227c737 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -25,9 +25,9 @@ extern void atomic_sub(int, atomic_t *); extern void atomic64_sub(int, atomic64_t *); extern int atomic_add_ret(int, atomic_t *); -extern int atomic64_add_ret(int, atomic64_t *); +extern long atomic64_add_ret(int, atomic64_t *); extern int atomic_sub_ret(int, atomic_t *); -extern int atomic64_sub_ret(int, atomic64_t *); +extern long atomic64_sub_ret(int, atomic64_t *); #define atomic_dec_return(v) atomic_sub_ret(1, v) #define atomic64_dec_return(v) atomic64_sub_ret(1, v) @@ -91,7 +91,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) { long c, old; c = atomic64_read(v); From e5093aec2e6b60c3df2420057ffab9ed4a6d2792 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 11 Aug 2010 02:02:10 +0000 Subject: [PATCH 069/104] net: Fix a memmove bug in dev_gro_receive() >Xin Xiaohui wrote: > I looked into the code dev_gro_receive(), found the code here: > if the frags[0] is pulled to 0, then the page will be released, > and memmove() frags left. > Is that right? I'm not sure if memmove do right or not, but > frags[0].size is never set after memove at least. what I think > a simple way is not to do anything if we found frags[0].size == 0. > The patch is as followed. ... This version of the patch fixes the bug directly in memmove. Reported-by: "Xin, Xiaohui" Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1ae654391442..3721fbb9a83c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3143,7 +3143,7 @@ pull: put_page(skb_shinfo(skb)->frags[0].page); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, - --skb_shinfo(skb)->nr_frags); + --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); } } From d7627467b7a8dd6944885290a03a07ceb28c10eb Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Aug 2010 23:52:56 +0100 Subject: [PATCH 070/104] Make do_execve() take a const filename pointer Make do_execve() take a const filename pointer so that kernel_execve() compiles correctly on ARM: arch/arm/kernel/sys_arm.c:88: warning: passing argument 1 of 'do_execve' discards qualifiers from pointer target type This also requires the argv and envp arguments to be consted twice, once for the pointer array and once for the strings the array points to. This is because do_execve() passes a pointer to the filename (now const) to copy_strings_kernel(). A simpler alternative would be to cast the filename pointer in do_execve() when it's passed to copy_strings_kernel(). do_execve() may not change any of the strings it is passed as part of the argv or envp lists as they are some of them in .rodata, so marking these strings as const should be fine. Further kernel_execve() and sys_execve() need to be changed to match. This has been test built on x86_64, frv, arm and mips. Signed-off-by: David Howells Tested-by: Ralf Baechle Acked-by: Russell King Signed-off-by: Linus Torvalds --- arch/alpha/kernel/process.c | 5 +++-- arch/arm/kernel/sys_arm.c | 14 +++++++++----- arch/avr32/kernel/process.c | 5 +++-- arch/avr32/kernel/sys_avr32.c | 4 +++- arch/blackfin/kernel/process.c | 4 +++- arch/cris/arch-v10/kernel/process.c | 4 +++- arch/cris/arch-v32/kernel/process.c | 6 ++++-- arch/frv/kernel/process.c | 5 +++-- arch/h8300/kernel/process.c | 5 ++++- arch/h8300/kernel/sys_h8300.c | 4 +++- arch/ia64/kernel/process.c | 4 +++- arch/m32r/kernel/process.c | 4 ++-- arch/m32r/kernel/sys_m32r.c | 4 +++- arch/m68k/kernel/process.c | 4 +++- arch/m68k/kernel/sys_m68k.c | 4 +++- arch/m68knommu/kernel/process.c | 4 +++- arch/m68knommu/kernel/sys_m68k.c | 4 +++- arch/microblaze/kernel/sys_microblaze.c | 10 +++++++--- arch/mips/kernel/syscall.c | 10 +++++++--- arch/mn10300/kernel/process.c | 4 ++-- arch/parisc/hpux/fs.c | 6 ++++-- arch/parisc/kernel/process.c | 15 ++++++++++----- arch/powerpc/kernel/process.c | 5 +++-- arch/s390/kernel/process.c | 5 +++-- arch/score/kernel/sys_score.c | 10 +++++++--- arch/sh/kernel/process_32.c | 7 ++++--- arch/sh/kernel/process_64.c | 4 ++-- arch/sh/kernel/sys_sh32.c | 4 +++- arch/sh/kernel/sys_sh64.c | 4 +++- arch/sparc/kernel/process_32.c | 6 ++++-- arch/sparc/kernel/process_64.c | 4 ++-- arch/sparc/kernel/sys_sparc_32.c | 4 +++- arch/sparc/kernel/sys_sparc_64.c | 4 +++- arch/tile/kernel/process.c | 5 +++-- arch/um/kernel/exec.c | 5 +++-- arch/um/kernel/syscall.c | 4 +++- arch/x86/include/asm/syscalls.h | 5 +++-- arch/x86/kernel/process.c | 5 +++-- arch/x86/kernel/sys_i386_32.c | 4 +++- arch/xtensa/kernel/process.c | 5 +++-- fs/binfmt_misc.c | 2 +- fs/binfmt_script.c | 3 ++- fs/exec.c | 21 +++++++++++---------- include/linux/binfmts.h | 7 ++++--- include/linux/sched.h | 4 +++- include/linux/syscalls.h | 2 +- init/do_mounts_initrd.c | 7 ++++--- init/main.c | 6 +++--- kernel/kmod.c | 4 +++- security/commoncap.c | 2 +- 50 files changed, 179 insertions(+), 98 deletions(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 88e608aebc8c..842dba308eab 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -387,8 +387,9 @@ EXPORT_SYMBOL(dump_elf_task_fp); * sys_execve() executes a new program. */ asmlinkage int -do_sys_execve(const char __user *ufilename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +do_sys_execve(const char __user *ufilename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char *filename; diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 5b7c541a4c63..62e7c61d0342 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -62,8 +62,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) /* sys_execve() executes a new program. * This is called indirectly via a small wrapper */ -asmlinkage int sys_execve(const char __user *filenamei, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +asmlinkage int sys_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char * filename; @@ -78,14 +79,17 @@ out: return error; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { struct pt_regs regs; int ret; memset(®s, 0, sizeof(struct pt_regs)); - ret = do_execve(filename, (char __user * __user *)argv, - (char __user * __user *)envp, ®s); + ret = do_execve(filename, + (const char __user *const __user *)argv, + (const char __user *const __user *)envp, ®s); if (ret < 0) goto out; diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index e5daddff397d..9c46aaad11ce 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -384,8 +384,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) } asmlinkage int sys_execve(const char __user *ufilename, - char __user *__user *uargv, - char __user *__user *uenvp, struct pt_regs *regs) + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c index 459349b5ed5a..62635a09ae3e 100644 --- a/arch/avr32/kernel/sys_avr32.c +++ b/arch/avr32/kernel/sys_avr32.c @@ -7,7 +7,9 @@ */ #include -int kernel_execve(const char *file, char **argv, char **envp) +int kernel_execve(const char *file, + const char *const *argv, + const char *const *envp) { register long scno asm("r8") = __NR_execve; register long sc1 asm("r12") = (long)file; diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index a566f61c002a..01f98cb964d2 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -209,7 +209,9 @@ copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char *filename; diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 93f0f64b1326..9a57db6907f5 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -204,7 +204,9 @@ asmlinkage int sys_vfork(long r10, long r11, long r12, long r13, long mof, long /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *fname, char **argv, char **envp, +asmlinkage int sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, struct pt_regs *regs) { diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 2661a9529d70..562f84718906 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -218,8 +218,10 @@ sys_vfork(long r10, long r11, long r12, long r13, long mof, long srp, /* sys_execve() executes a new program. */ asmlinkage int -sys_execve(const char *fname, char **argv, char **envp, long r13, long mof, long srp, - struct pt_regs *regs) +sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 428931cf2f0c..2b63b0191f52 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -250,8 +250,9 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 8b7b78d77d5c..97478138e361 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -212,7 +212,10 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *name, char **argv, char **envp,int dummy,...) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp, + int dummy, ...) { int error; char * filename; diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index f9b3f44da69f..dc1ac0243b78 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -51,7 +51,9 @@ asmlinkage void syscall_print(void *dummy,...) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long res __asm__("er0"); register char *const *_c __asm__("er3") = envp; diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index a879c03b7f1c..16f1c7b04c69 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -633,7 +633,9 @@ dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) } long -sys_execve (const char __user *filename, char __user * __user *argv, char __user * __user *envp, +sys_execve (const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { char *fname; diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 8665a4d868ec..422bea9f1dbc 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -289,8 +289,8 @@ asmlinkage int sys_vfork(unsigned long r0, unsigned long r1, unsigned long r2, * sys_execve() executes a new program. */ asmlinkage int sys_execve(const char __user *ufilename, - char __user * __user *uargv, - char __user * __user *uenvp, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, struct pt_regs regs) { diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index 0a00f467edfa..d841fb6cc703 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -93,7 +93,9 @@ asmlinkage int sys_cachectl(char *addr, int nbytes, int op) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __scno __asm__ ("r7") = __NR_execve; register long __arg3 __asm__ ("r2") = (long)(envp); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 221d0b71ce39..18732ab23292 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -315,7 +315,9 @@ EXPORT_SYMBOL(dump_fpu); /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 77896692eb0a..2f431ece7b5f 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -459,7 +459,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 6350f68cd026..4d090d3c0897 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -350,7 +350,9 @@ void dump(struct pt_regs *fp) /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *name, char **argv, char **envp) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp) { int error; char * filename; diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index d65e9c4c930c..68488ae47f0a 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -44,7 +44,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index 6abab6ebedbe..2250fe9d269b 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -47,8 +47,10 @@ asmlinkage long microblaze_clone(int flags, unsigned long stack, struct pt_regs return do_fork(flags, stack, regs, 0, NULL, NULL); } -asmlinkage long microblaze_execve(const char __user *filenamei, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +asmlinkage long microblaze_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs) { int error; char *filename; @@ -77,7 +79,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register const char *__a __asm__("r5") = filename; register const void *__b __asm__("r6") = argv; diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index bddce0bca195..1dc6edff45e0 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -258,8 +258,10 @@ asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user *__user *) (long)regs.regs[5], - (char __user *__user *) (long)regs.regs[6], ®s); + error = do_execve(filename, + (const char __user *const __user *) (long)regs.regs[5], + (const char __user *const __user *) (long)regs.regs[6], + ®s); putname(filename); out: @@ -436,7 +438,9 @@ asmlinkage void bad_stack(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __a0 asm("$4") = (unsigned long) filename; register unsigned long __a1 asm("$5") = (unsigned long) argv; diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 762eb325b949..f48373e2bc1c 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -269,8 +269,8 @@ asmlinkage long sys_vfork(void) } asmlinkage long sys_execve(const char __user *name, - char __user * __user *argv, - char __user * __user *envp) + const char __user *const __user *argv, + const char __user *const __user *envp) { char *filename; int error; diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 1444875a7611..0dc8543acb4f 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -41,8 +41,10 @@ int hpux_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 76332dadc6e9..4b4b9181a1a0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -348,17 +348,22 @@ asmlinkage int sys_execve(struct pt_regs *regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); out: return error; } -extern int __execve(const char *filename, char *const argv[], - char *const envp[], struct task_struct *task); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +extern int __execve(const char *filename, + const char *const argv[], + const char *const envp[], struct task_struct *task); +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { return __execve(filename, argv, envp, current); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index feacfb789686..91356ffda2ca 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1034,8 +1034,9 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, flush_fp_to_thread(current); flush_altivec_to_thread(current); flush_spe_to_thread(current); - error = do_execve(filename, (char __user * __user *) a1, - (char __user * __user *) a2, regs); + error = do_execve(filename, + (const char __user *const __user *) a1, + (const char __user *const __user *) a2, regs); putname(filename); out: return error; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7eafaf2662b9..d3a2d1c6438e 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -267,8 +267,9 @@ asmlinkage void execve_tail(void) /* * sys_execve() executes a new program. */ -SYSCALL_DEFINE3(execve, const char __user *, name, char __user * __user *, argv, - char __user * __user *, envp) +SYSCALL_DEFINE3(execve, const char __user *, name, + const char __user *const __user *, argv, + const char __user *const __user *, envp) { struct pt_regs *regs = task_pt_regs(current); char *filename; diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c index 651096ff8db4..e478bf9a7e91 100644 --- a/arch/score/kernel/sys_score.c +++ b/arch/score/kernel/sys_score.c @@ -99,8 +99,10 @@ score_execve(struct pt_regs *regs) if (IS_ERR(filename)) return error; - error = do_execve(filename, (char __user *__user*)regs->regs[5], - (char __user *__user *) regs->regs[6], regs); + error = do_execve(filename, + (const char __user *const __user *)regs->regs[5], + (const char __user *const __user *)regs->regs[6], + regs); putname(filename); return error; @@ -110,7 +112,9 @@ score_execve(struct pt_regs *regs) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __r4 asm("r4") = (unsigned long) filename; register unsigned long __r5 asm("r5") = (unsigned long) argv; diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 052981972ae6..762a13984bbd 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -296,9 +296,10 @@ asmlinkage int sys_vfork(unsigned long r4, unsigned long r5, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(char __user *ufilename, char __user * __user *uargv, - char __user * __user *uenvp, unsigned long r7, - struct pt_regs __regs) +asmlinkage int sys_execve(const char __user *ufilename, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + unsigned long r7, struct pt_regs __regs) { struct pt_regs *regs = RELOC_HIDE(&__regs, 0); int error; diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 68d128d651b3..210c1cabcb7f 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -497,8 +497,8 @@ asmlinkage int sys_execve(const char *ufilename, char **uargv, goto out; error = do_execve(filename, - (char __user * __user *)uargv, - (char __user * __user *)uenvp, + (const char __user *const __user *)uargv, + (const char __user *const __user *)uenvp, pregs); putname(filename); out: diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index eb68bfdd86e6..f56b6fe5c5d0 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -71,7 +71,9 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __sc0 __asm__ ("r3") = __NR_execve; register long __sc4 __asm__ ("r4") = (long) filename; diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c index 287235768bc5..c5a38c4bf410 100644 --- a/arch/sh/kernel/sys_sh64.c +++ b/arch/sh/kernel/sys_sh64.c @@ -33,7 +33,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve); register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename; diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 40e29fc8a4d6..17529298c50a 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -633,8 +633,10 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if(IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *)regs->u_regs[base + UREG_I1], - (char __user * __user *)regs->u_regs[base + UREG_I2], + (const char __user *const __user *) + regs->u_regs[base + UREG_I1], + (const char __user *const __user *) + regs->u_regs[base + UREG_I2], regs); putname(filename); out: diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index dbe81a368b45..485f54748384 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -739,9 +739,9 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I1], - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I2], regs); putname(filename); if (!error) { diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ee995b7dae7e..50794137d710 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -282,7 +282,9 @@ out: * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 3d435c42e6db..f836f4e93afe 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -758,7 +758,9 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index ed590ad0acdc..985cc28c74c5 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -543,8 +543,9 @@ long _sys_vfork(struct pt_regs *regs) /* * sys_execve() executes a new program. */ -long _sys_execve(char __user *path, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +long _sys_execve(const char __user *path, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 59b20d93b6d4..cd145eda3579 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -44,8 +44,9 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) PT_REGS_SP(regs) = esp; } -static long execve1(const char *file, char __user * __user *argv, - char __user *__user *env) +static long execve1(const char *file, + const char __user *const __user *argv, + const char __user *const __user *env) { long error; diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 7427c0b1930c..5ddb246626db 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -51,7 +51,9 @@ long old_mmap(unsigned long addr, unsigned long len, return err; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { mm_segment_t fs; int ret; diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index feb2ff9bfc2d..f1d8b441fc77 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -23,8 +23,9 @@ long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); -long sys_execve(const char __user *, char __user * __user *, - char __user * __user *, struct pt_regs *); +long sys_execve(const char __user *, + const char __user *const __user *, + const char __user *const __user *, struct pt_regs *); long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 64ecaf0af9af..57d1868a86aa 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -301,8 +301,9 @@ EXPORT_SYMBOL(kernel_thread); /* * sys_execve() executes a new program. */ -long sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +long sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 196552bb412c..d5e06624e34a 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -28,7 +28,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 7c2f38f68ebb..e3558b9a58ba 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -318,8 +318,9 @@ long xtensa_clone(unsigned long clone_flags, unsigned long newsp, */ asmlinkage -long xtensa_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, +long xtensa_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, long a3, long a4, long a5, struct pt_regs *regs) { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9e60fd201716..a7528b913936 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) Node *fmt; struct file * interp_file = NULL; char iname[BINPRM_BUF_SIZE]; - char *iname_addr = iname; + const char *iname_addr = iname; int retval; int fd_binary = -1; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index aca9d55afb22..396a9884591f 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -16,7 +16,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) { - char *cp, *i_name, *i_arg; + const char *i_arg, *i_name; + char *cp; struct file *file; char interp[BINPRM_BUF_SIZE]; int retval; diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..05c7d6b84df7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -361,13 +361,13 @@ err: /* * count() counts the number of strings in array ARGV. */ -static int count(char __user * __user * argv, int max) +static int count(const char __user * const __user * argv, int max) { int i = 0; if (argv != NULL) { for (;;) { - char __user * p; + const char __user * p; if (get_user(p, argv)) return -EFAULT; @@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max) * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ -static int copy_strings(int argc, char __user * __user * argv, +static int copy_strings(int argc, const char __user *const __user *argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; @@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv, int ret; while (argc-- > 0) { - char __user *str; + const char __user *str; int len; unsigned long pos; @@ -470,12 +470,13 @@ out: /* * Like copy_strings, but get argv and its values from kernel memory. */ -int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) +int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm) { int r; mm_segment_t oldfs = get_fs(); set_fs(KERNEL_DS); - r = copy_strings(argc, (char __user * __user *)argv, bprm); + r = copy_strings(argc, (const char __user *const __user *)argv, bprm); set_fs(oldfs); return r; } @@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec); void setup_new_exec(struct linux_binprm * bprm) { int i, ch; - char * name; + const char *name; char tcomm[sizeof(current->comm)]; arch_pick_mmap_layout(current->mm); @@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. */ -int do_execve(char * filename, - char __user *__user *argv, - char __user *__user *envp, +int do_execve(const char * filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs * regs) { struct linux_binprm *bprm; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c809e286d213..a065612fc928 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -50,8 +50,8 @@ struct linux_binprm{ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; - char * filename; /* Name of binary as seen by procps */ - char * interp; /* Name of the binary really executed. Most + const char * filename; /* Name of binary as seen by procps */ + const char * interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ unsigned interp_flags; @@ -126,7 +126,8 @@ extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); -extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); +extern int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); diff --git a/include/linux/sched.h b/include/linux/sched.h index ce160d68f5e7..1e2a6db2d7dd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2109,7 +2109,9 @@ extern void daemonize(const char *, ...); extern int allow_signal(int); extern int disallow_signal(int); -extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); +extern int do_execve(const char *, + const char __user * const __user *, + const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6e5d19788634..e6319d18a55d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -820,7 +820,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]); +int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); asmlinkage long sys_perf_event_open( diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 2b108538d0d9..3098a38f3ae1 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -24,10 +24,11 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); -static int __init do_linuxrc(void * shell) +static int __init do_linuxrc(void *_shell) { - static char *argv[] = { "linuxrc", NULL, }; - extern char * envp_init[]; + static const char *argv[] = { "linuxrc", NULL, }; + extern const char *envp_init[]; + const char *shell = _shell; sys_close(old_fd);sys_close(root_fd); sys_setsid(); diff --git a/init/main.c b/init/main.c index 22d61cb06f98..94ab488039aa 100644 --- a/init/main.c +++ b/init/main.c @@ -197,8 +197,8 @@ static int __init set_reset_devices(char *str) __setup("reset_devices", set_reset_devices); -static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; -char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; +static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; extern const struct obs_kernel_param __setup_start[], __setup_end[]; @@ -809,7 +809,7 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } -static void run_init_process(char *init_filename) +static void run_init_process(const char *init_filename) { argv_init[0] = init_filename; kernel_execve(init_filename, argv_init, envp_init); diff --git a/kernel/kmod.c b/kernel/kmod.c index 6e9b19667a8d..9cd0591c96a2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -153,7 +153,9 @@ static int ____call_usermodehelper(void *data) goto fail; } - retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); + retval = kernel_execve(sub_info->path, + (const char *const *)sub_info->argv, + (const char *const *)sub_info->envp); /* Exec failed? */ fail: diff --git a/security/commoncap.c b/security/commoncap.c index 4e015996dd4d..9d172e6e330c 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -40,7 +40,7 @@ * * Warn if that happens, once per boot. */ -static void warn_setuid_and_fcaps_mixed(char *fname) +static void warn_setuid_and_fcaps_mixed(const char *fname) { static int warned; if (!warned) { From f362b73244fb16ea4ae127ced1467dd8adaa7733 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Tue, 17 Aug 2010 23:56:55 +0100 Subject: [PATCH 071/104] Fix unprotected access to task credentials in waitid() Using a program like the following: #include #include #include #include int main() { id_t id; siginfo_t infop; pid_t res; id = fork(); if (id == 0) { sleep(1); exit(0); } kill(id, SIGSTOP); alarm(1); waitid(P_PID, id, &infop, WCONTINUED); return 0; } to call waitid() on a stopped process results in access to the child task's credentials without the RCU read lock being held - which may be replaced in the meantime - eliciting the following warning: =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- kernel/exit.c:1460 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 2 locks held by waitid02/22252: #0: (tasklist_lock){.?.?..}, at: [] do_wait+0xc5/0x310 #1: (&(&sighand->siglock)->rlock){-.-...}, at: [] wait_consider_task+0x19a/0xbe0 stack backtrace: Pid: 22252, comm: waitid02 Not tainted 2.6.35-323cd+ #3 Call Trace: [] lockdep_rcu_dereference+0xa4/0xc0 [] wait_consider_task+0xaf1/0xbe0 [] do_wait+0xf5/0x310 [] sys_waitid+0x86/0x1f0 [] ? child_wait_callback+0x0/0x70 [] system_call_fastpath+0x16/0x1b This is fixed by holding the RCU read lock in wait_task_continued() to ensure that the task's current credentials aren't destroyed between us reading the cred pointer and us reading the UID from those credentials. Furthermore, protect wait_task_stopped() in the same way. We don't need to keep holding the RCU read lock once we've read the UID from the credentials as holding the RCU read lock doesn't stop the target task from changing its creds under us - so the credentials may be outdated immediately after we've read the pointer, lock or no lock. Signed-off-by: Daniel J Blueman Signed-off-by: David Howells Acked-by: Paul E. McKenney Acked-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/exit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 671ed56e0a49..03120229db28 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1386,8 +1386,7 @@ static int wait_task_stopped(struct wait_opts *wo, if (!unlikely(wo->wo_flags & WNOWAIT)) *p_code = 0; - /* don't need the RCU readlock here as we're holding a spinlock */ - uid = __task_cred(p)->uid; + uid = task_uid(p); unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1460,7 +1459,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) } if (!unlikely(wo->wo_flags & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; - uid = __task_cred(p)->uid; + uid = task_uid(p); spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); From a7c8962bfbc730a9acc0a635bad0f9628b6e816a Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 16 Aug 2010 21:20:07 -0700 Subject: [PATCH 072/104] arcmsr_hba: Missing slab.h include Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- drivers/scsi/arcmsr/arcmsr_hba.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 95a895dd4f13..c8dc392edd57 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include From 602586a83b719df0fbd94196a1359ed35aeb2df3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 17 Aug 2010 15:23:56 -0700 Subject: [PATCH 073/104] shmem: put_super must percpu_counter_destroy list_add() corruption messages reported from shmem_fill_super()'s recently introduced percpu_counter_init(): shmem_put_super() needs to remember to percpu_counter_destroy(). And also check error from percpu_counter_init(). Reported-bisected-and-tested-by: Tetsuo Handa Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/shmem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index dfaa0f4e9789..080b09a57a8f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2325,7 +2325,10 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs) static void shmem_put_super(struct super_block *sb) { - kfree(sb->s_fs_info); + struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + + percpu_counter_destroy(&sbinfo->used_blocks); + kfree(sbinfo); sb->s_fs_info = NULL; } @@ -2367,7 +2370,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) #endif spin_lock_init(&sbinfo->stat_lock); - percpu_counter_init(&sbinfo->used_blocks, 0); + if (percpu_counter_init(&sbinfo->used_blocks, 0)) + goto failed; sbinfo->free_inodes = sbinfo->max_inodes; sb->s_maxbytes = SHMEM_MAX_BYTES; From 3a3a5ddb7a0f43c3dd0f98673f3d930a456725f8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 16 Aug 2010 18:09:31 +1000 Subject: [PATCH 074/104] Update recovery_offset even when external metadata is used. The update of ->recovery_offset in sync_sbs is appropriate even then external metadata is in use. However sync_sbs is only called when native metadata is used. So move that update in to the top of md_update_sb (which is the only caller of sync_sbs) before the test on ->external. This moves the update out of ->write_lock protection, but those fields only need ->reconfig_mutex protection which they still have. Also move the test on ->persistent up to where ->external is set as for metadata update purposes they are the same. Clear MD_CHANGE_DEVS and MD_CHANGE_CLEAN as they can only be confusing if ->external is set or ->persistent isn't. Finally move the update of ->utime down as it is only relevent (like the ->events update) for native metadata. Signed-off-by: NeilBrown Reported-by: "Kwolek, Adam" --- drivers/md/md.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 11567c7999a2..c148b6302154 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2136,16 +2136,6 @@ static void sync_sbs(mddev_t * mddev, int nospares) * with the rest of the array) */ mdk_rdev_t *rdev; - - /* First make sure individual recovery_offsets are correct */ - list_for_each_entry(rdev, &mddev->disks, same_set) { - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(In_sync, &rdev->flags) && - mddev->curr_resync_completed > rdev->recovery_offset) - rdev->recovery_offset = mddev->curr_resync_completed; - - } list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->sb_events == mddev->events || (nospares && @@ -2167,12 +2157,27 @@ static void md_update_sb(mddev_t * mddev, int force_change) int sync_req; int nospares = 0; - mddev->utime = get_seconds(); - if (mddev->external) - return; repeat: + /* First make sure individual recovery_offsets are correct */ + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->raid_disk >= 0 && + mddev->delta_disks >= 0 && + !test_bit(In_sync, &rdev->flags) && + mddev->curr_resync_completed > rdev->recovery_offset) + rdev->recovery_offset = mddev->curr_resync_completed; + + } + if (mddev->external || !mddev->persistent) { + clear_bit(MD_CHANGE_DEVS, &mddev->flags); + clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + wake_up(&mddev->sb_wait); + return; + } + spin_lock_irq(&mddev->write_lock); + mddev->utime = get_seconds(); + set_bit(MD_CHANGE_PENDING, &mddev->flags); if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) force_change = 1; @@ -2221,19 +2226,6 @@ repeat: MD_BUG(); mddev->events --; } - - /* - * do not write anything to disk if using - * nonpersistent superblocks - */ - if (!mddev->persistent) { - if (!mddev->external) - clear_bit(MD_CHANGE_PENDING, &mddev->flags); - - spin_unlock_irq(&mddev->write_lock); - wake_up(&mddev->sb_wait); - return; - } sync_sbs(mddev, nospares); spin_unlock_irq(&mddev->write_lock); From e6ffbcb6cd0ac471223df24ae77eb486c1ee68cc Mon Sep 17 00:00:00 2001 From: Adrian Drzewiecki Date: Wed, 18 Aug 2010 11:49:02 +1000 Subject: [PATCH 075/104] md: Notify sysfs when RAID1/5/10 disk is In_sync. When RAID1 is done syncing disks, it'll update the state of synced rdevs to In_sync. But it neglected to notify sysfs that the attribute changed. So any programs that are waiting for an rdev's state to change will not be woken. (raid5/raid10 added by neilb) Signed-off-by: Adrian Drzewiecki Signed-off-by: NeilBrown --- drivers/md/raid1.c | 1 + drivers/md/raid10.c | 1 + drivers/md/raid5.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 73cc74ffc26b..0e1abf1bb38e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1135,6 +1135,7 @@ static int raid1_spare_active(mddev_t *mddev) spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded--; spin_unlock_irqrestore(&conf->device_lock, flags); + sysfs_notify_dirent(rdev->sysfs_state); } } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a88aeb5198c7..76d1fc9c65ba 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1130,6 +1130,7 @@ static int raid10_spare_active(mddev_t *mddev) spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded--; spin_unlock_irqrestore(&conf->device_lock, flags); + sysfs_notify_dirent(tmp->rdev->sysfs_state); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 866d4b5a144c..7865dd090bde 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5341,6 +5341,7 @@ static int raid5_spare_active(mddev_t *mddev) spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded--; spin_unlock_irqrestore(&conf->device_lock, flags); + sysfs_notify_dirent(tmp->rdev->sysfs_state); } } print_raid5_conf(conf); From 6b9656205469269c050963c71fca1998b247a560 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 18 Aug 2010 11:56:59 +1000 Subject: [PATCH 076/104] md: provide appropriate return value for spare_active functions. md_check_recovery expects ->spare_active to return 'true' if any spares were activated, but none of them do, so the consequent change in 'degraded' is not notified through sysfs. So count the number of spares activated, subtract it from 'degraded' just once, and return it. Reported-by: Adrian Drzewiecki Signed-off-by: NeilBrown --- drivers/md/raid1.c | 12 +++++++----- drivers/md/raid10.c | 12 +++++++----- drivers/md/raid5.c | 12 +++++++----- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0e1abf1bb38e..64d96526a9cc 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1120,6 +1120,8 @@ static int raid1_spare_active(mddev_t *mddev) { int i; conf_t *conf = mddev->private; + int count = 0; + unsigned long flags; /* * Find all failed disks within the RAID1 configuration @@ -1131,16 +1133,16 @@ static int raid1_spare_active(mddev_t *mddev) if (rdev && !test_bit(Faulty, &rdev->flags) && !test_and_set_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded--; - spin_unlock_irqrestore(&conf->device_lock, flags); + count++; sysfs_notify_dirent(rdev->sysfs_state); } } + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded -= count; + spin_unlock_irqrestore(&conf->device_lock, flags); print_conf(conf); - return 0; + return count; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 76d1fc9c65ba..a2f8a7153dce 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1116,6 +1116,8 @@ static int raid10_spare_active(mddev_t *mddev) int i; conf_t *conf = mddev->private; mirror_info_t *tmp; + int count = 0; + unsigned long flags; /* * Find all non-in_sync disks within the RAID10 configuration @@ -1126,16 +1128,16 @@ static int raid10_spare_active(mddev_t *mddev) if (tmp->rdev && !test_bit(Faulty, &tmp->rdev->flags) && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded--; - spin_unlock_irqrestore(&conf->device_lock, flags); + count++; sysfs_notify_dirent(tmp->rdev->sysfs_state); } } + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded -= count; + spin_unlock_irqrestore(&conf->device_lock, flags); print_conf(conf); - return 0; + return count; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7865dd090bde..69b0a169e43d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5330,6 +5330,8 @@ static int raid5_spare_active(mddev_t *mddev) int i; raid5_conf_t *conf = mddev->private; struct disk_info *tmp; + int count = 0; + unsigned long flags; for (i = 0; i < conf->raid_disks; i++) { tmp = conf->disks + i; @@ -5337,15 +5339,15 @@ static int raid5_spare_active(mddev_t *mddev) && tmp->rdev->recovery_offset == MaxSector && !test_bit(Faulty, &tmp->rdev->flags) && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded--; - spin_unlock_irqrestore(&conf->device_lock, flags); + count++; sysfs_notify_dirent(tmp->rdev->sysfs_state); } } + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded -= count; + spin_unlock_irqrestore(&conf->device_lock, flags); print_raid5_conf(conf); - return 0; + return count; } static int raid5_remove_disk(mddev_t *mddev, int number) From dc6ae5e471425762122bb93310dd46101ccd840d Mon Sep 17 00:00:00 2001 From: Jate Sujjavanich Date: Mon, 7 Jun 2010 11:39:30 -0400 Subject: [PATCH 077/104] m68knommu: arch/m68k/include/asm/ide.h fix for nommu The arch/m68k/include/asm/ide.h produces errors when the IDE driver is compiled for my 523x uClinux system under kernel. The header makes some redefines of operators not defined in the arch/m68k/include/asm/io_no.h header. There are no separate mmio and iospace defines. Signed-off-by: Jate Sujjavanich Acked-by: Geert Uytterhoeven Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/ide.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/m68k/include/asm/ide.h b/arch/m68k/include/asm/ide.h index 3958726664ba..492fee8a1ab2 100644 --- a/arch/m68k/include/asm/ide.h +++ b/arch/m68k/include/asm/ide.h @@ -1,6 +1,4 @@ /* - * linux/include/asm-m68k/ide.h - * * Copyright (C) 1994-1996 Linus Torvalds & authors */ @@ -34,6 +32,8 @@ #include #include +#ifdef CONFIG_MMU + /* * Get rid of defs from io.h - ide has its private and conflicting versions * Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we @@ -53,5 +53,14 @@ #define __ide_mm_outsw(port, addr, n) raw_outsw((u16 *)port, addr, n) #define __ide_mm_outsl(port, addr, n) raw_outsl((u32 *)port, addr, n) +#else + +#define __ide_mm_insw(port, addr, n) io_insw((unsigned int)port, addr, n) +#define __ide_mm_insl(port, addr, n) io_insl((unsigned int)port, addr, n) +#define __ide_mm_outsw(port, addr, n) io_outsw((unsigned int)port, addr, n) +#define __ide_mm_outsl(port, addr, n) io_outsl((unsigned int)port, addr, n) + +#endif /* CONFIG_MMU */ + #endif /* __KERNEL__ */ #endif /* _M68K_IDE_H */ From c7484cf467df44c0d478b0412dc01e71fbec7923 Mon Sep 17 00:00:00 2001 From: Kulikov Vasiliy Date: Wed, 14 Jul 2010 22:01:13 +0400 Subject: [PATCH 078/104] m68knommu: formatting of pointers in printk() arch/m68knommu/kernel/process.c: formatting of pointers in printk() Use %p instead of %08x in printk(). Signed-off-by: Kulikov Vasiliy Signed-off-by: Greg Ungerer --- arch/m68knommu/kernel/process.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 6350f68cd026..3fb9e45df888 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -316,14 +316,14 @@ void dump(struct pt_regs *fp) fp->d0, fp->d1, fp->d2, fp->d3); printk(KERN_EMERG "d4: %08lx d5: %08lx a0: %08lx a1: %08lx\n", fp->d4, fp->d5, fp->a0, fp->a1); - printk(KERN_EMERG "\nUSP: %08x TRAPFRAME: %08x\n", - (unsigned int) rdusp(), (unsigned int) fp); + printk(KERN_EMERG "\nUSP: %08x TRAPFRAME: %p\n", + (unsigned int) rdusp(), fp); printk(KERN_EMERG "\nCODE:"); tp = ((unsigned char *) fp->pc) - 0x20; for (sp = (unsigned long *) tp, i = 0; (i < 0x40); i += 4) { if ((i % 0x10) == 0) - printk(KERN_EMERG "%08x: ", (int) (tp + i)); + printk(KERN_EMERG "%p: ", tp + i); printk("%08x ", (int) *sp++); } printk(KERN_EMERG "\n"); @@ -332,7 +332,7 @@ void dump(struct pt_regs *fp) tp = ((unsigned char *) fp) - 0x40; for (sp = (unsigned long *) tp, i = 0; (i < 0xc0); i += 4) { if ((i % 0x10) == 0) - printk(KERN_EMERG "%08x: ", (int) (tp + i)); + printk(KERN_EMERG "%p: ", tp + i); printk("%08x ", (int) *sp++); } printk(KERN_EMERG "\n"); @@ -341,7 +341,7 @@ void dump(struct pt_regs *fp) tp = (unsigned char *) (rdusp() - 0x10); for (sp = (unsigned long *) tp, i = 0; (i < 0x80); i += 4) { if ((i % 0x10) == 0) - printk(KERN_EMERG "%08x: ", (int) (tp + i)); + printk(KERN_EMERG "%p: ", tp + i); printk("%08x ", (int) *sp++); } printk(KERN_EMERG "\n"); From 5e1c53356d79591b10cdea12e3d5ebace8e7b6fa Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 28 Jul 2010 13:32:46 +1000 Subject: [PATCH 079/104] m68knommu: include sched.h in ColdFire/SPI driver Using the coldfire qspi driver, I get the following error: drivers/spi/coldfire_qspi.c: In function 'mcfqspi_irq_handler': drivers/spi/coldfire_qspi.c:166: error: 'TASK_NORMAL' undeclared (first use in this function) drivers/spi/coldfire_qspi.c:166: error: (Each undeclared identifier is reported only once It is solved by adding the following include to coldfire_sqpi.c: #include Fix suggested by Jate Sujjavanich Signed-off-by: Greg Ungerer --- drivers/spi/coldfire_qspi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/coldfire_qspi.c b/drivers/spi/coldfire_qspi.c index 59be3efe0636..052b3c7fa6a0 100644 --- a/drivers/spi/coldfire_qspi.c +++ b/drivers/spi/coldfire_qspi.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include From 5c79a5ae23e72fa12f1c7c528f62bf3ea35da0dc Mon Sep 17 00:00:00 2001 From: Ernst Schwab Date: Mon, 16 Aug 2010 15:10:11 +0200 Subject: [PATCH 080/104] spi.h: missing kernel-doc notation, please fix Added comments in kernel-doc notation for previously added struct fields. Signed-off-by: Ernst Schwab Acked-by: Randy Dunlap Signed-off-by: Grant Likely --- include/linux/spi/spi.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ae0a5286f558..92e52a1e6af3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -213,6 +213,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @flags: other constraints relevant to this driver + * @bus_lock_spinlock: spinlock for SPI bus locking + * @bus_lock_mutex: mutex for SPI bus locking + * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. From f4ae2faa40199b97b12f508234640bc565d166f8 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 11 Aug 2010 14:07:01 +0300 Subject: [PATCH 081/104] fix reiserfs_evict_inode end_writeback second call reiserfs_evict_inode calls end_writeback two times hitting kernel BUG at fs/inode.c:298 becase inode->i_state is I_CLEAR already. Signed-off-by: Sergey Senozhatsky Signed-off-by: Al Viro --- fs/reiserfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ae35413dcbe1..caa758377d66 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode) dquot_drop(inode); inode->i_blocks = 0; reiserfs_write_unlock_once(inode->i_sb, depth); + return; no_delete: end_writeback(inode); From b845ff8f3ea2988ad5041315e2d35298e85cbc2f Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 17 Aug 2010 17:08:35 +0300 Subject: [PATCH 082/104] cramfs: only unlock new inodes Commit 77b8a75f5bb introduced a warning at fs/inode.c:692 unlock_new_inode(), caused by unlock_new_inode() being called on existing inodes as well. This patch changes setup_inode() to only call unlock_new_inode() for I_NEW inodes. Signed-off-by: Alexander Shishkin Signed-off-by: Al Viro --- fs/cramfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index a53b130b366c..1e7a33028d33 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, } } else { inode = iget_locked(sb, CRAMINO(cramfs_inode)); - if (inode) { + if (inode && (inode->i_state & I_NEW)) { setup_inode(inode, cramfs_inode); unlock_new_inode(inode); } From dad5eb6daa7eeb63d4fc9d982892c59faa07e797 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 17 Aug 2010 12:42:13 +0200 Subject: [PATCH 083/104] vfs: update ctime when changing the file's permission by setfacl generic_acl_set didn't update the ctime of the file when its permission was changed. Steps to reproduce: # touch aaa # stat -c %Z aaa 1275289822 # setfacl -m 'u::x,g::x,o::x' aaa # stat -c %Z aaa 1275289822 <- unchanged But, according to the spec of the ctime, vfs must update it. Port of ext3 patch by Miao Xie . CC: Al Viro Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/generic_acl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 99800e564157..6bc9e3a5a693 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, if (error < 0) goto failed; inode->i_mode = mode; + inode->i_ctime = CURRENT_TIME; if (error == 0) { posix_acl_release(acl); acl = NULL; From 87e99511ea54510ffb60b98001d108794d5037f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:05:45 +0200 Subject: [PATCH 084/104] kill BH_Ordered flag Instead of abusing a buffer_head flag just add a variant of sync_dirty_buffer which allows passing the exact type of write flag required. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 17 ++++++------- fs/jbd/commit.c | 49 +++++++++++++++++++------------------ fs/jbd2/commit.c | 39 ++++++++++++----------------- fs/nilfs2/super.c | 28 ++++++++++----------- include/linux/buffer_head.h | 3 +-- 5 files changed, 63 insertions(+), 73 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 50efa339e051..6c8ad977f3d4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2911,13 +2911,6 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(buffer_delay(bh)); BUG_ON(buffer_unwritten(bh)); - /* - * Mask in barrier bit for a write (could be either a WRITE or a - * WRITE_SYNC - */ - if (buffer_ordered(bh) && (rw & WRITE)) - rw |= WRITE_BARRIER; - /* * Only clear out a write error when rewriting */ @@ -3021,7 +3014,7 @@ EXPORT_SYMBOL(ll_rw_block); * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ -int sync_dirty_buffer(struct buffer_head *bh) +int __sync_dirty_buffer(struct buffer_head *bh, int rw) { int ret = 0; @@ -3030,7 +3023,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(rw, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -3043,6 +3036,12 @@ int sync_dirty_buffer(struct buffer_head *bh) } return ret; } +EXPORT_SYMBOL(__sync_dirty_buffer); + +int sync_dirty_buffer(struct buffer_head *bh) +{ + return __sync_dirty_buffer(bh, WRITE_SYNC); +} EXPORT_SYMBOL(sync_dirty_buffer); /* diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 28a9ddaa0c49..95d8c11c929e 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal, struct buffer_head *bh; journal_header_t *header; int ret; - int barrier_done = 0; if (is_journal_aborted(journal)) return 0; @@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); + if (journal->j_flags & JFS_BARRIER) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = sync_dirty_buffer(bh); - if (barrier_done) - clear_buffer_ordered(bh); - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; + ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; - spin_unlock(&journal->j_state_lock); + /* + * Is it possible for another commit to fail at roughly + * the same time as this one? If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP) { + char b[BDEVNAME_SIZE]; - /* And try again, without the barrier */ - set_buffer_uptodate(bh); - set_buffer_dirty(bh); + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + } else { ret = sync_dirty_buffer(bh); } + put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8049f1..7c068c189d80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - int barrier_done = 0; struct timespec now = current_kernel_time(); if (is_journal_aborted(journal)) @@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = submit_bh(WRITE_SYNC_PLUG, bh); - if (barrier_done) - clear_buffer_ordered(bh); + ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk(KERN_WARNING + "JBD2: Disabling barriers on %s, " + "not supported by device\n", journal->j_devname); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + write_unlock(&journal->j_state_lock); - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - printk(KERN_WARNING - "JBD2: Disabling barriers on %s, " - "not supported by device\n", journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); - - /* And try again, without the barrier */ - lock_buffer(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + /* And try again, without the barrier */ + lock_buffer(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + ret = submit_bh(WRITE_SYNC_PLUG, bh); + } + } else { ret = submit_bh(WRITE_SYNC_PLUG, bh); } *cbh = bh; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9df73b..68345430fb48 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. " - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + + if (nilfs_test_opt(sbi, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_BARRIER); + if (err == -EOPNOTSUPP) { + nilfs_warning(sbi->s_super, __func__, + "barrier-based sync failed. " + "disabling barriers\n"); + nilfs_clear_opt(sbi, BARRIER); + goto retry; + } + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 43e649a72529..72c1cf83eb85 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Ordered, /* ordered write */ BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Ordered, ordered) BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) @@ -183,6 +181,7 @@ void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); +int __sync_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); From 9cb569d601e0b93e01c20a22872270ec663b75f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:06:24 +0200 Subject: [PATCH 085/104] remove SWRITE* I/O types These flags aren't real I/O types, but tell ll_rw_block to always lock the buffer instead of giving up on a failed trylock. Instead add a new write_dirty_buffer helper that implements this semantic and use it from the existing SWRITE* callers. Note that the ll_rw_block code had a bug where it didn't promote WRITE_SYNC_PLUG properly, which this patch fixes. In the ufs code clean up the helper that used to call ll_rw_block to mirror sync_dirty_buffer, which is the function it implements for compound buffers. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 52 +++++++++++++++++++++---------------- fs/fat/misc.c | 4 ++- fs/jbd/checkpoint.c | 4 ++- fs/jbd/journal.c | 2 +- fs/jbd/revoke.c | 2 +- fs/jbd2/checkpoint.c | 4 ++- fs/jbd2/journal.c | 2 +- fs/jbd2/revoke.c | 2 +- fs/reiserfs/journal.c | 2 +- fs/ufs/balloc.c | 24 ++++++----------- fs/ufs/ialloc.c | 18 +++++-------- fs/ufs/truncate.c | 18 +++++-------- fs/ufs/util.c | 20 ++++++-------- fs/ufs/util.h | 3 +-- include/linux/buffer_head.h | 1 + include/linux/fs.h | 9 ------- 16 files changed, 73 insertions(+), 94 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 6c8ad977f3d4..3e7dca279d1c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_unlock(lock); /* * Ensure any pending I/O completes so that - * ll_rw_block() actually writes the current - * contents - it is a noop if I/O is still in - * flight on potentially older contents. + * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. */ - ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, WRITE_SYNC_PLUG); /* * Kick off IO for the previous mapping. Note @@ -2949,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %SWRITE is like %WRITE only we make sure that the *current* data in buffers - * are sent to disk. The fourth %READA option is described in the documentation - * for generic_make_request() which ll_rw_block() calls. + * %READA option is described in the documentation for generic_make_request() + * which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be - * clean when doing a write request, and any buffer that appears to be - * up-to-date when doing read request. Further it marks as clean buffers that - * are processed for writing (the buffer cache won't assume that they are - * actually clean until the buffer gets unlocked). + * BH_Lock state bit), any buffer that appears to be clean when doing a write + * request, and any buffer that appears to be up-to-date when doing read + * request. Further it marks as clean buffers that are processed for + * writing (the buffer cache won't assume that they are actually clean + * until the buffer gets unlocked). * * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2980,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) - lock_buffer(bh); - else if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) continue; - - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || - rw == SWRITE_SYNC_PLUG) { + if (rw == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - if (rw == SWRITE_SYNC) - submit_bh(WRITE_SYNC, bh); - else - submit_bh(WRITE, bh); + submit_bh(WRITE, bh); continue; } } else { @@ -3009,6 +3002,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); +void write_dirty_buffer(struct buffer_head *bh, int rw) +{ + lock_buffer(bh); + if (!test_clear_buffer_dirty(bh)) { + unlock_buffer(bh); + return; + } + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(rw, bh); +} +EXPORT_SYMBOL(write_dirty_buffer); + /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. The caller must have a ref on diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6ffba5..1736f2356388 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { int i, err = 0; - ll_rw_block(SWRITE, nr_bhs, bhs); + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); if (buffer_eopnotsupp(bhs[i])) { diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd0654d..05a38b9c4c0e 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94693d8..2c4b1f109da9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad717328343a..d29018307e2e 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f4e8a3..5247e7ffdcb4 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866aaf0f9..0e8014ea6b94 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06af2e3..9ad321fd63fd 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b1f036..812e2c05aa29 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(SWRITE, 1, real_blocks + i); + write_dirty_buffer(real_blocks[i], WRITE); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484fb10d2..46f7a807bbc1 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -207,10 +205,8 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { fragment += count; @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -680,10 +674,8 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; result += cgno * uspi->s_fpg; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e018fe..2eabf04af3de 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer(UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); } @@ -290,10 +286,8 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; inode->i_ino = cg * uspi->s_ipg + bit; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb135320..a58f9155fc9a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ind_ubh = NULL; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block(SWRITE, ind_ubh); - ubh_wait_on_buffer (ind_ubh); - } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); dind_bh = NULL; } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block(SWRITE, dind_bh); - ubh_wait_on_buffer (dind_bh); - } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); tind_bh = NULL; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block(SWRITE, tind_bh); - ubh_wait_on_buffer (tind_bh); - } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9e4a4e..d2c36d53fe66 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) +void ubh_sync_block(struct ufs_buffer_head *ubh) { - if (!ubh) - return; + if (ubh) { + unsigned i; - ll_rw_block(rw, ubh->count, ubh->bh); -} + for (i = 0; i < ubh->count; i++) + write_dirty_buffer(ubh->bh[i], WRITE); -void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) -{ - unsigned i; - if (!ubh) - return; - for ( i = 0; i < ubh->count; i++ ) - wait_on_buffer (ubh->bh[i]); + for (i = 0; i < ubh->count; i++) + wait_on_buffer(ubh->bh[i]); + } } void ubh_bforget (struct ufs_buffer_head * ubh) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036912f1..9f8775ce381c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); -extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); -extern void ubh_wait_on_buffer (struct ufs_buffer_head *); +extern void ubh_sync_block(struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 72c1cf83eb85..ec94c12f21da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -182,6 +182,7 @@ void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int rw); +void write_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a96b4d83fc1..29f7c975304c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,9 +125,6 @@ struct inodes_stat_t { * block layer could (in theory) choose to ignore this * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. - * SWRITE Like WRITE, but a special case for ll_rw_block() that - * tells it to lock the buffer first. Normally a buffer - * must be locked before doing IO. * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO * shortly. The device must still be unplugged explicitly, @@ -138,9 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * SWRITE_SYNC - * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. - * See SWRITE. * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all * previously submitted writes must be safely on storage * before this one is started. Also guarantees that when @@ -155,7 +149,6 @@ struct inodes_stat_t { #define READ 0 #define WRITE RW_MASK #define READA RWA_MASK -#define SWRITE (WRITE | READA) #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) #define READ_META (READ | REQ_META) @@ -165,8 +158,6 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) -#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) -#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) /* * These aren't really reads or writes, they pass down information about From 2c7d46ec192e4f2b350f67a0e185b9bce646cd6b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 18 Aug 2010 16:16:05 +1000 Subject: [PATCH 086/104] md raid-1/10 Fix bio_rw bit manipulations again commit 7b6d91daee5cac6402186ff224c3af39d79f4a0e changed the behaviour of a few variables in raid1 and raid10 from flags to bit-sets, but left them as type 'bool' so they did not work. Change them (back) to unsigned long. (historical note: see 1ef04fefe2241087d9db7e9615c3f11b516e36cf) Signed-off-by: NeilBrown Reported-by: Jiri Slaby and many others --- drivers/md/raid1.c | 8 ++++---- drivers/md/raid10.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 64d96526a9cc..ad83a4dcadc3 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -787,8 +787,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) struct bio_list bl; struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); - const bool do_sync = (bio->bi_rw & REQ_SYNC); - bool do_barriers; + const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); + unsigned long do_barriers; mdk_rdev_t *blocked_rdev; /* @@ -1643,7 +1643,7 @@ static void raid1d(mddev_t *mddev) * We already have a nr_pending reference on these rdevs. */ int i; - const bool do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC); + const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC); clear_bit(R1BIO_BarrierRetry, &r1_bio->state); clear_bit(R1BIO_Barrier, &r1_bio->state); for (i=0; i < conf->raid_disks; i++) @@ -1699,7 +1699,7 @@ static void raid1d(mddev_t *mddev) (unsigned long long)r1_bio->sector); raid_end_bio_io(r1_bio); } else { - const bool do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC; + const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC; r1_bio->bios[r1_bio->read_disk] = mddev->ro ? IO_BLOCKED : NULL; r1_bio->read_disk = disk; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a2f8a7153dce..84718383124d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -799,7 +799,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); - const bool do_sync = (bio->bi_rw & REQ_SYNC); + const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); struct bio_list bl; unsigned long flags; mdk_rdev_t *blocked_rdev; @@ -1737,7 +1737,7 @@ static void raid10d(mddev_t *mddev) raid_end_bio_io(r10_bio); bio_put(bio); } else { - const bool do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); + const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); bio_put(bio); rdev = conf->mirrors[mirror].rdev; if (printk_ratelimit()) From 850a496f969719b494cc972ab1d0e088737358d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Aug 2010 06:18:57 -0400 Subject: [PATCH 087/104] hostfs: dumb (and usually harmless) tpyo - strncpy instead of strlcpy ... not harmless in this case - we have a string in the end of buffer already. Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index dd1e55535a4e..77c4f6ee6c40 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name) __putname(name); return NULL; } - strncpy(name, root, PATH_MAX); + strlcpy(name, root, PATH_MAX); if (len > p - name) { __putname(name); return NULL; From 3b6036d148bad5bb7928b021a49bb9e395361084 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Aug 2010 06:21:10 -0400 Subject: [PATCH 088/104] hostfs ->follow_link() braino we want the assignment to err done inside the if () to be visible after it, so (re)declaring err inside if () body is wrong. Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 77c4f6ee6c40..f7dc9b5f9ef8 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) char *path = dentry_name(dentry); int err = -ENOMEM; if (path) { - int err = hostfs_do_readlink(path, link, PATH_MAX); + err = hostfs_do_readlink(path, link, PATH_MAX); if (err == PATH_MAX) err = -E2BIG; __putname(path); From 3a48ee8a4ad26c3a538b6fc11a86a8f80c3dce18 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 16 Aug 2010 19:05:23 +0200 Subject: [PATCH 089/104] mbcache: Limit the maximum number of cache entries Limit the maximum number of mb_cache entries depending on the number of hash buckets: if the only limit to the number of cache entries is the available memory the hash chains can grow very long, taking a long time to search. At least partially solves https://bugzilla.lustre.org/show_bug.cgi?id=22771. Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/mbcache.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/mbcache.c b/fs/mbcache.c index cf4e6cdfd15b..93444747237b 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -80,6 +80,7 @@ struct mb_cache { struct list_head c_cache_list; const char *c_name; atomic_t c_entry_count; + int c_max_entries; int c_bucket_bits; struct kmem_cache *c_entry_cache; struct list_head *c_block_hash; @@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits) if (!cache->c_entry_cache) goto fail2; + /* + * Set an upper limit on the number of cache entries so that the hash + * chains won't grow too long. + */ + cache->c_max_entries = bucket_count << 4; + spin_lock(&mb_cache_spinlock); list_add(&cache->c_cache_list, &mb_cache_list); spin_unlock(&mb_cache_spinlock); @@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache) kfree(cache); } - /* * mb_cache_entry_alloc() * @@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache) struct mb_cache_entry * mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) { - struct mb_cache_entry *ce; + struct mb_cache_entry *ce = NULL; - ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); - if (ce) { + if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { + spin_lock(&mb_cache_spinlock); + if (!list_empty(&mb_cache_lru_list)) { + ce = list_entry(mb_cache_lru_list.next, + struct mb_cache_entry, e_lru_list); + list_del_init(&ce->e_lru_list); + __mb_cache_entry_unhash(ce); + } + spin_unlock(&mb_cache_spinlock); + } + if (!ce) { + ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); + if (!ce) + return NULL; atomic_inc(&cache->c_entry_count); INIT_LIST_HEAD(&ce->e_lru_list); INIT_LIST_HEAD(&ce->e_block_list); ce->e_cache = cache; - ce->e_used = 1 + MB_CACHE_WRITER; ce->e_queued = 0; } + ce->e_used = 1 + MB_CACHE_WRITER; return ce; } From 2e2e88ea8c3bd9e1bd6e42faf047a4ac3fbb3b2f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:30 +1000 Subject: [PATCH 090/104] fs: fix do_lookup false negative fs: fix do_lookup false negative In do_lookup, if we initially find no dentry, we take the directory i_mutex and re-check the lookup. If we find a dentry there, then we revalidate it if needed. However if that revalidate asks for the dentry to be invalidated, we return -ENOENT from do_lookup. What should happen instead is an attempt to allocate and lookup a new dentry. This is probably not noticed because it is rare. It is only reached if a concurrent create races in first (in which case, the dentry probably won't be invalidated anyway), or if the racy __d_lookup has failed due to a false-negative (which is very rare). Fix this by removing code and have it use the normal reval path. Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/namei.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 17ea76bf2fbe..c2742b7dec59 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -709,6 +709,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, dentry = __d_lookup(nd->path.dentry, name); if (!dentry) goto need_lookup; +found: if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; done: @@ -766,14 +767,7 @@ out_unlock: * we waited on the semaphore. Need to revalidate. */ mutex_unlock(&dir->i_mutex); - if (dentry->d_op && dentry->d_op->d_revalidate) { - dentry = do_revalidate(dentry, nd); - if (!dentry) - dentry = ERR_PTR(-ENOENT); - } - if (IS_ERR(dentry)) - goto fail; - goto done; + goto found; need_revalidate: dentry = do_revalidate(dentry, nd); From baa0389073eb7beb9d36f6d13df97e16c1bfa626 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:31 +1000 Subject: [PATCH 091/104] fs: dentry allocation consolidation fs: dentry allocation consolidation There are 2 duplicate copies of code in dentry allocation in path lookup. Consolidate them into a single function. Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/namei.c | 70 +++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index c2742b7dec59..b815a4d2e1d6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -685,6 +685,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd) follow_mount(&nd->path); } +/* + * Allocate a dentry with name and parent, and perform a parent + * directory ->lookup on it. Returns the new dentry, or ERR_PTR + * on error. parent->d_inode->i_mutex must be held. d_lookup must + * have verified that no child exists while under i_mutex. + */ +static struct dentry *d_alloc_and_lookup(struct dentry *parent, + struct qstr *name, struct nameidata *nd) +{ + struct inode *inode = parent->d_inode; + struct dentry *dentry; + struct dentry *old; + + /* Don't create child dentry for a dead directory. */ + if (unlikely(IS_DEADDIR(inode))) + return ERR_PTR(-ENOENT); + + dentry = d_alloc(parent, name); + if (unlikely(!dentry)) + return ERR_PTR(-ENOMEM); + + old = inode->i_op->lookup(inode, dentry, nd); + if (unlikely(old)) { + dput(dentry); + dentry = old; + } + return dentry; +} + /* * It's more convoluted than I'd like it to be, but... it's still fairly * small and for now I'd prefer to have fast path as straight as possible. @@ -738,30 +767,13 @@ need_lookup: * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup */ dentry = d_lookup(parent, name); - if (!dentry) { - struct dentry *new; - - /* Don't create child dentry for a dead directory. */ - dentry = ERR_PTR(-ENOENT); - if (IS_DEADDIR(dir)) - goto out_unlock; - - new = d_alloc(parent, name); - dentry = ERR_PTR(-ENOMEM); - if (new) { - dentry = dir->i_op->lookup(dir, new, nd); - if (dentry) - dput(new); - else - dentry = new; - } -out_unlock: + if (likely(!dentry)) { + dentry = d_alloc_and_lookup(parent, name, nd); mutex_unlock(&dir->i_mutex); if (IS_ERR(dentry)) goto fail; goto done; } - /* * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. @@ -1135,24 +1147,8 @@ static struct dentry *__lookup_hash(struct qstr *name, if (dentry && dentry->d_op && dentry->d_op->d_revalidate) dentry = do_revalidate(dentry, nd); - if (!dentry) { - struct dentry *new; - - /* Don't create child dentry for a dead directory. */ - dentry = ERR_PTR(-ENOENT); - if (IS_DEADDIR(inode)) - goto out; - - new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - dentry = inode->i_op->lookup(inode, new, nd); - if (!dentry) - dentry = new; - else - dput(new); - } + if (!dentry) + dentry = d_alloc_and_lookup(base, name, nd); out: return dentry; } From 44672e4fbd40e2dda8bbce7d0f71d24dbfc7e00e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:32 +1000 Subject: [PATCH 092/104] apparmor: use task path helpers apparmor: use task path helpers Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- security/apparmor/path.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/security/apparmor/path.c b/security/apparmor/path.c index 96bab9469d48..19358dc14605 100644 --- a/security/apparmor/path.c +++ b/security/apparmor/path.c @@ -62,19 +62,14 @@ static int d_namespace_path(struct path *path, char *buf, int buflen, int deleted, connected; int error = 0; - /* Get the root we want to resolve too */ + /* Get the root we want to resolve too, released below */ if (flags & PATH_CHROOT_REL) { /* resolve paths relative to chroot */ - read_lock(¤t->fs->lock); - root = current->fs->root; - /* released below */ - path_get(&root); - read_unlock(¤t->fs->lock); + get_fs_root(current->fs, &root); } else { /* resolve paths relative to namespace */ root.mnt = current->nsproxy->mnt_ns->root; root.dentry = root.mnt->mnt_root; - /* released below */ path_get(&root); } From 2a4419b5b2a77f3f4537c14f7ad7df95770655dd Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:33 +1000 Subject: [PATCH 093/104] fs: fs_struct rwlock to spinlock fs: fs_struct rwlock to spinlock struct fs_struct.lock is an rwlock with the read-side used to protect root and pwd members while taking references to them. Taking a reference to a path typically requires just 2 atomic ops, so the critical section is very small. Parallel read-side operations would have cacheline contention on the lock, the dentry, and the vfsmount cachelines, so the rwlock is unlikely to ever give a real parallelism increase. Replace it with a spinlock to avoid one or two atomic operations in typical path lookup fastpath. Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/staging/pohmelfs/path_entry.c | 8 +++---- fs/exec.c | 4 ++-- fs/fs_struct.c | 32 +++++++++++++-------------- include/linux/fs_struct.h | 14 ++++++------ kernel/fork.c | 10 ++++----- 5 files changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index cdc4dd50d638..8ec83d2dffb7 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); path.mnt = mntget(current->fs->root.mnt); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); path.dentry = d; @@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); root = dget(current->fs->root.dentry); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); spin_lock(&dcache_lock); diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..5adab2c93eca 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1117,7 +1117,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; - write_lock(&p->fs->lock); + spin_lock(&p->fs->lock); rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1134,7 +1134,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - write_unlock(&p->fs->lock); + spin_unlock(&p->fs->lock); return res; } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 1ee40eb9a2c0..ed45a9cf5f3d 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_root = fs->root; fs->root = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } @@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_pwd = fs->pwd; fs->pwd = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); @@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) task_lock(p); fs = p->fs; if (fs) { - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { path_get(new_root); @@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) fs->pwd = *new_root; count++; } - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } task_unlock(p); } while_each_thread(g, p); @@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk) if (fs) { int kill; task_lock(tsk); - write_lock(&fs->lock); + spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); @@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) if (fs) { fs->users = 1; fs->in_exec = 0; - rwlock_init(&fs->lock); + spin_lock_init(&fs->lock); fs->umask = old->umask; get_fs_root_and_pwd(old, &fs->root, &fs->pwd); } @@ -121,10 +121,10 @@ int unshare_fs_struct(void) return -ENOMEM; task_lock(current); - write_lock(&fs->lock); + spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) @@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; @@ -156,14 +156,14 @@ void daemonize_fs_struct(void) task_lock(current); - write_lock(&init_fs.lock); + spin_lock(&init_fs.lock); init_fs.users++; - write_unlock(&init_fs.lock); + spin_unlock(&init_fs.lock); - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = &init_fs; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index eca3d5202138..a42b5bf02f8b 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -5,7 +5,7 @@ struct fs_struct { int users; - rwlock_t lock; + spinlock_t lock; int umask; int in_exec; struct path root, pwd; @@ -23,29 +23,29 @@ extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } #endif /* _LINUX_FS_STRUCT_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 98b450876f93..856eac3ec52e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -752,13 +752,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->in_exec) { - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return -EAGAIN; } fs->users++; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return 0; } tsk->fs = copy_fs_struct(fs); @@ -1676,13 +1676,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (new_fs) { fs = current->fs; - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } if (new_mm) { From b04f784e5d19ed58892833dae845738972cea260 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:34 +1000 Subject: [PATCH 094/104] fs: remove extra lookup in __lookup_hash fs: remove extra lookup in __lookup_hash Optimize lookup for create operations, where no dentry should often be common-case. In cases where it is not, such as unlink, the added overhead is much smaller than the removed. Also, move comments about __d_lookup racyness to the __d_lookup call site. d_lookup is intuitive; __d_lookup is what needs commenting. So in that same vein, add kerneldoc comments to __d_lookup and clean up some of the comments: - We are interested in how the RCU lookup works here, particularly with renames. Make that explicit, and point to the document where it is explained in more detail. - RCU is pretty standard now, and macros make implementations pretty mindless. If we want to know about RCU barrier details, we look in RCU code. - Delete some boring legacy comments because we don't care much about how the code used to work, more about the interesting parts of how it works now. So comments about lazy LRU may be interesting, but would better be done in the LRU or refcount management code. Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/dcache.c | 60 +++++++++++++++++++++++++++++++---------------------- fs/namei.c | 32 ++++++++++++++-------------- 2 files changed, 51 insertions(+), 41 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 4d13bf50b7b1..d56a40b5a577 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci); * d_lookup - search for a dentry * @parent: parent dentry * @name: qstr of name we wish to find + * Returns: dentry, or NULL * - * Searches the children of the parent dentry for the name in question. If - * the dentry is found its reference count is incremented and the dentry - * is returned. The caller must use dput to free the entry when it has - * finished using it. %NULL is returned on failure. - * - * __d_lookup is dcache_lock free. The hash list is protected using RCU. - * Memory barriers are used while updating and doing lockless traversal. - * To avoid races with d_move while rename is happening, d_lock is used. - * - * Overflows in memcmp(), while d_move, are avoided by keeping the length - * and name pointer in one structure pointed by d_qstr. - * - * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while - * lookup is going on. - * - * The dentry unused LRU is not updated even if lookup finds the required dentry - * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, - * select_parent and __dget_locked. This laziness saves lookup from dcache_lock - * acquisition. - * - * d_lookup() is protected against the concurrent renames in some unrelated - * directory using the seqlockt_t rename_lock. + * d_lookup searches the children of the parent dentry for the name in + * question. If the dentry is found its reference count is incremented and the + * dentry is returned. The caller must use dput to free the entry when it has + * finished using it. %NULL is returned if the dentry does not exist. */ - struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { struct dentry * dentry = NULL; @@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) } EXPORT_SYMBOL(d_lookup); +/* + * __d_lookup - search for a dentry (racy) + * @parent: parent dentry + * @name: qstr of name we wish to find + * Returns: dentry, or NULL + * + * __d_lookup is like d_lookup, however it may (rarely) return a + * false-negative result due to unrelated rename activity. + * + * __d_lookup is slightly faster by avoiding rename_lock read seqlock, + * however it must be used carefully, eg. with a following d_lookup in + * the case of failure. + * + * __d_lookup callers must be commented. + */ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) { unsigned int len = name->len; @@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) struct hlist_node *node; struct dentry *dentry; + /* + * The hash list is protected using RCU. + * + * Take d_lock when comparing a candidate dentry, to avoid races + * with d_move(). + * + * It is possible that concurrent renames can mess up our list + * walk here and result in missing our dentry, resulting in the + * false-negative result. d_lookup() protects against concurrent + * renames using rename_lock seqlock. + * + * See Documentation/vfs/dcache-locking.txt for more details. + */ rcu_read_lock(); hlist_for_each_entry_rcu(dentry, node, head, d_hash) { @@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) /* * Recheck the dentry after taking the lock - d_move may have - * changed things. Don't bother checking the hash because we're - * about to compare the whole name anyway. + * changed things. Don't bother checking the hash because + * we're about to compare the whole name anyway. */ if (dentry->d_parent != parent) goto next; diff --git a/fs/namei.c b/fs/namei.c index b815a4d2e1d6..11de7c39ff76 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -735,6 +735,11 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return err; } + /* + * Rename seqlock is not required here because in the off chance + * of a false negative due to a concurrent rename, we're going to + * do the non-racy lookup, below. + */ dentry = __d_lookup(nd->path.dentry, name); if (!dentry) goto need_lookup; @@ -754,17 +759,13 @@ need_lookup: mutex_lock(&dir->i_mutex); /* * First re-do the cached lookup just in case it was created - * while we waited for the directory semaphore.. + * while we waited for the directory semaphore, or the first + * lookup failed due to an unrelated rename. * - * FIXME! This could use version numbering or similar to - * avoid unnecessary cache lookups. - * - * The "dcache_lock" is purely to protect the RCU list walker - * from concurrent renames at this point (we mustn't get false - * negatives from the RCU list walk here, unlike the optimistic - * fast walk). - * - * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup + * This could use version numbering or similar to avoid unnecessary + * cache lookups, but then we'd have to do the first lookup in the + * non-racy way. However in the common case here, everything should + * be hot in cache, so would it be a big win? */ dentry = d_lookup(parent, name); if (likely(!dentry)) { @@ -1136,13 +1137,12 @@ static struct dentry *__lookup_hash(struct qstr *name, goto out; } - dentry = __d_lookup(base, name); - - /* lockess __d_lookup may fail due to concurrent d_move() - * in some unrelated directory, so try with d_lookup + /* + * Don't bother with __d_lookup: callers are for creat as + * well as unlink, so a lot of the time it would cost + * a double lookup. */ - if (!dentry) - dentry = d_lookup(base, name); + dentry = d_lookup(base, name); if (dentry && dentry->d_op && dentry->d_op->d_revalidate) dentry = do_revalidate(dentry, nd); From ee2ffa0dfdd2db19705f2ba1c6a4c0bfe8122dd8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:35 +1000 Subject: [PATCH 095/104] fs: cleanup files_lock locking fs: cleanup files_lock locking Lock tty_files with a new spinlock, tty_files_lock; provide helpers to manipulate the per-sb files list; unexport the files_lock spinlock. Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Acked-by: Andi Kleen Acked-by: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +++++- drivers/char/tty_io.c | 26 +++++++++++++++++-------- fs/file_table.c | 42 +++++++++++++++++----------------------- fs/open.c | 4 ++-- include/linux/fs.h | 7 ++----- include/linux/tty.h | 1 + security/selinux/hooks.c | 4 ++-- 7 files changed, 48 insertions(+), 42 deletions(-) diff --git a/drivers/char/pty.c b/drivers/char/pty.c index ad46eae1f9bb..2c64faa8efa4 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -676,7 +676,11 @@ static int ptmx_open(struct inode *inode, struct file *filp) set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ filp->private_data = tty; - file_move(filp, &tty->tty_files); + + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 0350c42375a2..cd5b829634ea 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); +/* Spinlock to protect the tty->tty_files list */ +DEFINE_SPINLOCK(tty_files_lock); + static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -235,11 +238,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) struct list_head *p; int count = 0; - file_list_lock(); + spin_lock(&tty_files_lock); list_for_each(p, &tty->tty_files) { count++; } - file_list_unlock(); + spin_unlock(&tty_files_lock); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -519,7 +522,7 @@ void __tty_hangup(struct tty_struct *tty) workqueue with the lock held */ check_tty_count(tty, "tty_hangup"); - file_list_lock(); + spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { if (filp->f_op->write == redirected_tty_write) @@ -530,7 +533,7 @@ void __tty_hangup(struct tty_struct *tty) __tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_ldisc_hangup(tty); @@ -1424,9 +1427,9 @@ static void release_one_tty(struct work_struct *work) tty_driver_kref_put(driver); module_put(driver->owner); - file_list_lock(); + spin_lock(&tty_files_lock); list_del_init(&tty->tty_files); - file_list_unlock(); + spin_unlock(&tty_files_lock); put_pid(tty->pgrp); put_pid(tty->session); @@ -1671,7 +1674,10 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ - file_kill(filp); + spin_lock(&tty_files_lock); + BUG_ON(list_empty(&filp->f_u.fu_list)); + list_del_init(&filp->f_u.fu_list); + spin_unlock(&tty_files_lock); filp->private_data = NULL; /* @@ -1840,7 +1846,11 @@ got_driver: } filp->private_data = tty; - file_move(filp, &tty->tty_files); + BUG_ON(list_empty(&filp->f_u.fu_list)); + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) diff --git a/fs/file_table.c b/fs/file_table.c index edecd36fed9b..6f0e62ecfddd 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -32,8 +32,7 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* public. Not pretty! */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -249,7 +248,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_kill(file); + file_sb_list_del(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -328,31 +327,29 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_kill(file); + file_sb_list_del(file); file_free(file); } } -void file_move(struct file *file, struct list_head *list) +void file_sb_list_add(struct file *file, struct super_block *sb) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); + spin_lock(&files_lock); + BUG_ON(!list_empty(&file->f_u.fu_list)); + list_add(&file->f_u.fu_list, &sb->s_files); + spin_unlock(&files_lock); } -void file_kill(struct file *file) +void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - file_list_lock(); + spin_lock(&files_lock); list_del_init(&file->f_u.fu_list); - file_list_unlock(); + spin_unlock(&files_lock); } } @@ -361,7 +358,7 @@ int fs_may_remount_ro(struct super_block *sb) struct file *file; /* Check that no files are currently opened for writing. */ - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(file, &sb->s_files, f_u.fu_list) { struct inode *inode = file->f_path.dentry->d_inode; @@ -373,10 +370,10 @@ int fs_may_remount_ro(struct super_block *sb) if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; } - file_list_unlock(); + spin_unlock(&files_lock); return 1; /* Tis' cool bro. */ too_bad: - file_list_unlock(); + spin_unlock(&files_lock); return 0; } @@ -392,7 +389,7 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(f, &sb->s_files, f_u.fu_list) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -408,16 +405,13 @@ retry: continue; file_release_write(f); mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ + /* This can sleep, so we can't hold the spinlock. */ + spin_unlock(&files_lock); mnt_drop_write(mnt); mntput(mnt); goto retry; } - file_list_unlock(); + spin_unlock(&files_lock); } void __init files_init(unsigned long mempages) diff --git a/fs/open.c b/fs/open.c index 630715f9f73d..d74e1983e8dc 100644 --- a/fs/open.c +++ b/fs/open.c @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_sb_list_add(f, inode->i_sb); error = security_dentry_open(f, cred); if (error) @@ -721,7 +721,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_kill(f); + file_sb_list_del(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: diff --git a/include/linux/fs.h b/include/linux/fs.h index 29f7c975304c..5a9a9e5a3705 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -944,9 +944,6 @@ struct file { unsigned long f_mnt_write_state; #endif }; -extern spinlock_t files_lock; -#define file_list_lock() spin_lock(&files_lock); -#define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) @@ -2188,8 +2185,8 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_move(struct file *f, struct list_head *list); -extern void file_kill(struct file *f); +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index 1437da3ddc62..f6b371a2514e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -470,6 +470,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); extern struct mutex tty_mutex; +extern spinlock_t tty_files_lock; extern void tty_write_unlock(struct tty_struct *tty); extern int tty_write_lock(struct tty_struct *tty, int ndelay); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 42043f96e54f..bd7da0f0ccf3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2170,7 +2170,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, tty = get_current_tty(); if (tty) { - file_list_lock(); + spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { struct inode *inode; @@ -2186,7 +2186,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, drop_tty = 1; } } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_kref_put(tty); } /* Reset controlling tty. */ From d996b62a8df1d935b01319bf8defb95b5709f7b8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:36 +1000 Subject: [PATCH 096/104] tty: fix fu_list abuse tty: fix fu_list abuse tty code abuses fu_list, which causes a bug in remount,ro handling. If a tty device node is opened on a filesystem, then the last link to the inode removed, the filesystem will be allowed to be remounted readonly. This is because fs_may_remount_ro does not find the 0 link tty inode on the file sb list (because the tty code incorrectly removed it to use for its own purpose). This can result in a filesystem with errors after it is marked "clean". Taking idea from Christoph's initial patch, allocate a tty private struct at file->private_data and put our required list fields in there, linking file and tty. This makes tty nodes behave the same way as other device nodes and avoid meddling with the vfs, and avoids this bug. The error handling is not trivial in the tty code, so for this bugfix, I take the simple approach of using __GFP_NOFAIL and don't worry about memory errors. This is not a problem because our allocator doesn't fail small allocs as a rule anyway. So proper error handling is left as an exercise for tty hackers. [ Arguably filesystem's device inode would ideally be divorced from the driver's pseudo inode when it is opened, but in practice it's not clear whether that will ever be worth implementing. ] Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Cc: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +-- drivers/char/tty_io.c | 84 ++++++++++++++++++++++++++-------------- fs/internal.h | 2 + include/linux/fs.h | 2 - include/linux/tty.h | 8 ++++ security/selinux/hooks.c | 5 ++- 6 files changed, 69 insertions(+), 38 deletions(-) diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 2c64faa8efa4..c350d01716bd 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -675,12 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp) } set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ - filp->private_data = tty; - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index cd5b829634ea..949067a0bd47 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -188,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty) kfree(tty); } +static inline struct tty_struct *file_tty(struct file *file) +{ + return ((struct tty_file_private *)file->private_data)->tty; +} + +/* Associate a new file with the tty structure */ +void tty_add_file(struct tty_struct *tty, struct file *file) +{ + struct tty_file_private *priv; + + /* XXX: must implement proper error handling in callers */ + priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL); + + priv->tty = tty; + priv->file = file; + file->private_data = priv; + + spin_lock(&tty_files_lock); + list_add(&priv->list, &tty->tty_files); + spin_unlock(&tty_files_lock); +} + +/* Delete file from its tty */ +void tty_del_file(struct file *file) +{ + struct tty_file_private *priv = file->private_data; + + spin_lock(&tty_files_lock); + list_del(&priv->list); + spin_unlock(&tty_files_lock); + file->private_data = NULL; + kfree(priv); +} + + #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base) /** @@ -500,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty) struct file *cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; + struct tty_file_private *priv; int closecount = 0, n; unsigned long flags; int refs = 0; @@ -509,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&redirect_lock); - if (redirect && redirect->private_data == tty) { + if (redirect && file_tty(redirect) == tty) { f = redirect; redirect = NULL; } @@ -524,7 +560,8 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ - list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { + list_for_each_entry(priv, &tty->tty_files, list) { + filp = priv->file; if (filp->f_op->write == redirected_tty_write) cons_filp = filp; if (filp->f_op->write != tty_write) @@ -892,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int i; - struct tty_struct *tty; - struct inode *inode; + struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; - tty = file->private_data; - inode = file->f_path.dentry->d_inode; if (tty_paranoia_check(tty, inode, "tty_read")) return -EIO; if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) @@ -1068,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg) static ssize_t tty_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct tty_struct *tty; struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); + struct tty_ldisc *ld; ssize_t ret; - struct tty_ldisc *ld; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_write")) return -EIO; if (!tty || !tty->ops->write || @@ -1510,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx) int tty_release(struct inode *inode, struct file *filp) { - struct tty_struct *tty, *o_tty; + struct tty_struct *tty = file_tty(filp); + struct tty_struct *o_tty; int pty_master, tty_closing, o_tty_closing, do_sleep; int devpts; int idx; char buf[64]; - tty = filp->private_data; if (tty_paranoia_check(tty, inode, "tty_release_dev")) return 0; @@ -1674,11 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ - spin_lock(&tty_files_lock); - BUG_ON(list_empty(&filp->f_u.fu_list)); - list_del_init(&filp->f_u.fu_list); - spin_unlock(&tty_files_lock); - filp->private_data = NULL; + tty_del_file(filp); /* * Perform some housekeeping before deciding whether to return. @@ -1845,12 +1875,8 @@ got_driver: return PTR_ERR(tty); } - filp->private_data = tty; - BUG_ON(list_empty(&filp->f_u.fu_list)); - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); + check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -1926,11 +1952,10 @@ got_driver: static unsigned int tty_poll(struct file *filp, poll_table *wait) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); struct tty_ldisc *ld; int ret = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll")) return 0; @@ -1943,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait) static int __tty_fasync(int fd, struct file *filp, int on) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); unsigned long flags; int retval = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync")) goto out; @@ -2501,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty); */ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct tty_struct *tty, *real_tty; + struct tty_struct *tty = file_tty(file); + struct tty_struct *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; struct inode *inode = file->f_dentry->d_inode; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; @@ -2629,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file->f_dentry->d_inode; - struct tty_struct *tty = file->private_data; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; int retval = -ENOIOCTLCMD; @@ -2721,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty) if (!filp) continue; if (filp->f_op->read == tty_read && - filp->private_data == tty) { + file_tty(filp) == tty) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): fd#%d opened to the tty\n", task_pid_nr(p), p->comm, i); diff --git a/fs/internal.h b/fs/internal.h index 6b706bc60a66..6a5c13a80660 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -80,6 +80,8 @@ extern void chroot_fs_refs(struct path *, struct path *); /* * file_table.c */ +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5a9a9e5a3705..5e65add0f163 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2185,8 +2185,6 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index f6b371a2514e..67d64e6efe7a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -329,6 +329,13 @@ struct tty_struct { struct tty_port *port; }; +/* Each of a tty's open files has private_data pointing to tty_file_private */ +struct tty_file_private { + struct tty_struct *tty; + struct file *file; + struct list_head list; +}; + /* tty magic number */ #define TTY_MAGIC 0x5401 @@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(void); +extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index bd7da0f0ccf3..4796ddd4e721 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2172,6 +2172,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, if (tty) { spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { + struct tty_file_private *file_priv; struct inode *inode; /* Revalidate access to controlling tty. @@ -2179,7 +2180,9 @@ static inline void flush_unauthorized_files(const struct cred *cred, than using file_has_perm, as this particular open file may belong to another process and we are only interested in the inode-based check here. */ - file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list); + file_priv = list_first_entry(&tty->tty_files, + struct tty_file_private, list); + file = file_priv->file; inode = file->f_path.dentry->d_inode; if (inode_has_perm(cred, inode, FILE__READ | FILE__WRITE, NULL)) { From 2dc91abe03d8ce6dd7f9251faffafca5f6b9e85d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:37 +1000 Subject: [PATCH 097/104] lglock: introduce special lglock and brlock spin locks lglock: introduce special lglock and brlock spin locks This patch introduces "local-global" locks (lglocks). These can be used to: - Provide fast exclusive access to per-CPU data, with exclusive access to another CPU's data allowed but possibly subject to contention, and to provide very slow exclusive access to all per-CPU data. - Or to provide very fast and scalable read serialisation, and to provide very slow exclusive serialisation of data (not necessarily per-CPU data). Brlocks are also implemented as a short-hand notation for the latter use case. Thanks to Paul for local/global naming convention. Cc: linux-kernel@vger.kernel.org Cc: Al Viro Cc: "Paul E. McKenney" Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/lglock.h | 172 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 include/linux/lglock.h diff --git a/include/linux/lglock.h b/include/linux/lglock.h new file mode 100644 index 000000000000..b288cb713b90 --- /dev/null +++ b/include/linux/lglock.h @@ -0,0 +1,172 @@ +/* + * Specialised local-global spinlock. Can only be declared as global variables + * to avoid overhead and keep things simple (and we don't want to start using + * these inside dynamically allocated structures). + * + * "local/global locks" (lglocks) can be used to: + * + * - Provide fast exclusive access to per-CPU data, with exclusive access to + * another CPU's data allowed but possibly subject to contention, and to + * provide very slow exclusive access to all per-CPU data. + * - Or to provide very fast and scalable read serialisation, and to provide + * very slow exclusive serialisation of data (not necessarily per-CPU data). + * + * Brlocks are also implemented as a short-hand notation for the latter use + * case. + * + * Copyright 2009, 2010, Nick Piggin, Novell Inc. + */ +#ifndef __LINUX_LGLOCK_H +#define __LINUX_LGLOCK_H + +#include +#include +#include + +/* can make br locks by using local lock for read side, global lock for write */ +#define br_lock_init(name) name##_lock_init() +#define br_read_lock(name) name##_local_lock() +#define br_read_unlock(name) name##_local_unlock() +#define br_write_lock(name) name##_global_lock_online() +#define br_write_unlock(name) name##_global_unlock_online() + +#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) +#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) + + +#define lg_lock_init(name) name##_lock_init() +#define lg_local_lock(name) name##_local_lock() +#define lg_local_unlock(name) name##_local_unlock() +#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) +#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) +#define lg_global_lock(name) name##_global_lock() +#define lg_global_unlock(name) name##_global_unlock() +#define lg_global_lock_online(name) name##_global_lock_online() +#define lg_global_unlock_online(name) name##_global_unlock_online() + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define LOCKDEP_INIT_MAP lockdep_init_map + +#define DEFINE_LGLOCK_LOCKDEP(name) \ + struct lock_class_key name##_lock_key; \ + struct lockdep_map name##_lock_dep_map; \ + EXPORT_SYMBOL(name##_lock_dep_map) + +#else +#define LOCKDEP_INIT_MAP(a, b, c, d) + +#define DEFINE_LGLOCK_LOCKDEP(name) +#endif + + +#define DECLARE_LGLOCK(name) \ + extern void name##_lock_init(void); \ + extern void name##_local_lock(void); \ + extern void name##_local_unlock(void); \ + extern void name##_local_lock_cpu(int cpu); \ + extern void name##_local_unlock_cpu(int cpu); \ + extern void name##_global_lock(void); \ + extern void name##_global_unlock(void); \ + extern void name##_global_lock_online(void); \ + extern void name##_global_unlock_online(void); \ + +#define DEFINE_LGLOCK(name) \ + \ + DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ + DEFINE_LGLOCK_LOCKDEP(name); \ + \ + void name##_lock_init(void) { \ + int i; \ + LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ + for_each_possible_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ + } \ + } \ + EXPORT_SYMBOL(name##_lock_init); \ + \ + void name##_local_lock(void) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock); \ + \ + void name##_local_unlock(void) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock); \ + \ + void name##_local_lock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock_cpu); \ + \ + void name##_local_unlock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock_cpu); \ + \ + void name##_global_lock_online(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock_online); \ + \ + void name##_global_unlock_online(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock_online); \ + \ + void name##_global_lock(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock); \ + \ + void name##_global_unlock(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock); +#endif From 6416ccb7899960868f5016751fb81bf25213d24f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:38 +1000 Subject: [PATCH 098/104] fs: scale files_lock fs: scale files_lock Improve scalability of files_lock by adding per-cpu, per-sb files lists, protected with an lglock. The lglock provides fast access to the per-cpu lists to add and remove files. It also provides a snapshot of all the per-cpu lists (although this is very slow). One difficulty with this approach is that a file can be removed from the list by another CPU. We must track which per-cpu list the file is on with a new variale in the file struct (packed into a hole on 64-bit archs). Scalability could suffer if files are frequently removed from different cpu's list. However loads with frequent removal of files imply short interval between adding and removing the files, and the scheduler attempts to avoid moving processes too far away. Also, even in the case of cross-CPU removal, the hardware has much more opportunity to parallelise cacheline transfers with N cachelines than with 1. A worst-case test of 1 CPU allocating files subsequently being freed by N CPUs degenerates to contending on a single lock, which is no worse than before. When more than one CPU are allocating files, even if they are always freed by different CPUs, there will be more parallelism than the single-lock case. Testing results: On a 2 socket, 8 core opteron, I measure the number of times the lock is taken to remove the file, the number of times it is removed by the same CPU that added it, and the number of times it is removed by the same node that added it. Booting: locks= 25049 cpu-hits= 23174 (92.5%) node-hits= 23945 (95.6%) kbuild -j16 locks=2281913 cpu-hits=2208126 (96.8%) node-hits=2252674 (98.7%) dbench 64 locks=4306582 cpu-hits=4287247 (99.6%) node-hits=4299527 (99.8%) So a file is removed from the same CPU it was added by over 90% of the time. It remains within the same node 95% of the time. Tim Chen ran some numbers for a 64 thread Nehalem system performing a compile. throughput 2.6.34-rc2 24.5 +patch 24.9 us sys idle IO wait (in %) 2.6.34-rc2 51.25 28.25 17.25 3.25 +patch 53.75 18.5 19 8.75 So significantly less CPU time spent in kernel code, higher idle time and slightly higher throughput. Single threaded performance difference was within the noise of microbenchmarks. That is not to say penalty does not exist, the code is larger and more memory accesses required so it will be slightly slower. Cc: linux-kernel@vger.kernel.org Cc: Tim Chen Cc: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/file_table.c | 110 +++++++++++++++++++++++++++++++++++++-------- fs/super.c | 18 ++++++++ include/linux/fs.h | 7 +++ 3 files changed, 116 insertions(+), 19 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index 6f0e62ecfddd..a04bdd81c11c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -32,7 +34,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +DECLARE_LGLOCK(files_lglock); +DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -336,30 +339,98 @@ void put_filp(struct file *file) } } -void file_sb_list_add(struct file *file, struct super_block *sb) +static inline int file_list_cpu(struct file *file) { - spin_lock(&files_lock); - BUG_ON(!list_empty(&file->f_u.fu_list)); - list_add(&file->f_u.fu_list, &sb->s_files); - spin_unlock(&files_lock); +#ifdef CONFIG_SMP + return file->f_sb_list_cpu; +#else + return smp_processor_id(); +#endif } +/* helper for file_sb_list_add to reduce ifdefs */ +static inline void __file_sb_list_add(struct file *file, struct super_block *sb) +{ + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; + cpu = smp_processor_id(); + file->f_sb_list_cpu = cpu; + list = per_cpu_ptr(sb->s_files, cpu); +#else + list = &sb->s_files; +#endif + list_add(&file->f_u.fu_list, list); +} + +/** + * file_sb_list_add - add a file to the sb's file list + * @file: file to add + * @sb: sb to add it to + * + * Use this function to associate a file with the superblock of the inode it + * refers to. + */ +void file_sb_list_add(struct file *file, struct super_block *sb) +{ + lg_local_lock(files_lglock); + __file_sb_list_add(file, sb); + lg_local_unlock(files_lglock); +} + +/** + * file_sb_list_del - remove a file from the sb's file list + * @file: file to remove + * @sb: sb to remove it from + * + * Use this function to remove a file from its superblock. + */ void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - spin_lock(&files_lock); + lg_local_lock_cpu(files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - spin_unlock(&files_lock); + lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); } } +#ifdef CONFIG_SMP + +/* + * These macros iterate all files on all CPUs for a given superblock. + * files_lglock must be held globally. + */ +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + int i; \ + for_each_possible_cpu(i) { \ + struct list_head *list; \ + list = per_cpu_ptr((__sb)->s_files, i); \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ + } \ +} + +#else + +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + struct list_head *list; \ + list = &(sb)->s_files; \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ +} + +#endif + int fs_may_remount_ro(struct super_block *sb) { struct file *file; - /* Check that no files are currently opened for writing. */ - spin_lock(&files_lock); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, file) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ @@ -369,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb) /* Writeable file? */ if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); return 1; /* Tis' cool bro. */ too_bad: - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); return 0; } @@ -389,8 +460,8 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - spin_lock(&files_lock); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; @@ -406,12 +477,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); } void __init files_init(unsigned long mempages) @@ -431,5 +502,6 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); } diff --git a/fs/super.c b/fs/super.c index 9674ab2c8718..8819e3a7ff20 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) s = NULL; goto out; } +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -108,6 +123,9 @@ out: */ static inline void destroy_super(struct super_block *s) { +#ifdef CONFIG_SMP + free_percpu(s->s_files); +#endif security_sb_free(s); kfree(s->s_subtype); kfree(s->s_options); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e65add0f163..76041b614758 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -920,6 +920,9 @@ struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ +#ifdef CONFIG_SMP + int f_sb_list_cpu; +#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -1334,7 +1337,11 @@ struct super_block { struct list_head s_inodes; /* all inodes */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_SMP + struct list_head __percpu *s_files; +#else struct list_head s_files; +#endif /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ From 99b7db7b8ffd6bb755eb0a175596421a0b581cb2 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:39 +1000 Subject: [PATCH 099/104] fs: brlock vfsmount_lock fs: brlock vfsmount_lock Use a brlock for the vfsmount lock. It must be taken for write whenever modifying the mount hash or associated fields, and may be taken for read when performing mount hash lookups. A new lock is added for the mnt-id allocator, so it doesn't need to take the heavy vfsmount write-lock. The number of atomics should remain the same for fastpath rlock cases, though code would be slightly slower due to per-cpu access. Scalability is not not be much improved in common cases yet, due to other locks (ie. dcache_lock) getting in the way. However path lookups crossing mountpoints should be one case where scalability is improved (currently requiring the global lock). The slowpath is slower due to use of brlock. On a 64 core, 64 socket, 32 node Altix system (high latency to remote nodes), a simple umount microbenchmark (mount --bind mnt mnt2 ; umount mnt2 loop 1000 times), before this patch it took 6.8s, afterwards took 7.1s, about 5% slower. Cc: Al Viro Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/dcache.c | 11 +-- fs/internal.h | 5 +- fs/namei.c | 7 +- fs/namespace.c | 177 +++++++++++++++++++++++++++++++------------------ fs/pnode.c | 11 ++- 5 files changed, 134 insertions(+), 77 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index d56a40b5a577..83293be48149 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1935,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root, bool slash = false; int error = 0; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -1964,7 +1964,7 @@ out: if (!error && !slash) error = prepend(buffer, buflen, "/", 1); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return error; global_root: @@ -2302,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2) struct vfsmount *mnt = path1->mnt; struct dentry *dentry = path1->dentry; int res; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); if (mnt != path2->mnt) { for (;;) { if (mnt->mnt_parent == mnt) { - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return 0; } if (mnt->mnt_parent == path2->mnt) @@ -2316,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2) dentry = mnt->mnt_mountpoint; } res = is_subdir(dentry, path2->dentry); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); diff --git a/fs/internal.h b/fs/internal.h index 6a5c13a80660..a6910e91cee8 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -9,6 +9,8 @@ * 2 of the License, or (at your option) any later version. */ +#include + struct super_block; struct linux_binprm; struct path; @@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void __init mnt_init(void); -extern spinlock_t vfsmount_lock; +DECLARE_BRLOCK(vfsmount_lock); + /* * fs_struct.c diff --git a/fs/namei.c b/fs/namei.c index 11de7c39ff76..24896e833565 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -595,15 +595,16 @@ int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); parent = path->mnt->mnt_parent; if (parent == path->mnt) { - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return 0; } mntget(parent); mountpoint = dget(path->mnt->mnt_mountpoint); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); diff --git a/fs/namespace.c b/fs/namespace.c index 2e10cb19c5b0..de402eb6eafb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -38,12 +40,10 @@ #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) #define HASH_SIZE (1UL << HASH_SHIFT) -/* spinlock for vfsmount related operations, inplace of dcache_lock */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); - static int event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); +static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; @@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem; struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); +/* + * vfsmount lock may be taken for read to prevent changes to the + * vfsmount hash, ie. during mountpoint lookups or walking back + * up the tree. + * + * It should be taken for write in all cases where the vfsmount + * tree or hash is modified or when a vfsmount structure is modified. + */ +DEFINE_BRLOCK(vfsmount_lock); + static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); @@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) -/* allocation is serialized by namespace_sem */ +/* + * allocation is serialized by namespace_sem, but we need the spinlock to + * serialize with freeing. + */ static int mnt_alloc_id(struct vfsmount *mnt) { int res; retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); if (!res) mnt_id_start = mnt->mnt_id + 1; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); if (res == -EAGAIN) goto retry; @@ -86,11 +99,11 @@ retry: static void mnt_free_id(struct vfsmount *mnt) { int id = mnt->mnt_id; - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); ida_remove(&mnt_id_ida, id); if (mnt_id_start > id) mnt_id_start = id; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); } /* @@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) { int ret = 0; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt) */ smp_wmb(); mnt->mnt_flags &= ~MNT_WRITE_HOLD; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_flags &= ~MNT_READONLY; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) @@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt) /* * find the first or last mount at @dentry on vfsmount @mnt depending on * @dir. If @dir is set return the first mount else return the last mount. + * vfsmount_lock must be held for read or write. */ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, int dir) @@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return child_mnt; } @@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt) return mnt->mnt_ns == current->nsproxy->mnt_ns; } +/* + * vfsmount lock must be held for write + */ static void touch_mnt_namespace(struct mnt_namespace *ns) { if (ns) { @@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns) } } +/* + * vfsmount lock must be held for write + */ static void __touch_mnt_namespace(struct mnt_namespace *ns) { if (ns && ns->event != event) { @@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } } +/* + * vfsmount lock must be held for write + */ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) { old_path->dentry = mnt->mnt_mountpoint; @@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) old_path->dentry->d_mounted--; } +/* + * vfsmount lock must be held for write + */ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *child_mnt) { @@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, dentry->d_mounted++; } +/* + * vfsmount lock must be held for write + */ static void attach_mnt(struct vfsmount *mnt, struct path *path) { mnt_set_mountpoint(path->mnt, path->dentry, mnt); @@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) } /* - * the caller must hold vfsmount_lock + * vfsmount lock must be held for write */ static void commit_tree(struct vfsmount *mnt) { @@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt) void mntput_no_expire(struct vfsmount *mnt) { repeat: - if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { - if (likely(!mnt->mnt_pinned)) { - spin_unlock(&vfsmount_lock); - __mntput(mnt); - return; - } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; - spin_unlock(&vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + if (atomic_add_unless(&mnt->mnt_count, -1, 1)) + return; + br_write_lock(vfsmount_lock); + if (!atomic_dec_and_test(&mnt->mnt_count)) { + br_write_unlock(vfsmount_lock); + return; } + if (likely(!mnt->mnt_pinned)) { + br_write_unlock(vfsmount_lock); + __mntput(mnt); + return; + } + atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto repeat; } - EXPORT_SYMBOL(mntput_no_expire); void mnt_pin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_pinned++; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { atomic_inc(&mnt->mnt_count); mnt->mnt_pinned--; } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p) struct mnt_namespace *ns = p->ns; int res = 0; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (p->event != ns->event) { p->event = ns->event; res = 1; } - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } @@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += atomic_read(&p->mnt_count); minimum_refs += 2; } - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head) if (mnt->mnt_parent != mnt) { struct dentry *dentry; struct vfsmount *m; - spin_lock(&vfsmount_lock); + + br_write_lock(vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); dput(dentry); mntput(m); } @@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head) } } +/* + * vfsmount lock must be held for write + * namespace_sem must be held for write + */ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) { struct vfsmount *p; @@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags) } down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags) umount_tree(mnt, 1, &umount_list); retval = 0; } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt_root, flag); if (!q) goto Enomem; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(res, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, if (err) goto out_cleanup_ids; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); + return 0; out_cleanup_ids: @@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name, err = graft_tree(mnt, path); if (err) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + + br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); } @@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; path->mnt->mnt_flags = mnt_flags; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); touch_mnt_namespace(path->mnt->mnt_ns); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } return err; } @@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -1830,6 +1871,8 @@ resume: /* * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint + * + * vfsmount_lock must be held for write */ static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) { @@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, kfree(new_ns); return ERR_PTR(-ENOMEM); } - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) @@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; path_put(&root_parent); @@ -2279,7 +2322,7 @@ out1: out0: return error; out3: - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); goto out2; } @@ -2326,6 +2369,8 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); + br_lock_init(vfsmount_lock); + err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns) if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/pnode.c b/fs/pnode.c index 5cc564a83149..8066b8dd748f 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt) return 0; } +/* + * vfsmount lock must be held for write + */ void change_mnt_propagation(struct vfsmount *mnt, int type) { if (type == MS_SHARED) { @@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, prev_src_mnt = child; } out: - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); umount_tree(child, 0, &umount_list); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); return ret; } @@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * other mounts its parent propagates to. * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. + * + * vfsmount lock must be held for read or write */ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) { @@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt) * collect all mounts that receive propagation from the mount in @list, * and return these additional mounts in the same list. * @list: the list of mounts to be unmounted. + * + * vfsmount lock must be held for write */ int propagate_umount(struct list_head *list) { From 56385a12d9bb9e173751f74b6c430742018cafc0 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 18 Aug 2010 14:08:17 +0200 Subject: [PATCH 100/104] ALSA: emu10k1 - delay the PCM interrupts (add pcm_irq_delay parameter) With some hardware combinations, the PCM interrupts are acknowledged before the period boundary from the emu10k1 chip. The midlevel PCM code gets confused and the playback stream is interrupted. It seems that the interrupt processing shift by 2 samples is enough to fix this issue. This default value does not harm other, non-affected hardware. More information: Kernel bugzilla bug#16300 [A copmile warning fixed by tiwai] Signed-off-by: Jaroslav Kysela Cc: Signed-off-by: Takashi Iwai --- include/sound/emu10k1.h | 1 + sound/core/pcm_native.c | 4 ++++ sound/pci/emu10k1/emu10k1.c | 4 ++++ sound/pci/emu10k1/emupcm.c | 30 ++++++++++++++++++++++++++---- sound/pci/emu10k1/memory.c | 4 +++- 5 files changed, 38 insertions(+), 5 deletions(-) diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 6a664c3f7c1e..7dc97d12253c 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1707,6 +1707,7 @@ struct snd_emu10k1 { unsigned int card_type; /* EMU10K1_CARD_* */ unsigned int ecard_ctrl; /* ecard control bits */ unsigned long dma_mask; /* PCI DMA mask */ + unsigned int delay_pcm_irq; /* in samples */ int max_cache_pages; /* max memory size / PAGE_SIZE */ struct snd_dma_buffer silent_page; /* silent page */ struct snd_dma_buffer ptb_pages; /* page table pages */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a3b2a6479246..134fc6c2e08d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -978,6 +978,10 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push) { if (substream->runtime->trigger_master != substream) return 0; + /* some drivers might use hw_ptr to recover from the pause - + update the hw_ptr now */ + if (push) + snd_pcm_update_hw_ptr(substream); /* The jiffies check in snd_pcm_update_hw_ptr*() is done by * a delta betwen the current jiffies, this gives a large enough * delta, effectively to skip the check once. diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index 4203782d7cb7..aff8387c45cf 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -52,6 +52,7 @@ static int max_synth_voices[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 64}; static int max_buffer_size[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 128}; static int enable_ir[SNDRV_CARDS]; static uint subsystem[SNDRV_CARDS]; /* Force card subsystem model */ +static uint delay_pcm_irq[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 2}; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the EMU10K1 soundcard."); @@ -73,6 +74,8 @@ module_param_array(enable_ir, bool, NULL, 0444); MODULE_PARM_DESC(enable_ir, "Enable IR."); module_param_array(subsystem, uint, NULL, 0444); MODULE_PARM_DESC(subsystem, "Force card subsystem model."); +module_param_array(delay_pcm_irq, uint, NULL, 0444); +MODULE_PARM_DESC(delay_pcm_irq, "Delay PCM interrupt by specified number of samples (default 0)."); /* * Class 0401: 1102:0008 (rev 00) Subsystem: 1102:1001 -> Audigy2 Value Model:SB0400 */ @@ -127,6 +130,7 @@ static int __devinit snd_card_emu10k1_probe(struct pci_dev *pci, &emu)) < 0) goto error; card->private_data = emu; + emu->delay_pcm_irq = delay_pcm_irq[dev] & 0x1f; if ((err = snd_emu10k1_pcm(emu, 0, NULL)) < 0) goto error; if ((err = snd_emu10k1_pcm_mic(emu, 1, NULL)) < 0) diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 55b83ef73c63..622bace148e3 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -332,7 +332,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, evoice->epcm->ccca_start_addr = start_addr + ccis; if (extra) { start_addr += ccis; - end_addr += ccis; + end_addr += ccis + emu->delay_pcm_irq; } if (stereo && !extra) { snd_emu10k1_ptr_write(emu, CPF, voice, CPF_STEREO_MASK); @@ -360,7 +360,9 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, /* Assumption that PT is already 0 so no harm overwriting */ snd_emu10k1_ptr_write(emu, PTRX, voice, (send_amount[0] << 8) | send_amount[1]); snd_emu10k1_ptr_write(emu, DSL, voice, end_addr | (send_amount[3] << 24)); - snd_emu10k1_ptr_write(emu, PSST, voice, start_addr | (send_amount[2] << 24)); + snd_emu10k1_ptr_write(emu, PSST, voice, + (start_addr + (extra ? emu->delay_pcm_irq : 0)) | + (send_amount[2] << 24)); if (emu->card_capabilities->emu_model) pitch_target = PITCH_48000; /* Disable interpolators on emu1010 card */ else @@ -732,6 +734,23 @@ static void snd_emu10k1_playback_stop_voice(struct snd_emu10k1 *emu, struct snd_ snd_emu10k1_ptr_write(emu, IP, voice, 0); } +static inline void snd_emu10k1_playback_mangle_extra(struct snd_emu10k1 *emu, + struct snd_emu10k1_pcm *epcm, + struct snd_pcm_substream *substream, + struct snd_pcm_runtime *runtime) +{ + unsigned int ptr, period_pos; + + /* try to sychronize the current position for the interrupt + source voice */ + period_pos = runtime->status->hw_ptr - runtime->hw_ptr_interrupt; + period_pos %= runtime->period_size; + ptr = snd_emu10k1_ptr_read(emu, CCCA, epcm->extra->number); + ptr &= ~0x00ffffff; + ptr |= epcm->ccca_start_addr + period_pos; + snd_emu10k1_ptr_write(emu, CCCA, epcm->extra->number, ptr); +} + static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, int cmd) { @@ -753,6 +772,8 @@ static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, /* follow thru */ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: + if (cmd == SNDRV_PCM_TRIGGER_PAUSE_RELEASE) + snd_emu10k1_playback_mangle_extra(emu, epcm, substream, runtime); mix = &emu->pcm_mixer[substream->number]; snd_emu10k1_playback_prepare_voice(emu, epcm->voices[0], 1, 0, mix); snd_emu10k1_playback_prepare_voice(emu, epcm->voices[1], 0, 0, mix); @@ -869,8 +890,9 @@ static snd_pcm_uframes_t snd_emu10k1_playback_pointer(struct snd_pcm_substream * #endif /* printk(KERN_DEBUG - "ptr = 0x%x, buffer_size = 0x%x, period_size = 0x%x\n", - ptr, runtime->buffer_size, runtime->period_size); + "ptr = 0x%lx, buffer_size = 0x%lx, period_size = 0x%lx\n", + (long)ptr, (long)runtime->buffer_size, + (long)runtime->period_size); */ return ptr; } diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index ffb1ddb8dc28..957a311514c8 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -310,8 +310,10 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst if (snd_BUG_ON(!hdr)) return NULL; + idx = runtime->period_size >= runtime->buffer_size ? + (emu->delay_pcm_irq * 2) : 0; mutex_lock(&hdr->block_mutex); - blk = search_empty(emu, runtime->dma_bytes); + blk = search_empty(emu, runtime->dma_bytes + idx); if (blk == NULL) { mutex_unlock(&hdr->block_mutex); return NULL; From 0a377cff9428af2da2b293d11e07bc4dbf064ee5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Aug 2010 09:25:42 -0400 Subject: [PATCH 101/104] NFS: Fix an Oops in the NFSv4 atomic open code Adam Lackorzynski reports: with 2.6.35.2 I'm getting this reproducible Oops: [ 110.825396] BUG: unable to handle kernel NULL pointer dereference at (null) [ 110.828638] IP: [] encode_attrs+0x1a/0x2a4 [ 110.828638] PGD be89f067 PUD bf18f067 PMD 0 [ 110.828638] Oops: 0000 [#1] SMP [ 110.828638] last sysfs file: /sys/class/net/lo/operstate [ 110.828638] CPU 2 [ 110.828638] Modules linked in: rtc_cmos rtc_core rtc_lib amd64_edac_mod i2c_amd756 edac_core i2c_core dm_mirror dm_region_hash dm_log dm_snapshot sg sr_mod usb_storage ohci_hcd mptspi tg3 mptscsih mptbase usbcore nls_base [last unloaded: scsi_wait_scan] [ 110.828638] [ 110.828638] Pid: 11264, comm: setchecksum Not tainted 2.6.35.2 #1 [ 110.828638] RIP: 0010:[] [] encode_attrs+0x1a/0x2a4 [ 110.828638] RSP: 0000:ffff88003bf5b878 EFLAGS: 00010296 [ 110.828638] RAX: ffff8800bddb48a8 RBX: ffff88003bf5bb18 RCX: 0000000000000000 [ 110.828638] RDX: ffff8800be258800 RSI: 0000000000000000 RDI: ffff88003bf5b9f8 [ 110.828638] RBP: 0000000000000000 R08: ffff8800bddb48a8 R09: 0000000000000004 [ 110.828638] R10: 0000000000000003 R11: ffff8800be779000 R12: ffff8800be258800 [ 110.828638] R13: ffff88003bf5b9f8 R14: ffff88003bf5bb20 R15: ffff8800be258800 [ 110.828638] FS: 0000000000000000(0000) GS:ffff880041e00000(0063) knlGS:00000000556bd6b0 [ 110.828638] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 110.828638] CR2: 0000000000000000 CR3: 00000000be8ef000 CR4: 00000000000006e0 [ 110.828638] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 110.828638] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 110.828638] Process setchecksum (pid: 11264, threadinfo ffff88003bf5a000, task ffff88003f232210) [ 110.828638] Stack: [ 110.828638] 0000000000000000 ffff8800bfbcf920 0000000000000000 0000000000000ffe [ 110.828638] <0> 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 110.828638] <0> 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 110.828638] Call Trace: [ 110.828638] [] ? nfs4_xdr_enc_setattr+0x90/0xb4 [ 110.828638] [] ? call_transmit+0x1c3/0x24a [ 110.828638] [] ? __rpc_execute+0x78/0x22a [ 110.828638] [] ? rpc_run_task+0x21/0x2b [ 110.828638] [] ? rpc_call_sync+0x3d/0x5d [ 110.828638] [] ? _nfs4_do_setattr+0x11b/0x147 [ 110.828638] [] ? nfs_init_locked+0x0/0x32 [ 110.828638] [] ? ifind+0x4e/0x90 [ 110.828638] [] ? nfs4_do_setattr+0x4b/0x6e [ 110.828638] [] ? nfs4_do_open+0x291/0x3a6 [ 110.828638] [] ? nfs4_open_revalidate+0x63/0x14a [ 110.828638] [] ? nfs_open_revalidate+0xd7/0x161 [ 110.828638] [] ? do_lookup+0x1a4/0x201 [ 110.828638] [] ? link_path_walk+0x6a/0x9d5 [ 110.828638] [] ? do_last+0x17b/0x58e [ 110.828638] [] ? do_filp_open+0x1bd/0x56e [ 110.828638] [] ? _atomic_dec_and_lock+0x30/0x48 [ 110.828638] [] ? dput+0x37/0x152 [ 110.828638] [] ? alloc_fd+0x69/0x10a [ 110.828638] [] ? do_sys_open+0x56/0x100 [ 110.828638] [] ? ia32_sysret+0x0/0x5 [ 110.828638] Code: 83 f1 01 e8 f5 ca ff ff 48 83 c4 50 5b 5d 41 5c c3 41 57 41 56 41 55 49 89 fd 41 54 49 89 d4 55 48 89 f5 53 48 81 ec 18 01 00 00 <8b> 06 89 c2 83 e2 08 83 fa 01 19 db 83 e3 f8 83 c3 18 a8 01 8d [ 110.828638] RIP [] encode_attrs+0x1a/0x2a4 [ 110.828638] RSP [ 110.828638] CR2: 0000000000000000 [ 112.840396] ---[ end trace 95282e83fd77358f ]--- We need to ensure that the O_EXCL flag is turned off if the user doesn't set O_CREAT. Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/nfs4proc.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bd91b2778315..e257172d438c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1110,7 +1110,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; /* We can't create new files, or truncate existing ones here */ - openflags &= ~(O_CREAT|O_TRUNC); + openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); /* * Note: we're not holding inode->i_mutex and so may be racing with diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6b44bbfb7d8a..089da5b5d20a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2036,7 +2036,8 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct rpc_cred *cred; struct nfs4_state *state; struct dentry *res; - fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + int open_flags = nd->intent.open.flags; + fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -2044,8 +2045,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (!IS_POSIXACL(dir)) attr.ia_mode &= ~current_umask(); } else { + open_flags &= ~O_EXCL; attr.ia_valid = 0; - BUG_ON(nd->intent.open.flags & O_CREAT); + BUG_ON(open_flags & O_CREAT); } cred = rpc_lookup_cred(); @@ -2054,7 +2056,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) { From 81ca03a0e2ea0207b2df80e0edcf4c775c07a505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 18 Aug 2010 09:25:38 -0700 Subject: [PATCH 102/104] mmc: build fix: mmc_pm_notify is only available with CONFIG_PM=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a build breakage introduced by commit 4c2ef25fe0b8 ("mmc: fix all hangs related to mmc/sd card insert/removal during suspend/resume") Cc: David Brownell Cc: Alan Stern Cc: linux-mmc@vger.kernel.org Cc: Andrew Morton Signed-off-by: Uwe Kleine-König Acked-by: Kukjin Kim Acked-by: Maxim Levitsky Acked-by: Randy Dunlap Signed-off-by: Linus Torvalds --- drivers/mmc/core/host.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 0efe631e50ca..d80cfdc8edd2 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -86,7 +86,9 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) init_waitqueue_head(&host->wq); INIT_DELAYED_WORK(&host->detect, mmc_rescan); INIT_DELAYED_WORK_DEFERRABLE(&host->disable, mmc_host_deeper_disable); +#ifdef CONFIG_PM host->pm_notify.notifier_call = mmc_pm_notify; +#endif /* * By default, hosts do not support SGIO or large requests. From 145e5aa269d54e4ea53fdb5e597007bd287fbe49 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 18 Aug 2010 10:17:44 -0700 Subject: [PATCH 103/104] =?UTF-8?q?[IA64]=20Fix=20build=20error:=20conflic?= =?UTF-8?q?ting=20types=20for=20=E2=80=98sys=5Fexecve=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/ia64/kernel/process.c:636: error: conflicting types for ‘sys_execve’ commit d7627467b7a8dd6944885290a03a07ceb28c10eb Make do_execve() take a const filename pointer Missed the declaration of sys_execve in the ia64 asm/unistd.h (perhaps because there is no reason for it to be there ... it might be a left over from the COMPAT code?). Just delete the conflicting version. Signed-off-by: Tony Luck --- arch/ia64/include/asm/unistd.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 87f1bd1efc82..954d398a54b4 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -356,8 +356,6 @@ asmlinkage unsigned long sys_mmap2( int fd, long pgoff); struct pt_regs; struct sigaction; -long sys_execve(const char __user *filename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs); asmlinkage long sys_ia64_pipe(void); asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, From d15ca3203754359cfe5d18910722d3089b204cc4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Aug 2010 18:55:33 +0100 Subject: [PATCH 104/104] Fix the declaration of sys_execve() in asm-generic/syscalls.h Fix the declaration of sys_execve() in asm-generic/syscalls.h to have various consts applied to its pointers. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-generic/syscalls.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index df84e3b04555..d89dec864d42 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -23,8 +23,10 @@ asmlinkage long sys_vfork(struct pt_regs *regs); #endif #ifndef sys_execve -asmlinkage long sys_execve(char __user *filename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs); +asmlinkage long sys_execve(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs); #endif #ifndef sys_mmap2