From 42c4dfc213190fafffc53815c2ee6064430bc379 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:28:17 -0400 Subject: [PATCH 01/16] cifs: turn read_from_socket into a wrapper around a vectorized version Eventually we'll want to allow cifsd to read data directly into the pagecache. In order to do that we'll need a routine that can take a kvec array and pass that directly to kernel_recvmsg. Unfortunately though, the kernel's recvmsg routines modify the kvec array that gets passed in, so we need to use a copy of the kvec array and refresh that copy on each pass through the loop. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/connect.c | 68 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 97a65af2a08a..4860940b748b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -375,14 +375,54 @@ server_unresponsive(struct TCP_Server_Info *server) return false; } +/* + * kvec_array_init - clone a kvec array, and advance into it + * @new: pointer to memory for cloned array + * @iov: pointer to original array + * @nr_segs: number of members in original array + * @bytes: number of bytes to advance into the cloned array + * + * This function will copy the array provided in iov to a section of memory + * and advance the specified number of bytes into the new array. It returns + * the number of segments in the new array. "new" must be at least as big as + * the original iov array. + */ +static unsigned int +kvec_array_init(struct kvec *new, struct kvec *iov, unsigned int nr_segs, + size_t bytes) +{ + size_t base = 0; + + while (bytes || !iov->iov_len) { + int copy = min(bytes, iov->iov_len); + + bytes -= copy; + base += copy; + if (iov->iov_len == base) { + iov++; + nr_segs--; + base = 0; + } + } + memcpy(new, iov, sizeof(*iov) * nr_segs); + new->iov_base += base; + new->iov_len -= base; + return nr_segs; +} + static int -read_from_socket(struct TCP_Server_Info *server, char *buf, - unsigned int to_read) +readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, + unsigned int nr_segs, unsigned int to_read) { int length = 0; int total_read; + unsigned int segs; struct msghdr smb_msg; - struct kvec iov; + struct kvec *iov; + + iov = kmalloc(sizeof(*iov_orig) * nr_segs, GFP_NOFS); + if (!iov) + return -ENOMEM; smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; @@ -393,10 +433,11 @@ read_from_socket(struct TCP_Server_Info *server, char *buf, break; } - iov.iov_base = buf + total_read; - iov.iov_len = to_read; - length = kernel_recvmsg(server->ssocket, &smb_msg, &iov, 1, - to_read, 0); + segs = kvec_array_init(iov, iov_orig, nr_segs, total_read); + + length = kernel_recvmsg(server->ssocket, &smb_msg, + iov, segs, to_read, 0); + if (server->tcpStatus == CifsExiting) { total_read = -ESHUTDOWN; break; @@ -423,9 +464,22 @@ read_from_socket(struct TCP_Server_Info *server, char *buf, break; } } + kfree(iov); return total_read; } +static int +read_from_socket(struct TCP_Server_Info *server, char *buf, + unsigned int to_read) +{ + struct kvec iov; + + iov.iov_base = buf; + iov.iov_len = to_read; + + return readv_from_socket(server, &iov, 1, to_read); +} + static bool is_smb_response(struct TCP_Server_Info *server, unsigned char type) { From 1041e3f9919999b22c9c2a453aa0d92cd16b76ee Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:28:27 -0400 Subject: [PATCH 02/16] cifs: keep a reusable kvec array for receives Having to continually allocate a new kvec array is expensive. Allocate one that's big enough, and only reallocate it as needed. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/connect.c | 22 ++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 55ebf39fb3fd..51ed2de23070 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -292,6 +292,8 @@ struct TCP_Server_Info { bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ struct delayed_work echo; /* echo ping workqueue job */ + struct kvec *iov; /* reusable kvec array for receives */ + unsigned int nr_iov; /* number of kvecs in array */ #ifdef CONFIG_CIFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ #endif diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4860940b748b..ee70075c5fb1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -410,6 +410,24 @@ kvec_array_init(struct kvec *new, struct kvec *iov, unsigned int nr_segs, return nr_segs; } +static struct kvec * +get_server_iovec(struct TCP_Server_Info *server, unsigned int nr_segs) +{ + struct kvec *new_iov; + + if (server->iov && nr_segs <= server->nr_iov) + return server->iov; + + /* not big enough -- allocate a new one and release the old */ + new_iov = kmalloc(sizeof(*new_iov) * nr_segs, GFP_NOFS); + if (new_iov) { + kfree(server->iov); + server->iov = new_iov; + server->nr_iov = nr_segs; + } + return new_iov; +} + static int readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, unsigned int nr_segs, unsigned int to_read) @@ -420,7 +438,7 @@ readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, struct msghdr smb_msg; struct kvec *iov; - iov = kmalloc(sizeof(*iov_orig) * nr_segs, GFP_NOFS); + iov = get_server_iovec(server, nr_segs); if (!iov) return -ENOMEM; @@ -464,7 +482,6 @@ readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, break; } } - kfree(iov); return total_read; } @@ -669,6 +686,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) } kfree(server->hostname); + kfree(server->iov); kfree(server); length = atomic_dec_return(&tcpSesAllocCount); From 89482a56a079f01c2f4c709f8e23fbf7eeda1b43 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:28:57 -0400 Subject: [PATCH 03/16] cifs: add a third receive phase to cifs_demultiplex_thread Have the demultiplex thread receive just enough to get to the MID, and then find it before receiving the rest. Later, we'll use this to swap in a preallocated receive buffer for some calls. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/connect.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ee70075c5fb1..5308bc6e1248 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -746,11 +746,25 @@ cifs_demultiplex_thread(void *p) if (!is_smb_response(server, buf[0])) continue; - /* check the length */ - if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) || - (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) { - cERROR(1, "Invalid size SMB length %d pdu_length %d", - 4, pdu_length + 4); + /* make sure we have enough to get to the MID */ + if (pdu_length < sizeof(struct smb_hdr) - 1 - 4) { + cERROR(1, "SMB response too short (%u bytes)", + pdu_length); + cifs_reconnect(server); + wake_up(&server->response_q); + continue; + } + + /* read down to the MID */ + length = read_from_socket(server, buf + 4, + sizeof(struct smb_hdr) - 1 - 4); + if (length < 0) + continue; + total_read += length; + + if (pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + cERROR(1, "SMB response too long (%u bytes)", + pdu_length); cifs_reconnect(server); wake_up(&server->response_q); continue; @@ -759,12 +773,15 @@ cifs_demultiplex_thread(void *p) /* else length ok */ if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { isLargeBuf = true; - memcpy(bigbuf, smallbuf, 4); + memcpy(bigbuf, smallbuf, total_read); smb_buffer = (struct smb_hdr *)bigbuf; buf = bigbuf; } - length = read_from_socket(server, buf + 4, pdu_length); + /* now read the rest */ + length = read_from_socket(server, + buf + sizeof(struct smb_hdr) - 1, + pdu_length - sizeof(struct smb_hdr) + 1 + 4); if (length < 0) continue; total_read += length; From ea1f4502fc939b64807f9ab0eca259321047fe83 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:29:05 -0400 Subject: [PATCH 04/16] cifs: move mid finding into separate routine Begin breaking up find_cifs_mid into smaller pieces. The parts that coalesce T2 responses don't really need to be done under the GlobalMid_lock anyway. Create a new function that just finds the mid on the list, and then later takes it off the list if the entire response has been received. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/connect.c | 115 +++++++++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 48 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5308bc6e1248..0f69b311d3fc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -542,61 +542,80 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) } static struct mid_q_entry * -find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, - int *length, bool is_large_buf, bool *is_multi_rsp, char **bigbuf) +find_mid(struct TCP_Server_Info *server, struct smb_hdr *buf) { - struct mid_q_entry *mid = NULL, *tmp_mid, *ret = NULL; + struct mid_q_entry *mid; spin_lock(&GlobalMid_Lock); - list_for_each_entry_safe(mid, tmp_mid, &server->pending_mid_q, qhead) { - if (mid->mid != buf->Mid || - mid->midState != MID_REQUEST_SUBMITTED || - mid->command != buf->Command) - continue; - - if (*length == 0 && check2ndT2(buf) > 0) { - /* We have a multipart transact2 resp */ - *is_multi_rsp = true; - if (mid->resp_buf) { - /* merge response - fix up 1st*/ - *length = coalesce_t2(buf, mid->resp_buf); - if (*length > 0) { - *length = 0; - mid->multiRsp = true; - break; - } - /* All parts received or packet is malformed. */ - mid->multiEnd = true; - goto multi_t2_fnd; - } - if (!is_large_buf) { - /*FIXME: switch to already allocated largebuf?*/ - cERROR(1, "1st trans2 resp needs bigbuf"); - } else { - /* Have first buffer */ - mid->resp_buf = buf; - mid->largeBuf = true; - *bigbuf = NULL; - } - break; + list_for_each_entry(mid, &server->pending_mid_q, qhead) { + if (mid->mid == buf->Mid && + mid->midState == MID_REQUEST_SUBMITTED && + mid->command == buf->Command) { + spin_unlock(&GlobalMid_Lock); + return mid; } - mid->resp_buf = buf; - mid->largeBuf = is_large_buf; -multi_t2_fnd: - if (*length == 0) - mid->midState = MID_RESPONSE_RECEIVED; - else - mid->midState = MID_RESPONSE_MALFORMED; -#ifdef CONFIG_CIFS_STATS2 - mid->when_received = jiffies; -#endif - list_del_init(&mid->qhead); - ret = mid; - break; } spin_unlock(&GlobalMid_Lock); + return NULL; +} - return ret; +static void +dequeue_mid(struct mid_q_entry *mid, int malformed) +{ +#ifdef CONFIG_CIFS_STATS2 + mid->when_received = jiffies; +#endif + spin_lock(&GlobalMid_Lock); + if (!malformed) + mid->midState = MID_RESPONSE_RECEIVED; + else + mid->midState = MID_RESPONSE_MALFORMED; + list_del_init(&mid->qhead); + spin_unlock(&GlobalMid_Lock); +} + +static struct mid_q_entry * +find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, + int *malformed, bool is_large_buf, bool *is_multi_rsp, + char **bigbuf) +{ + struct mid_q_entry *mid = NULL; + + mid = find_mid(server, buf); + if (!mid) + return mid; + + if (*malformed == 0 && check2ndT2(buf) > 0) { + /* We have a multipart transact2 resp */ + *is_multi_rsp = true; + if (mid->resp_buf) { + /* merge response - fix up 1st*/ + *malformed = coalesce_t2(buf, mid->resp_buf); + if (*malformed > 0) { + *malformed = 0; + mid->multiRsp = true; + return NULL; + } + /* All parts received or packet is malformed. */ + mid->multiEnd = true; + goto multi_t2_fnd; + } + if (!is_large_buf) { + /*FIXME: switch to already allocated largebuf?*/ + cERROR(1, "1st trans2 resp needs bigbuf"); + } else { + /* Have first buffer */ + mid->resp_buf = buf; + mid->largeBuf = true; + *bigbuf = NULL; + } + return mid; + } + mid->resp_buf = buf; + mid->largeBuf = is_large_buf; +multi_t2_fnd: + dequeue_mid(mid, *malformed); + return mid; } static void clean_demultiplex_info(struct TCP_Server_Info *server) From ffc00e27aa5d343eb71068c185cdbd65871ccdce Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:29:13 -0400 Subject: [PATCH 05/16] cifs: eliminate is_multi_rsp parm to find_cifs_mid Change find_cifs_mid to only return NULL if a mid could not be found. If we got part of a multi-part T2 response, then coalesce it and still return the mid. The caller can determine the T2 receive status from the flags in the mid. With this change, there is no need to pass a pointer to "length" as well so just pass by value. If a mid is found, then we can just mark it as malformed. If one isn't found, then the value of "length" won't change anyway. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/connect.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0f69b311d3fc..8d5a61561909 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -576,8 +576,7 @@ dequeue_mid(struct mid_q_entry *mid, int malformed) static struct mid_q_entry * find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, - int *malformed, bool is_large_buf, bool *is_multi_rsp, - char **bigbuf) + int malformed, bool is_large_buf, char **bigbuf) { struct mid_q_entry *mid = NULL; @@ -585,17 +584,14 @@ find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, if (!mid) return mid; - if (*malformed == 0 && check2ndT2(buf) > 0) { - /* We have a multipart transact2 resp */ - *is_multi_rsp = true; + if (malformed == 0 && check2ndT2(buf) > 0) { + mid->multiRsp = true; if (mid->resp_buf) { /* merge response - fix up 1st*/ - *malformed = coalesce_t2(buf, mid->resp_buf); - if (*malformed > 0) { - *malformed = 0; - mid->multiRsp = true; - return NULL; - } + malformed = coalesce_t2(buf, mid->resp_buf); + if (malformed > 0) + return mid; + /* All parts received or packet is malformed. */ mid->multiEnd = true; goto multi_t2_fnd; @@ -614,7 +610,7 @@ find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, mid->resp_buf = buf; mid->largeBuf = is_large_buf; multi_t2_fnd: - dequeue_mid(mid, *malformed); + dequeue_mid(mid, malformed); return mid; } @@ -725,7 +721,6 @@ cifs_demultiplex_thread(void *p) struct task_struct *task_to_wake = NULL; struct mid_q_entry *mid_entry; bool isLargeBuf = false; - bool isMultiRsp = false; current->flags |= PF_MEMALLOC; cFYI(1, "Demultiplex PID: %d", task_pid_nr(current)); @@ -745,7 +740,6 @@ cifs_demultiplex_thread(void *p) continue; isLargeBuf = false; - isMultiRsp = false; smb_buffer = (struct smb_hdr *)smallbuf; buf = smallbuf; pdu_length = 4; /* enough to get RFC1001 header */ @@ -823,23 +817,25 @@ cifs_demultiplex_thread(void *p) server->lstrp = jiffies; - mid_entry = find_cifs_mid(server, smb_buffer, &length, - isLargeBuf, &isMultiRsp, &bigbuf); + mid_entry = find_cifs_mid(server, smb_buffer, length, + isLargeBuf, &bigbuf); if (mid_entry != NULL) { - mid_entry->callback(mid_entry); + if (mid_entry->multiRsp && !mid_entry->multiEnd) + continue; + /* Was previous buf put in mpx struct for multi-rsp? */ - if (!isMultiRsp) { + if (!mid_entry->multiRsp) { /* smb buffer will be freed by user thread */ if (isLargeBuf) bigbuf = NULL; else smallbuf = NULL; } + mid_entry->callback(mid_entry); } else if (length != 0) { /* response sanity checks failed */ continue; - } else if (!is_valid_oplock_break(smb_buffer, server) && - !isMultiRsp) { + } else if (!is_valid_oplock_break(smb_buffer, server)) { cERROR(1, "No task to wake, unknown frame received! " "NumMids %d", atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", buf, From 2a37ef94bb153fad13cbb091aab679d7c8b9a67f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:29:23 -0400 Subject: [PATCH 06/16] cifs: move buffer pointers into TCP_Server_Info We have several functions that need to access these pointers. Currently that's done with a lot of double pointer passing. Instead, move them into the TCP_Server_Info and simplify the handling. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/cifsglob.h | 4 ++ fs/cifs/connect.c | 101 +++++++++++++++++++++------------------------ 2 files changed, 50 insertions(+), 55 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 51ed2de23070..a73dd1d5e7ef 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -291,9 +291,13 @@ struct TCP_Server_Info { bool sec_kerberosu2u; /* supports U2U Kerberos */ bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ + bool large_buf; /* is current buffer large? */ struct delayed_work echo; /* echo ping workqueue job */ struct kvec *iov; /* reusable kvec array for receives */ unsigned int nr_iov; /* number of kvecs in array */ + char *smallbuf; /* pointer to current "small" buffer */ + char *bigbuf; /* pointer to current "big" buffer */ + unsigned int total_read; /* total amount of data read in this pass */ #ifdef CONFIG_CIFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ #endif diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8d5a61561909..8918f935bf07 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -320,27 +320,24 @@ requeue_echo: } static bool -allocate_buffers(char **bigbuf, char **smallbuf, unsigned int size, - bool is_large_buf) +allocate_buffers(struct TCP_Server_Info *server) { - char *bbuf = *bigbuf, *sbuf = *smallbuf; - - if (bbuf == NULL) { - bbuf = (char *)cifs_buf_get(); - if (!bbuf) { + if (!server->bigbuf) { + server->bigbuf = (char *)cifs_buf_get(); + if (!server->bigbuf) { cERROR(1, "No memory for large SMB response"); msleep(3000); /* retry will check if exiting */ return false; } - } else if (is_large_buf) { + } else if (server->large_buf) { /* we are reusing a dirty large buf, clear its start */ - memset(bbuf, 0, size); + memset(server->bigbuf, 0, sizeof(struct smb_hdr)); } - if (sbuf == NULL) { - sbuf = (char *)cifs_small_buf_get(); - if (!sbuf) { + if (!server->smallbuf) { + server->smallbuf = (char *)cifs_small_buf_get(); + if (!server->smallbuf) { cERROR(1, "No memory for SMB response"); msleep(1000); /* retry will check if exiting */ @@ -349,12 +346,9 @@ allocate_buffers(char **bigbuf, char **smallbuf, unsigned int size, /* beginning of smb buffer is cleared in our buf_get */ } else { /* if existing small buf clear beginning */ - memset(sbuf, 0, size); + memset(server->smallbuf, 0, sizeof(struct smb_hdr)); } - *bigbuf = bbuf; - *smallbuf = sbuf; - return true; } @@ -576,7 +570,7 @@ dequeue_mid(struct mid_q_entry *mid, int malformed) static struct mid_q_entry * find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, - int malformed, bool is_large_buf, char **bigbuf) + int malformed) { struct mid_q_entry *mid = NULL; @@ -596,19 +590,27 @@ find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, mid->multiEnd = true; goto multi_t2_fnd; } - if (!is_large_buf) { + if (!server->large_buf) { /*FIXME: switch to already allocated largebuf?*/ cERROR(1, "1st trans2 resp needs bigbuf"); } else { /* Have first buffer */ mid->resp_buf = buf; mid->largeBuf = true; - *bigbuf = NULL; + server->bigbuf = NULL; } return mid; } mid->resp_buf = buf; - mid->largeBuf = is_large_buf; + mid->largeBuf = server->large_buf; + /* Was previous buf put in mpx struct for multi-rsp? */ + if (!mid->multiRsp) { + /* smb buffer will be freed by user thread */ + if (server->large_buf) + server->bigbuf = NULL; + else + server->smallbuf = NULL; + } multi_t2_fnd: dequeue_mid(mid, malformed); return mid; @@ -715,12 +717,11 @@ cifs_demultiplex_thread(void *p) { int length; struct TCP_Server_Info *server = p; - unsigned int pdu_length, total_read; - char *buf = NULL, *bigbuf = NULL, *smallbuf = NULL; + unsigned int pdu_length; + char *buf = NULL; struct smb_hdr *smb_buffer = NULL; struct task_struct *task_to_wake = NULL; struct mid_q_entry *mid_entry; - bool isLargeBuf = false; current->flags |= PF_MEMALLOC; cFYI(1, "Demultiplex PID: %d", task_pid_nr(current)); @@ -735,19 +736,18 @@ cifs_demultiplex_thread(void *p) if (try_to_freeze()) continue; - if (!allocate_buffers(&bigbuf, &smallbuf, - sizeof(struct smb_hdr), isLargeBuf)) + if (!allocate_buffers(server)) continue; - isLargeBuf = false; - smb_buffer = (struct smb_hdr *)smallbuf; - buf = smallbuf; + server->large_buf = false; + smb_buffer = (struct smb_hdr *)server->smallbuf; + buf = server->smallbuf; pdu_length = 4; /* enough to get RFC1001 header */ length = read_from_socket(server, buf, pdu_length); if (length < 0) continue; - total_read = length; + server->total_read = length; /* * The right amount was read from socket - 4 bytes, @@ -773,7 +773,7 @@ cifs_demultiplex_thread(void *p) sizeof(struct smb_hdr) - 1 - 4); if (length < 0) continue; - total_read += length; + server->total_read += length; if (pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { cERROR(1, "SMB response too long (%u bytes)", @@ -785,10 +785,11 @@ cifs_demultiplex_thread(void *p) /* else length ok */ if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { - isLargeBuf = true; - memcpy(bigbuf, smallbuf, total_read); - smb_buffer = (struct smb_hdr *)bigbuf; - buf = bigbuf; + server->large_buf = true; + memcpy(server->bigbuf, server->smallbuf, + server->total_read); + smb_buffer = (struct smb_hdr *)server->bigbuf; + buf = server->bigbuf; } /* now read the rest */ @@ -797,9 +798,9 @@ cifs_demultiplex_thread(void *p) pdu_length - sizeof(struct smb_hdr) + 1 + 4); if (length < 0) continue; - total_read += length; + server->total_read += length; - dump_smb(smb_buffer, total_read); + dump_smb(smb_buffer, server->total_read); /* * We know that we received enough to get to the MID as we @@ -810,28 +811,18 @@ cifs_demultiplex_thread(void *p) * 48 bytes is enough to display the header and a little bit * into the payload for debugging purposes. */ - length = checkSMB(smb_buffer, smb_buffer->Mid, total_read); + length = checkSMB(smb_buffer, smb_buffer->Mid, + server->total_read); if (length != 0) cifs_dump_mem("Bad SMB: ", buf, - min_t(unsigned int, total_read, 48)); + min_t(unsigned int, server->total_read, 48)); server->lstrp = jiffies; - mid_entry = find_cifs_mid(server, smb_buffer, length, - isLargeBuf, &bigbuf); + mid_entry = find_cifs_mid(server, smb_buffer, length); if (mid_entry != NULL) { - if (mid_entry->multiRsp && !mid_entry->multiEnd) - continue; - - /* Was previous buf put in mpx struct for multi-rsp? */ - if (!mid_entry->multiRsp) { - /* smb buffer will be freed by user thread */ - if (isLargeBuf) - bigbuf = NULL; - else - smallbuf = NULL; - } - mid_entry->callback(mid_entry); + if (!mid_entry->multiRsp || mid_entry->multiEnd) + mid_entry->callback(mid_entry); } else if (length != 0) { /* response sanity checks failed */ continue; @@ -849,9 +840,9 @@ cifs_demultiplex_thread(void *p) } /* end while !EXITING */ /* buffer usually freed in free_mid - need to free it here on exit */ - cifs_buf_release(bigbuf); - if (smallbuf) /* no sense logging a debug message if NULL */ - cifs_small_buf_release(smallbuf); + cifs_buf_release(server->bigbuf); + if (server->smallbuf) /* no sense logging a debug message if NULL */ + cifs_small_buf_release(server->smallbuf); task_to_wake = xchg(&server->tsk, NULL); clean_demultiplex_info(server); From c8054ebdb6903208b83aa59c387b16d5129492d5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:29:31 -0400 Subject: [PATCH 07/16] cifs: find mid earlier in receive codepath In order to receive directly into a preallocated buffer, we need to ID the mid earlier, before the bulk of the response is read. Call the mid finding routine as soon as we're able to read the mid. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/connect.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8918f935bf07..52195bad5e67 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -568,27 +568,21 @@ dequeue_mid(struct mid_q_entry *mid, int malformed) spin_unlock(&GlobalMid_Lock); } -static struct mid_q_entry * -find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, - int malformed) +static void +handle_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server, + struct smb_hdr *buf, int malformed) { - struct mid_q_entry *mid = NULL; - - mid = find_mid(server, buf); - if (!mid) - return mid; - if (malformed == 0 && check2ndT2(buf) > 0) { mid->multiRsp = true; if (mid->resp_buf) { /* merge response - fix up 1st*/ malformed = coalesce_t2(buf, mid->resp_buf); if (malformed > 0) - return mid; + return; /* All parts received or packet is malformed. */ mid->multiEnd = true; - goto multi_t2_fnd; + return dequeue_mid(mid, malformed); } if (!server->large_buf) { /*FIXME: switch to already allocated largebuf?*/ @@ -599,7 +593,7 @@ find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, mid->largeBuf = true; server->bigbuf = NULL; } - return mid; + return; } mid->resp_buf = buf; mid->largeBuf = server->large_buf; @@ -611,9 +605,7 @@ find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, else server->smallbuf = NULL; } -multi_t2_fnd: dequeue_mid(mid, malformed); - return mid; } static void clean_demultiplex_info(struct TCP_Server_Info *server) @@ -775,6 +767,8 @@ cifs_demultiplex_thread(void *p) continue; server->total_read += length; + mid_entry = find_mid(server, smb_buffer); + if (pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { cERROR(1, "SMB response too long (%u bytes)", pdu_length); @@ -819,8 +813,8 @@ cifs_demultiplex_thread(void *p) server->lstrp = jiffies; - mid_entry = find_cifs_mid(server, smb_buffer, length); if (mid_entry != NULL) { + handle_mid(mid_entry, server, smb_buffer, length); if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); } else if (length != 0) { From e9097ab48978c89b9c0926e2ae5d49bf6ea91b18 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:29:40 -0400 Subject: [PATCH 08/16] cifs: break out 3rd receive phase into separate function Move the entire 3rd phase of the receive codepath into a separate function in preparation for the addition of a pluggable receive function. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/connect.c | 103 +++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 43 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 52195bad5e67..f05dedda37c6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -704,6 +704,61 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) GFP_KERNEL); } +static int +standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + int length; + char *buf = server->smallbuf; + struct smb_hdr *smb_buffer = (struct smb_hdr *)buf; + unsigned int pdu_length = be32_to_cpu(smb_buffer->smb_buf_length); + + /* make sure this will fit in a large buffer */ + if (pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + cERROR(1, "SMB response too long (%u bytes)", + pdu_length); + cifs_reconnect(server); + wake_up(&server->response_q); + return -EAGAIN; + } + + /* switch to large buffer if too big for a small one */ + if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { + server->large_buf = true; + memcpy(server->bigbuf, server->smallbuf, server->total_read); + buf = server->bigbuf; + smb_buffer = (struct smb_hdr *)buf; + } + + /* now read the rest */ + length = read_from_socket(server, + buf + sizeof(struct smb_hdr) - 1, + pdu_length - sizeof(struct smb_hdr) + 1 + 4); + if (length < 0) + return length; + server->total_read += length; + + dump_smb(smb_buffer, server->total_read); + + /* + * We know that we received enough to get to the MID as we + * checked the pdu_length earlier. Now check to see + * if the rest of the header is OK. We borrow the length + * var for the rest of the loop to avoid a new stack var. + * + * 48 bytes is enough to display the header and a little bit + * into the payload for debugging purposes. + */ + length = checkSMB(smb_buffer, smb_buffer->Mid, server->total_read); + if (length != 0) + cifs_dump_mem("Bad SMB: ", buf, + min_t(unsigned int, server->total_read, 48)); + + if (mid) + handle_mid(mid, server, smb_buffer, length); + + return length; +} + static int cifs_demultiplex_thread(void *p) { @@ -769,57 +824,19 @@ cifs_demultiplex_thread(void *p) mid_entry = find_mid(server, smb_buffer); - if (pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cERROR(1, "SMB response too long (%u bytes)", - pdu_length); - cifs_reconnect(server); - wake_up(&server->response_q); - continue; - } - - /* else length ok */ - if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { - server->large_buf = true; - memcpy(server->bigbuf, server->smallbuf, - server->total_read); - smb_buffer = (struct smb_hdr *)server->bigbuf; - buf = server->bigbuf; - } - - /* now read the rest */ - length = read_from_socket(server, - buf + sizeof(struct smb_hdr) - 1, - pdu_length - sizeof(struct smb_hdr) + 1 + 4); + length = standard_receive3(server, mid_entry); if (length < 0) continue; - server->total_read += length; - dump_smb(smb_buffer, server->total_read); - - /* - * We know that we received enough to get to the MID as we - * checked the pdu_length earlier. Now check to see - * if the rest of the header is OK. We borrow the length - * var for the rest of the loop to avoid a new stack var. - * - * 48 bytes is enough to display the header and a little bit - * into the payload for debugging purposes. - */ - length = checkSMB(smb_buffer, smb_buffer->Mid, - server->total_read); - if (length != 0) - cifs_dump_mem("Bad SMB: ", buf, - min_t(unsigned int, server->total_read, 48)); + if (server->large_buf) { + buf = server->bigbuf; + smb_buffer = (struct smb_hdr *)buf; + } server->lstrp = jiffies; - if (mid_entry != NULL) { - handle_mid(mid_entry, server, smb_buffer, length); if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); - } else if (length != 0) { - /* response sanity checks failed */ - continue; } else if (!is_valid_oplock_break(smb_buffer, server)) { cERROR(1, "No task to wake, unknown frame received! " "NumMids %d", atomic_read(&midCount)); From 44d22d846fdc7c3e688fc1ff5ae6d06d08bb5656 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:29:49 -0400 Subject: [PATCH 09/16] cifs: add a callback function to receive the rest of the frame In order to handle larger SMBs for readpages and other calls, we want to be able to read into a preallocated set of buffers. Rather than changing all of the existing code to preallocate buffers however, we instead add a receive callback function to the MID. cifsd will call this function once the mid_q_entry has been identified in order to receive the rest of the SMB. If the mid can't be identified or the receive pointer is unset, then the standard 3rd phase receive function will be called. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/cifsglob.h | 23 ++++++++++++++++++++--- fs/cifs/cifsproto.h | 5 +++-- fs/cifs/cifssmb.c | 5 +++-- fs/cifs/connect.c | 6 +++++- fs/cifs/transport.c | 5 +++-- 5 files changed, 34 insertions(+), 10 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a73dd1d5e7ef..d153d0b89d39 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -656,8 +656,24 @@ static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon, struct mid_q_entry; /* - * This is the prototype for the mid callback function. When creating one, - * take special care to avoid deadlocks. Things to bear in mind: + * This is the prototype for the mid receive function. This function is for + * receiving the rest of the SMB frame, starting with the WordCount (which is + * just after the MID in struct smb_hdr). Note: + * + * - This will be called by cifsd, with no locks held. + * - The mid will still be on the pending_mid_q. + * - mid->resp_buf will point to the current buffer. + * + * Returns zero on a successful receive, or an error. The receive state in + * the TCP_Server_Info will also be updated. + */ +typedef int (mid_receive_t)(struct TCP_Server_Info *server, + struct mid_q_entry *mid); + +/* + * This is the prototype for the mid callback function. This is called once the + * mid has been received off of the socket. When creating one, take special + * care to avoid deadlocks. Things to bear in mind: * * - it will be called by cifsd, with no locks held * - the mid will be removed from any lists @@ -675,9 +691,10 @@ struct mid_q_entry { unsigned long when_sent; /* time when smb send finished */ unsigned long when_received; /* when demux complete (taken off wire) */ #endif + mid_receive_t *receive; /* call receive callback */ mid_callback_t *callback; /* call completion callback */ void *callback_data; /* general purpose pointer for callback */ - struct smb_hdr *resp_buf; /* response buffer */ + struct smb_hdr *resp_buf; /* pointer to received SMB header */ int midState; /* wish this were enum but can not pass to wait_event */ __u8 command; /* smb command code */ bool largeBuf:1; /* if valid response, is pointer to large buf */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a1fa9cec05d6..8a7adb31ffed 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -69,8 +69,9 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); extern void DeleteMidQEntry(struct mid_q_entry *midEntry); extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, mid_callback_t *callback, - void *cbdata, bool ignore_pend); + unsigned int nvec, mid_receive_t *receive, + mid_callback_t *callback, void *cbdata, + bool ignore_pend); extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, struct smb_hdr * /* input */ , struct smb_hdr * /* out */ , diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c824c106b2b7..0613df4d8e74 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -737,7 +737,8 @@ CIFSSMBEcho(struct TCP_Server_Info *server) iov.iov_base = smb; iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; - rc = cifs_call_async(server, &iov, 1, cifs_echo_callback, server, true); + rc = cifs_call_async(server, &iov, 1, NULL, cifs_echo_callback, + server, true); if (rc) cFYI(1, "Echo request failed: %d", rc); @@ -1834,7 +1835,7 @@ cifs_async_writev(struct cifs_writedata *wdata) kref_get(&wdata->refcount); rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, - cifs_writev_callback, wdata, false); + NULL, cifs_writev_callback, wdata, false); if (rc == 0) cifs_stats_inc(&tcon->num_writes); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f05dedda37c6..eeee2f5d13ce 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -824,7 +824,11 @@ cifs_demultiplex_thread(void *p) mid_entry = find_mid(server, smb_buffer); - length = standard_receive3(server, mid_entry); + if (!mid_entry || !mid_entry->receive) + length = standard_receive3(server, mid_entry); + else + length = mid_entry->receive(server, mid_entry); + if (length < 0) continue; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 33a3fbf3a3a5..e7398d0cd054 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -339,8 +339,8 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) */ int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, mid_callback_t *callback, void *cbdata, - bool ignore_pend) + unsigned int nvec, mid_receive_t *receive, + mid_callback_t *callback, void *cbdata, bool ignore_pend) { int rc; struct mid_q_entry *mid; @@ -374,6 +374,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, goto out_err; } + mid->receive = receive; mid->callback = callback; mid->callback_data = cbdata; mid->midState = MID_REQUEST_SUBMITTED; From 2ab2593f4b8953ff951f5531e695e487dfe0b51f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:29:59 -0400 Subject: [PATCH 10/16] cifs: fix protocol definition for READ_RSP There is no pad, and it simplifies the code to remove the "Data" field. None of the existing code relies on these fields, or on the READ_RSP being a particular length. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/cifspdu.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index de3aa285de03..3c6ef34fe2bc 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1089,9 +1089,7 @@ typedef struct smb_com_read_rsp { __le16 DataLengthHigh; __u64 Reserved2; __u16 ByteCount; - __u8 Pad; /* BB check for whether padded to DWORD - boundary and optimum performance here */ - char Data[1]; + /* read response data immediately follows */ } __attribute__((packed)) READ_RSP; typedef struct locking_andx_range { From e28bc5b1fdbd6e850488234d6072e6b66fc46146 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:30:07 -0400 Subject: [PATCH 11/16] cifs: add cifs_async_readv ...which will allow cifs to do an asynchronous read call to the server. The caller will allocate and set up cifs_readdata for each READ_AND_X call that should be issued on the wire. The pages passed in are added to the pagecache, but not placed on the LRU list yet (as we need the page->lru to keep the pages on the list in the readdata). When cifsd identifies the mid, it will see that there is a special receive handler for the call, and use that to receive the rest of the frame. cifs_readv_receive will then marshal up a kvec array with kmapped pages from the pagecache, which eliminates one copy of the data. Once the data is received, the pages are added to the LRU list, set uptodate, and unlocked. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/cifsproto.h | 24 +++ fs/cifs/cifssmb.c | 359 ++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/connect.c | 26 ++-- 3 files changed, 396 insertions(+), 13 deletions(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 8a7adb31ffed..c25d0636cc4f 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -154,6 +154,12 @@ extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, const char *, int); +extern void dequeue_mid(struct mid_q_entry *mid, bool malformed); +extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, + unsigned int to_read); +extern int cifs_readv_from_socket(struct TCP_Server_Info *server, + struct kvec *iov_orig, unsigned int nr_segs, + unsigned int to_read); extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb); extern int cifs_match_super(struct super_block *, void *); @@ -443,6 +449,24 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); +/* asynchronous read support */ +struct cifs_readdata { + struct cifsFileInfo *cfile; + struct address_space *mapping; + __u64 offset; + unsigned int bytes; + pid_t pid; + int result; + struct list_head pages; + struct work_struct work; + unsigned int nr_iov; + struct kvec iov[1]; +}; + +struct cifs_readdata *cifs_readdata_alloc(unsigned int nr_pages); +void cifs_readdata_free(struct cifs_readdata *rdata); +int cifs_async_readv(struct cifs_readdata *rdata); + /* asynchronous write support */ struct cifs_writedata { struct kref refcount; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 0613df4d8e74..aaad4ce6e6c5 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include "cifspdu.h" #include "cifsglob.h" @@ -40,6 +42,7 @@ #include "cifsproto.h" #include "cifs_unicode.h" #include "cifs_debug.h" +#include "fscache.h" #ifdef CONFIG_CIFS_POSIX static struct { @@ -83,6 +86,9 @@ static struct { #endif /* CONFIG_CIFS_WEAK_PW_HASH */ #endif /* CIFS_POSIX */ +/* Forward declarations */ +static void cifs_readv_complete(struct work_struct *work); + /* Mark as invalid, all open files on tree connections since they were closed when session to server was lost */ static void mark_open_files_invalid(struct cifs_tcon *pTcon) @@ -1375,6 +1381,359 @@ openRetry: return rc; } +struct cifs_readdata * +cifs_readdata_alloc(unsigned int nr_pages) +{ + struct cifs_readdata *rdata; + + /* readdata + 1 kvec for each page */ + rdata = kzalloc(sizeof(*rdata) + + sizeof(struct kvec) * nr_pages, GFP_KERNEL); + if (rdata != NULL) { + INIT_WORK(&rdata->work, cifs_readv_complete); + INIT_LIST_HEAD(&rdata->pages); + } + return rdata; +} + +void +cifs_readdata_free(struct cifs_readdata *rdata) +{ + cifsFileInfo_put(rdata->cfile); + kfree(rdata); +} + +/* + * Discard any remaining data in the current SMB. To do this, we borrow the + * current bigbuf. + */ +static int +cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + READ_RSP *rsp = (READ_RSP *)server->smallbuf; + unsigned int rfclen = be32_to_cpu(rsp->hdr.smb_buf_length); + int remaining = rfclen + 4 - server->total_read; + struct cifs_readdata *rdata = mid->callback_data; + + while (remaining > 0) { + int length; + + length = cifs_read_from_socket(server, server->bigbuf, + min_t(unsigned int, remaining, + CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)); + if (length < 0) + return length; + server->total_read += length; + remaining -= length; + } + + dequeue_mid(mid, rdata->result); + return 0; +} + +static int +cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + int length, len; + unsigned int data_offset, remaining, data_len; + struct cifs_readdata *rdata = mid->callback_data; + READ_RSP *rsp = (READ_RSP *)server->smallbuf; + unsigned int rfclen = be32_to_cpu(rsp->hdr.smb_buf_length) + 4; + u64 eof; + pgoff_t eof_index; + struct page *page, *tpage; + + cFYI(1, "%s: mid=%u offset=%llu bytes=%u", __func__, + mid->mid, rdata->offset, rdata->bytes); + + /* + * read the rest of READ_RSP header (sans Data array), or whatever we + * can if there's not enough data. At this point, we've read down to + * the Mid. + */ + len = min_t(unsigned int, rfclen, sizeof(*rsp)) - + sizeof(struct smb_hdr) + 1; + + rdata->iov[0].iov_base = server->smallbuf + sizeof(struct smb_hdr) - 1; + rdata->iov[0].iov_len = len; + + length = cifs_readv_from_socket(server, rdata->iov, 1, len); + if (length < 0) + return length; + server->total_read += length; + + /* Was the SMB read successful? */ + rdata->result = map_smb_to_linux_error(&rsp->hdr, false); + if (rdata->result != 0) { + cFYI(1, "%s: server returned error %d", __func__, + rdata->result); + return cifs_readv_discard(server, mid); + } + + /* Is there enough to get to the rest of the READ_RSP header? */ + if (server->total_read < sizeof(READ_RSP)) { + cFYI(1, "%s: server returned short header. got=%u expected=%zu", + __func__, server->total_read, sizeof(READ_RSP)); + rdata->result = -EIO; + return cifs_readv_discard(server, mid); + } + + data_offset = le16_to_cpu(rsp->DataOffset) + 4; + if (data_offset < server->total_read) { + /* + * win2k8 sometimes sends an offset of 0 when the read + * is beyond the EOF. Treat it as if the data starts just after + * the header. + */ + cFYI(1, "%s: data offset (%u) inside read response header", + __func__, data_offset); + data_offset = server->total_read; + } else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) { + /* data_offset is beyond the end of smallbuf */ + cFYI(1, "%s: data offset (%u) beyond end of smallbuf", + __func__, data_offset); + rdata->result = -EIO; + return cifs_readv_discard(server, mid); + } + + cFYI(1, "%s: total_read=%u data_offset=%u", __func__, + server->total_read, data_offset); + + len = data_offset - server->total_read; + if (len > 0) { + /* read any junk before data into the rest of smallbuf */ + rdata->iov[0].iov_base = server->smallbuf + server->total_read; + rdata->iov[0].iov_len = len; + length = cifs_readv_from_socket(server, rdata->iov, 1, len); + if (length < 0) + return length; + server->total_read += length; + } + + /* set up first iov for signature check */ + rdata->iov[0].iov_base = server->smallbuf; + rdata->iov[0].iov_len = server->total_read; + cFYI(1, "0: iov_base=%p iov_len=%zu", + rdata->iov[0].iov_base, rdata->iov[0].iov_len); + + /* how much data is in the response? */ + data_len = le16_to_cpu(rsp->DataLengthHigh) << 16; + data_len += le16_to_cpu(rsp->DataLength); + if (data_offset + data_len > rfclen) { + /* data_len is corrupt -- discard frame */ + rdata->result = -EIO; + return cifs_readv_discard(server, mid); + } + + /* marshal up the page array */ + len = 0; + remaining = data_len; + rdata->nr_iov = 1; + + /* determine the eof that the server (probably) has */ + eof = CIFS_I(rdata->mapping->host)->server_eof; + eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; + cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); + + list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { + if (remaining >= PAGE_CACHE_SIZE) { + /* enough data to fill the page */ + rdata->iov[rdata->nr_iov].iov_base = kmap(page); + rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE; + cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", + rdata->nr_iov, page->index, + rdata->iov[rdata->nr_iov].iov_base, + rdata->iov[rdata->nr_iov].iov_len); + ++rdata->nr_iov; + len += PAGE_CACHE_SIZE; + remaining -= PAGE_CACHE_SIZE; + } else if (remaining > 0) { + /* enough for partial page, fill and zero the rest */ + rdata->iov[rdata->nr_iov].iov_base = kmap(page); + rdata->iov[rdata->nr_iov].iov_len = remaining; + cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", + rdata->nr_iov, page->index, + rdata->iov[rdata->nr_iov].iov_base, + rdata->iov[rdata->nr_iov].iov_len); + memset(rdata->iov[rdata->nr_iov].iov_base + remaining, + '\0', PAGE_CACHE_SIZE - remaining); + ++rdata->nr_iov; + len += remaining; + remaining = 0; + } else if (page->index > eof_index) { + /* + * The VFS will not try to do readahead past the + * i_size, but it's possible that we have outstanding + * writes with gaps in the middle and the i_size hasn't + * caught up yet. Populate those with zeroed out pages + * to prevent the VFS from repeatedly attempting to + * fill them until the writes are flushed. + */ + zero_user(page, 0, PAGE_CACHE_SIZE); + list_del(&page->lru); + lru_cache_add_file(page); + flush_dcache_page(page); + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); + } else { + /* no need to hold page hostage */ + list_del(&page->lru); + lru_cache_add_file(page); + unlock_page(page); + page_cache_release(page); + } + } + + /* issue the read if we have any iovecs left to fill */ + if (rdata->nr_iov > 1) { + length = cifs_readv_from_socket(server, &rdata->iov[1], + rdata->nr_iov - 1, len); + if (length < 0) + return length; + server->total_read += length; + } else { + length = 0; + } + + rdata->bytes = length; + + cFYI(1, "total_read=%u rfclen=%u remaining=%u", server->total_read, + rfclen, remaining); + + /* discard anything left over */ + if (server->total_read < rfclen) + return cifs_readv_discard(server, mid); + + dequeue_mid(mid, false); + return length; +} + +static void +cifs_readv_complete(struct work_struct *work) +{ + struct cifs_readdata *rdata = container_of(work, + struct cifs_readdata, work); + struct page *page, *tpage; + + list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { + list_del(&page->lru); + lru_cache_add_file(page); + kunmap(page); + + if (rdata->result == 0) { + flush_dcache_page(page); + SetPageUptodate(page); + } + + unlock_page(page); + + if (rdata->result == 0) + cifs_readpage_to_fscache(rdata->mapping->host, page); + + page_cache_release(page); + } + cifs_readdata_free(rdata); +} + +static void +cifs_readv_callback(struct mid_q_entry *mid) +{ + struct cifs_readdata *rdata = mid->callback_data; + struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; + + cFYI(1, "%s: mid=%u state=%d result=%d bytes=%u", __func__, + mid->mid, mid->midState, rdata->result, rdata->bytes); + + switch (mid->midState) { + case MID_RESPONSE_RECEIVED: + /* result already set, check signature */ + if (server->sec_mode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { + if (cifs_verify_signature(rdata->iov, rdata->nr_iov, + server, mid->sequence_number + 1)) + cERROR(1, "Unexpected SMB signature"); + } + /* FIXME: should this be counted toward the initiating task? */ + task_io_account_read(rdata->bytes); + cifs_stats_bytes_read(tcon, rdata->bytes); + break; + case MID_REQUEST_SUBMITTED: + case MID_RETRY_NEEDED: + rdata->result = -EAGAIN; + break; + default: + rdata->result = -EIO; + } + + queue_work(system_nrt_wq, &rdata->work); + DeleteMidQEntry(mid); + atomic_dec(&server->inFlight); + wake_up(&server->request_q); +} + +/* cifs_async_readv - send an async write, and set up mid to handle result */ +int +cifs_async_readv(struct cifs_readdata *rdata) +{ + int rc; + READ_REQ *smb = NULL; + int wct; + struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); + + cFYI(1, "%s: offset=%llu bytes=%u", __func__, + rdata->offset, rdata->bytes); + + if (tcon->ses->capabilities & CAP_LARGE_FILES) + wct = 12; + else { + wct = 10; /* old style read */ + if ((rdata->offset >> 32) > 0) { + /* can not handle this big offset for old */ + return -EIO; + } + } + + rc = small_smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **)&smb); + if (rc) + return rc; + + smb->hdr.Pid = cpu_to_le16((__u16)rdata->pid); + smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); + + smb->AndXCommand = 0xFF; /* none */ + smb->Fid = rdata->cfile->netfid; + smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF); + if (wct == 12) + smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32); + smb->Remaining = 0; + smb->MaxCount = cpu_to_le16(rdata->bytes & 0xFFFF); + smb->MaxCountHigh = cpu_to_le32(rdata->bytes >> 16); + if (wct == 12) + smb->ByteCount = 0; + else { + /* old style read */ + struct smb_com_readx_req *smbr = + (struct smb_com_readx_req *)smb; + smbr->ByteCount = 0; + } + + /* 4 for RFC1001 length + 1 for BCC */ + rdata->iov[0].iov_base = smb; + rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; + + rc = cifs_call_async(tcon->ses->server, rdata->iov, 1, + cifs_readv_receive, cifs_readv_callback, + rdata, false); + + if (rc == 0) + cifs_stats_inc(&tcon->num_reads); + + cifs_small_buf_release(smb); + return rc; +} + int CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, char **buf, int *pbuf_type) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index eeee2f5d13ce..3d518b9e8c18 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -422,9 +422,9 @@ get_server_iovec(struct TCP_Server_Info *server, unsigned int nr_segs) return new_iov; } -static int -readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, - unsigned int nr_segs, unsigned int to_read) +int +cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, + unsigned int nr_segs, unsigned int to_read) { int length = 0; int total_read; @@ -479,16 +479,16 @@ readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, return total_read; } -static int -read_from_socket(struct TCP_Server_Info *server, char *buf, - unsigned int to_read) +int +cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, + unsigned int to_read) { struct kvec iov; iov.iov_base = buf; iov.iov_len = to_read; - return readv_from_socket(server, &iov, 1, to_read); + return cifs_readv_from_socket(server, &iov, 1, to_read); } static bool @@ -553,8 +553,8 @@ find_mid(struct TCP_Server_Info *server, struct smb_hdr *buf) return NULL; } -static void -dequeue_mid(struct mid_q_entry *mid, int malformed) +void +dequeue_mid(struct mid_q_entry *mid, bool malformed) { #ifdef CONFIG_CIFS_STATS2 mid->when_received = jiffies; @@ -730,7 +730,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) } /* now read the rest */ - length = read_from_socket(server, + length = cifs_read_from_socket(server, buf + sizeof(struct smb_hdr) - 1, pdu_length - sizeof(struct smb_hdr) + 1 + 4); if (length < 0) @@ -791,7 +791,7 @@ cifs_demultiplex_thread(void *p) buf = server->smallbuf; pdu_length = 4; /* enough to get RFC1001 header */ - length = read_from_socket(server, buf, pdu_length); + length = cifs_read_from_socket(server, buf, pdu_length); if (length < 0) continue; server->total_read = length; @@ -816,8 +816,8 @@ cifs_demultiplex_thread(void *p) } /* read down to the MID */ - length = read_from_socket(server, buf + 4, - sizeof(struct smb_hdr) - 1 - 4); + length = cifs_read_from_socket(server, buf + 4, + sizeof(struct smb_hdr) - 1 - 4); if (length < 0) continue; server->total_read += length; From 690c5e3163502f229e5b5d455e5212e28c20cd6d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:30:16 -0400 Subject: [PATCH 12/16] cifs: convert cifs_readpages to use async reads Now that we have code in place to do asynchronous reads, convert cifs_readpages to use it. The new cifs_readpages walks the page_list that gets passed in, locks and adds the pages to the pagecache and sets up cifs_readdata to handle the reads. The rest is handled by the cifs_async_readv infrastructure. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/file.c | 283 ++++++++++++++++++++----------------------------- 1 file changed, 114 insertions(+), 169 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 852d1f39adae..8f6917816fec 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "cifsfs.h" #include "cifspdu.h" @@ -2000,82 +2001,24 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) return rc; } - -static void cifs_copy_cache_pages(struct address_space *mapping, - struct list_head *pages, int bytes_read, char *data) -{ - struct page *page; - char *target; - - while (bytes_read > 0) { - if (list_empty(pages)) - break; - - page = list_entry(pages->prev, struct page, lru); - list_del(&page->lru); - - if (add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL)) { - page_cache_release(page); - cFYI(1, "Add page cache failed"); - data += PAGE_CACHE_SIZE; - bytes_read -= PAGE_CACHE_SIZE; - continue; - } - page_cache_release(page); - - target = kmap_atomic(page, KM_USER0); - - if (PAGE_CACHE_SIZE > bytes_read) { - memcpy(target, data, bytes_read); - /* zero the tail end of this partial page */ - memset(target + bytes_read, 0, - PAGE_CACHE_SIZE - bytes_read); - bytes_read = 0; - } else { - memcpy(target, data, PAGE_CACHE_SIZE); - bytes_read -= PAGE_CACHE_SIZE; - } - kunmap_atomic(target, KM_USER0); - - flush_dcache_page(page); - SetPageUptodate(page); - unlock_page(page); - data += PAGE_CACHE_SIZE; - - /* add page to FS-Cache */ - cifs_readpage_to_fscache(mapping->host, page); - } - return; -} - static int cifs_readpages(struct file *file, struct address_space *mapping, struct list_head *page_list, unsigned num_pages) { - int rc = -EACCES; - int xid; - loff_t offset; - struct page *page; - struct cifs_sb_info *cifs_sb; - struct cifs_tcon *pTcon; - unsigned int bytes_read = 0; - unsigned int read_size, i; - char *smb_read_data = NULL; - struct smb_com_read_rsp *pSMBr; - struct cifsFileInfo *open_file; - struct cifs_io_parms io_parms; - int buf_type = CIFS_NO_BUFFER; - __u32 pid; + int rc; + struct list_head tmplist; + struct cifsFileInfo *open_file = file->private_data; + struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + unsigned int rsize = cifs_sb->rsize; + pid_t pid; - xid = GetXid(); - if (file->private_data == NULL) { - rc = -EBADF; - FreeXid(xid); - return rc; - } - open_file = file->private_data; - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - pTcon = tlink_tcon(open_file->tlink); + /* + * Give up immediately if rsize is too small to read an entire page. + * The VFS will fall back to readpage. We should never reach this + * point however since we set ra_pages to 0 when the rsize is smaller + * than a cache page. + */ + if (unlikely(rsize < PAGE_CACHE_SIZE)) + return 0; /* * Reads as many pages as possible from fscache. Returns -ENOBUFS @@ -2084,125 +2027,127 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, &num_pages); if (rc == 0) - goto read_complete; + return rc; - cFYI(DBG2, "rpages: num pages %d", num_pages); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; - for (i = 0; i < num_pages; ) { - unsigned contig_pages; - struct page *tmp_page; - unsigned long expected_index; + rc = 0; + INIT_LIST_HEAD(&tmplist); - if (list_empty(page_list)) - break; + cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file, + mapping, num_pages); + + /* + * Start with the page at end of list and move it to private + * list. Do the same with any following pages until we hit + * the rsize limit, hit an index discontinuity, or run out of + * pages. Issue the async read and then start the loop again + * until the list is empty. + * + * Note that list order is important. The page_list is in + * the order of declining indexes. When we put the pages in + * the rdata->pages, then we want them in increasing order. + */ + while (!list_empty(page_list)) { + unsigned int bytes = PAGE_CACHE_SIZE; + unsigned int expected_index; + unsigned int nr_pages = 1; + loff_t offset; + struct page *page, *tpage; + struct cifs_readdata *rdata; page = list_entry(page_list->prev, struct page, lru); - offset = (loff_t)page->index << PAGE_CACHE_SHIFT; - /* count adjacent pages that we will read into */ - contig_pages = 0; - expected_index = - list_entry(page_list->prev, struct page, lru)->index; - list_for_each_entry_reverse(tmp_page, page_list, lru) { - if (tmp_page->index == expected_index) { - contig_pages++; - expected_index++; - } else - break; + /* + * Lock the page and put it in the cache. Since no one else + * should have access to this page, we're safe to simply set + * PG_locked without checking it first. + */ + __set_page_locked(page); + rc = add_to_page_cache_locked(page, mapping, + page->index, GFP_KERNEL); + + /* give up if we can't stick it in the cache */ + if (rc) { + __clear_page_locked(page); + break; } - if (contig_pages + i > num_pages) - contig_pages = num_pages - i; - /* for reads over a certain size could initiate async - read ahead */ + /* move first page to the tmplist */ + offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + list_move_tail(&page->lru, &tmplist); - read_size = contig_pages * PAGE_CACHE_SIZE; - /* Read size needs to be in multiples of one page */ - read_size = min_t(const unsigned int, read_size, - cifs_sb->rsize & PAGE_CACHE_MASK); - cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d", - read_size, contig_pages); - rc = -EAGAIN; - while (rc == -EAGAIN) { + /* now try and add more pages onto the request */ + expected_index = page->index + 1; + list_for_each_entry_safe_reverse(page, tpage, page_list, lru) { + /* discontinuity ? */ + if (page->index != expected_index) + break; + + /* would this page push the read over the rsize? */ + if (bytes + PAGE_CACHE_SIZE > rsize) + break; + + __set_page_locked(page); + if (add_to_page_cache_locked(page, mapping, + page->index, GFP_KERNEL)) { + __clear_page_locked(page); + break; + } + list_move_tail(&page->lru, &tmplist); + bytes += PAGE_CACHE_SIZE; + expected_index++; + nr_pages++; + } + + rdata = cifs_readdata_alloc(nr_pages); + if (!rdata) { + /* best to give up if we're out of mem */ + list_for_each_entry_safe(page, tpage, &tmplist, lru) { + list_del(&page->lru); + lru_cache_add_file(page); + unlock_page(page); + page_cache_release(page); + } + rc = -ENOMEM; + break; + } + + spin_lock(&cifs_file_list_lock); + cifsFileInfo_get(open_file); + spin_unlock(&cifs_file_list_lock); + rdata->cfile = open_file; + rdata->mapping = mapping; + rdata->offset = offset; + rdata->bytes = bytes; + rdata->pid = pid; + list_splice_init(&tmplist, &rdata->pages); + + do { if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, true); if (rc != 0) - break; + continue; } - io_parms.netfid = open_file->netfid; - io_parms.pid = pid; - io_parms.tcon = pTcon; - io_parms.offset = offset; - io_parms.length = read_size; - rc = CIFSSMBRead(xid, &io_parms, &bytes_read, - &smb_read_data, &buf_type); - /* BB more RC checks ? */ - if (rc == -EAGAIN) { - if (smb_read_data) { - if (buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(smb_read_data); - else if (buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(smb_read_data); - smb_read_data = NULL; - } + rc = cifs_async_readv(rdata); + } while (rc == -EAGAIN); + + if (rc != 0) { + list_for_each_entry_safe(page, tpage, &rdata->pages, + lru) { + list_del(&page->lru); + lru_cache_add_file(page); + unlock_page(page); + page_cache_release(page); } - } - if ((rc < 0) || (smb_read_data == NULL)) { - cFYI(1, "Read error in readpages: %d", rc); - break; - } else if (bytes_read > 0) { - task_io_account_read(bytes_read); - pSMBr = (struct smb_com_read_rsp *)smb_read_data; - cifs_copy_cache_pages(mapping, page_list, bytes_read, - smb_read_data + 4 /* RFC1001 hdr */ + - le16_to_cpu(pSMBr->DataOffset)); - - i += bytes_read >> PAGE_CACHE_SHIFT; - cifs_stats_bytes_read(pTcon, bytes_read); - if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) { - i++; /* account for partial page */ - - /* server copy of file can have smaller size - than client */ - /* BB do we need to verify this common case ? - this case is ok - if we are at server EOF - we will hit it on next read */ - - /* break; */ - } - } else { - cFYI(1, "No bytes read (%d) at offset %lld . " - "Cleaning remaining pages from readahead list", - bytes_read, offset); - /* BB turn off caching and do new lookup on - file size at server? */ + cifs_readdata_free(rdata); break; } - if (smb_read_data) { - if (buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(smb_read_data); - else if (buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(smb_read_data); - smb_read_data = NULL; - } - bytes_read = 0; } -/* need to free smb_read_data buf before exit */ - if (smb_read_data) { - if (buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(smb_read_data); - else if (buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(smb_read_data); - smb_read_data = NULL; - } - -read_complete: - FreeXid(xid); return rc; } From 5eba8ab3606621f7e175ae9f521d71f3ac534f82 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:30:26 -0400 Subject: [PATCH 13/16] cifs: allow for larger rsize= options and change defaults Currently we cap the rsize at a value that fits in CIFSMaxBufSize. That's not needed any longer for readpages. Allow the use of larger values for readpages. cifs_iovec_read and cifs_read however are still limited to the CIFSMaxBufSize. Make sure they don't exceed that. The patch also changes the rsize defaults. The default when unix extensions are enabled is set to 1M for parity with the wsize, and there is a hard cap of ~16M. When unix extensions are not enabled, the default is set to 60k. According to MS-CIFS, Windows servers can only send a max of 60k at a time, so this is more efficient than requesting a larger size. If the user wishes however, the max can be extended up to 128k - the length of the READ_RSP header. Really old servers however require a special hack to ensure that we don't request too large a read. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/connect.c | 120 ++++++++++++++++++++++++++++------------------ fs/cifs/file.c | 14 ++++-- 2 files changed, 84 insertions(+), 50 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3d518b9e8c18..06dfaacfea5d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2310,16 +2310,16 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) (new->mnt_cifs_flags & CIFS_MOUNT_MASK)) return 0; - if (old->rsize != new->rsize) - return 0; - /* - * We want to share sb only if we don't specify wsize or specified wsize - * is greater or equal than existing one. + * We want to share sb only if we don't specify an r/wsize or + * specified r/wsize is greater than or equal to existing one. */ if (new->wsize && new->wsize < old->wsize) return 0; + if (new->rsize && new->rsize < old->rsize) + return 0; + if (old->mnt_uid != new->mnt_uid || old->mnt_gid != new->mnt_gid) return 0; @@ -2757,14 +2757,6 @@ void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, CIFS_MOUNT_POSIX_PATHS; } - if (cifs_sb && (cifs_sb->rsize > 127 * 1024)) { - if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { - cifs_sb->rsize = 127 * 1024; - cFYI(DBG2, "larger reads not supported by srv"); - } - } - - cFYI(1, "Negotiate caps 0x%x", (int)cap); #ifdef CONFIG_CIFS_DEBUG2 if (cap & CIFS_UNIX_FCNTL_CAP) @@ -2809,27 +2801,11 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, spin_lock_init(&cifs_sb->tlink_tree_lock); cifs_sb->tlink_tree = RB_ROOT; - if (pvolume_info->rsize > CIFSMaxBufSize) { - cERROR(1, "rsize %d too large, using MaxBufSize", - pvolume_info->rsize); - cifs_sb->rsize = CIFSMaxBufSize; - } else if ((pvolume_info->rsize) && - (pvolume_info->rsize <= CIFSMaxBufSize)) - cifs_sb->rsize = pvolume_info->rsize; - else /* default */ - cifs_sb->rsize = CIFSMaxBufSize; - - if (cifs_sb->rsize < 2048) { - cifs_sb->rsize = 2048; - /* Windows ME may prefer this */ - cFYI(1, "readsize set to minimum: 2048"); - } - /* - * Temporarily set wsize for matching superblock. If we end up using - * new sb then cifs_negotiate_wsize will later negotiate it downward - * if needed. + * Temporarily set r/wsize for matching superblock. If we end up using + * new sb then client will later negotiate it downward if needed. */ + cifs_sb->rsize = pvolume_info->rsize; cifs_sb->wsize = pvolume_info->wsize; cifs_sb->mnt_uid = pvolume_info->linux_uid; @@ -2904,29 +2880,41 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, } /* - * When the server supports very large writes via POSIX extensions, we can - * allow up to 2^24-1, minus the size of a WRITE_AND_X header, not including - * the RFC1001 length. + * When the server supports very large reads and writes via POSIX extensions, + * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not + * including the RFC1001 length. * * Note that this might make for "interesting" allocation problems during * writeback however as we have to allocate an array of pointers for the * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. + * + * For reads, there is a similar problem as we need to allocate an array + * of kvecs to handle the receive, though that should only need to be done + * once. */ #define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) +#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) /* - * When the server doesn't allow large posix writes, only allow a wsize of - * 2^17-1 minus the size of the WRITE_AND_X header. That allows for a write up - * to the maximum size described by RFC1002. + * When the server doesn't allow large posix writes, only allow a rsize/wsize + * of 2^17-1 minus the size of the call header. That allows for a read or + * write up to the maximum size described by RFC1002. */ #define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) +#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) /* * The default wsize is 1M. find_get_pages seems to return a maximum of 256 * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill * a single wsize request with a single call. */ -#define CIFS_DEFAULT_WSIZE (1024 * 1024) +#define CIFS_DEFAULT_IOSIZE (1024 * 1024) + +/* + * Windows only supports a max of 60k reads. Default to that when posix + * extensions aren't in force. + */ +#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) static unsigned int cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) @@ -2934,7 +2922,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; unsigned int wsize = pvolume_info->wsize ? pvolume_info->wsize : - CIFS_DEFAULT_WSIZE; + CIFS_DEFAULT_IOSIZE; /* can server support 24-bit write sizes? (via UNIX extensions) */ if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) @@ -2957,6 +2945,50 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) return wsize; } +static unsigned int +cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) +{ + __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int rsize, defsize; + + /* + * Set default value... + * + * HACK alert! Ancient servers have very small buffers. Even though + * MS-CIFS indicates that servers are only limited by the client's + * bufsize for reads, testing against win98se shows that it throws + * INVALID_PARAMETER errors if you try to request too large a read. + * + * If the server advertises a MaxBufferSize of less than one page, + * assume that it also can't satisfy reads larger than that either. + * + * FIXME: Is there a better heuristic for this? + */ + if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) + defsize = CIFS_DEFAULT_IOSIZE; + else if (server->capabilities & CAP_LARGE_READ_X) + defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; + else if (server->maxBuf >= PAGE_CACHE_SIZE) + defsize = CIFSMaxBufSize; + else + defsize = server->maxBuf - sizeof(READ_RSP); + + rsize = pvolume_info->rsize ? pvolume_info->rsize : defsize; + + /* + * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to + * the client's MaxBufferSize. + */ + if (!(server->capabilities & CAP_LARGE_READ_X)) + rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); + + /* hard limit of CIFS_MAX_RSIZE */ + rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); + + return rsize; +} + static int is_path_accessible(int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path) @@ -3234,14 +3266,8 @@ try_mount_again: CIFSSMBQFSAttributeInfo(xid, tcon); } - if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { - cifs_sb->rsize = 1024 * 127; - cFYI(DBG2, "no very large read support, rsize now 127K"); - } - if (!(tcon->ses->capabilities & CAP_LARGE_READ_X)) - cifs_sb->rsize = min(cifs_sb->rsize, CIFSMaxBufSize); - cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info); + cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info); remote_path_check: #ifdef CONFIG_CIFS_DFS_UPCALL diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8f6917816fec..a3b545ff5250 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1758,6 +1758,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, struct smb_com_read_rsp *pSMBr; struct cifs_io_parms io_parms; char *read_data; + unsigned int rsize; __u32 pid; if (!nr_segs) @@ -1770,6 +1771,9 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, xid = GetXid(); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + /* FIXME: set up handlers for larger reads and/or convert to async */ + rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize); + open_file = file->private_data; pTcon = tlink_tcon(open_file->tlink); @@ -1782,7 +1786,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, cFYI(1, "attempting read on write only file instance"); for (total_read = 0; total_read < len; total_read += bytes_read) { - cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); + cur_len = min_t(const size_t, len - total_read, rsize); rc = -EAGAIN; read_data = NULL; @@ -1874,6 +1878,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, unsigned int bytes_read = 0; unsigned int total_read; unsigned int current_read_size; + unsigned int rsize; struct cifs_sb_info *cifs_sb; struct cifs_tcon *pTcon; int xid; @@ -1886,6 +1891,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, xid = GetXid(); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + /* FIXME: set up handlers for larger reads and/or convert to async */ + rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize); + if (file->private_data == NULL) { rc = -EBADF; FreeXid(xid); @@ -1905,8 +1913,8 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, for (total_read = 0, current_offset = read_data; read_size > total_read; total_read += bytes_read, current_offset += bytes_read) { - current_read_size = min_t(uint, read_size - total_read, - cifs_sb->rsize); + current_read_size = min_t(uint, read_size - total_read, rsize); + /* For windows me and 9x we do not want to request more than it negotiated since it will refuse the read then */ if ((pTcon->ses) && From 66bfaadc3da74fecb4ba8b03f6b81d5f58b031fa Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:30:35 -0400 Subject: [PATCH 14/16] cifs: tune bdi.ra_pages in accordance with the rsize Tune bdi.ra_pages to be a multiple of the rsize. This prevents the VFS from asking for pages that require small reads to satisfy. Reviewed-and-Tested-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/connect.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 06dfaacfea5d..f70d87d6ba61 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3182,6 +3182,22 @@ cifs_get_volume_info(char *mount_data, const char *devname) return volume_info; } +/* make sure ra_pages is a multiple of rsize */ +static inline unsigned int +cifs_ra_pages(struct cifs_sb_info *cifs_sb) +{ + unsigned int reads; + unsigned int rsize_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; + + if (rsize_pages >= default_backing_dev_info.ra_pages) + return default_backing_dev_info.ra_pages; + else if (rsize_pages == 0) + return rsize_pages; + + reads = default_backing_dev_info.ra_pages / rsize_pages; + return reads * rsize_pages; +} + int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) { @@ -3200,8 +3216,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) if (rc) return rc; - cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; - #ifdef CONFIG_CIFS_DFS_UPCALL try_mount_again: /* cleanup activities if we're chasing a referral */ @@ -3269,6 +3283,9 @@ try_mount_again: cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info); cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info); + /* tune readahead according to rsize */ + cifs_sb->bdi.ra_pages = cifs_ra_pages(cifs_sb); + remote_path_check: #ifdef CONFIG_CIFS_DFS_UPCALL /* From fef33df88bef6810cc3c4e6edf55c741a8fd68e3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:30:37 -0400 Subject: [PATCH 15/16] cifs: allow cifs_max_pending to be readable under /sys/module/cifs/parameters Signed-off-by: Jeff Layton --- fs/cifs/cifsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index b0a2e1647390..fa6724562a41 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -74,7 +74,7 @@ module_param(cifs_min_small, int, 0); MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " "Range: 2 to 256"); unsigned int cifs_max_pending = CIFS_MAX_REQ; -module_param(cifs_max_pending, int, 0); +module_param(cifs_max_pending, int, 0444); MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " "Default: 50 Range: 2 to 256"); unsigned short echo_retries = 5; From f06ac72e929115f2772c29727152ba0832d641e4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Oct 2011 15:30:40 -0400 Subject: [PATCH 16/16] cifs, freezer: add wait_event_freezekillable and have cifs use it CIFS currently uses wait_event_killable to put tasks to sleep while they await replies from the server. That function though does not allow the freezer to run. In many cases, the network interface may be going down anyway, in which case the reply will never come. The client then ends up blocking the computer from suspending. Fix this by adding a new wait_event_freezable variant -- wait_event_freezekillable. The idea is to combine the behavior of wait_event_killable and wait_event_freezable -- put the task to sleep and only allow it to be awoken by fatal signals, but also allow the freezer to do its job. Signed-off-by: Jeff Layton --- fs/cifs/transport.c | 3 ++- include/linux/freezer.h | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e7398d0cd054..0cc9584f5889 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -324,7 +325,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) { int error; - error = wait_event_killable(server->response_q, + error = wait_event_freezekillable(server->response_q, midQ->midState != MID_REQUEST_SUBMITTED); if (error < 0) return -ERESTARTSYS; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 1effc8b56b4e..3672f731f03a 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -134,10 +134,25 @@ static inline void set_freezable_with_signal(void) } /* - * Freezer-friendly wrappers around wait_event_interruptible() and - * wait_event_interruptible_timeout(), originally defined in + * Freezer-friendly wrappers around wait_event_interruptible(), + * wait_event_killable() and wait_event_interruptible_timeout(), originally + * defined in */ +#define wait_event_freezekillable(wq, condition) \ +({ \ + int __retval; \ + do { \ + __retval = wait_event_killable(wq, \ + (condition) || freezing(current)); \ + if (__retval && !freezing(current)) \ + break; \ + else if (!(condition)) \ + __retval = -ERESTARTSYS; \ + } while (try_to_freeze()); \ + __retval; \ +}) + #define wait_event_freezable(wq, condition) \ ({ \ int __retval; \