
greybus: operation/esx: fix message-cancellation lifetime bugs

The current host-controller message-cancellation implementation suffers
from a lifetime bug, as dynamically allocated URBs can complete and be
deallocated while they are being unlinked as part of cancellation.

The current locking is also insufficient to prevent the related race
where the URB is deallocated before being unlinked.

Fix this by pushing the cancellation implementation from greybus core
down to the host-controller drivers, and by replacing the "cookie"
pointer with an hcpriv field that those drivers can use to maintain
their state with the required locking and reference counting in place.

Specifically, the drivers need to acquire a reference to the URB under
a lock before calling usb_kill_urb() as part of cancellation.
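
Condensed as a sketch from the message_cancel() implementations in the
diffs below (the pre-allocated-URB bookkeeping is omitted here), the
pattern is:

	static void message_cancel(struct gb_message *message)
	{
		struct es1_ap_dev *es1 =
			hd_to_es1(message->operation->connection->hd);
		struct urb *urb;

		/* Look up the URB and take a reference under the lock so
		 * that a concurrent completion cannot free it. */
		spin_lock_irq(&es1->cport_out_urb_lock);
		urb = message->hcpriv;
		usb_get_urb(urb);
		spin_unlock_irq(&es1->cport_out_urb_lock);

		usb_kill_urb(urb);	/* may sleep; URB cannot go away */
		usb_free_urb(urb);	/* drop our reference */
	}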

Note that this also removes the insufficient gb_message_mutex, which
effectively prevented us from implementing support for submissions from
atomic context.

Instead, the host-controller drivers must now explicitly make sure that
the pre-allocated URBs are not reused while cancellation is in progress.
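
In the drivers below this takes the form of a per-URB cancelled flag:
next_free_urb() skips pool entries whose flag is set, and
message_cancel() toggles the flag around usb_kill_urb(), roughly:

	/* Under cport_out_urb_lock: keep next_free_urb() from handing
	 * the pool URB out again while it is being killed. */
	es1->cport_out_urb_cancelled[i] = true;
	...
	usb_kill_urb(urb);
	...
	/* Again under the lock: the URB is idle, allow reuse. */
	es1->cport_out_urb_cancelled[i] = false;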

Signed-off-by: Johan Hovold <johan@hovoldconsulting.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Johan Hovold 2015-07-01 12:37:21 +02:00 committed by Greg Kroah-Hartman
parent 5677d48b97
commit 3e136cc9e0
5 changed files with 130 additions and 72 deletions


@@ -68,6 +68,8 @@ static DEFINE_KFIFO(apb1_log_fifo, char, APB1_LOG_SIZE);
  * @cport_out_urb: array of urbs for the CPort out messages
  * @cport_out_urb_busy: array of flags to see if the @cport_out_urb is busy or
  *			not.
+ * @cport_out_urb_cancelled: array of flags indicating whether the
+ *			corresponding @cport_out_urb is being cancelled
  * @cport_out_urb_lock: locks the @cport_out_urb_busy "list"
  */
 struct es1_ap_dev {
@@ -87,6 +89,7 @@ struct es1_ap_dev {
 	u8 *cport_in_buffer[NUM_CPORT_IN_URB];
 	struct urb *cport_out_urb[NUM_CPORT_OUT_URB];
 	bool cport_out_urb_busy[NUM_CPORT_OUT_URB];
+	bool cport_out_urb_cancelled[NUM_CPORT_OUT_URB];
 	spinlock_t cport_out_urb_lock;
 };
@@ -131,7 +134,8 @@ static struct urb *next_free_urb(struct es1_ap_dev *es1, gfp_t gfp_mask)
 	/* Look in our pool of allocated urbs first, as that's the "fastest" */
 	for (i = 0; i < NUM_CPORT_OUT_URB; ++i) {
-		if (es1->cport_out_urb_busy[i] == false) {
+		if (es1->cport_out_urb_busy[i] == false &&
+				es1->cport_out_urb_cancelled[i] == false) {
 			es1->cport_out_urb_busy[i] = true;
 			urb = es1->cport_out_urb[i];
 			break;
@@ -199,11 +203,10 @@ static u16 gb_message_cport_unpack(struct gb_operation_msg_hdr *header)
 }
 
 /*
- * Returns an opaque cookie value if successful, or a pointer coded
- * error otherwise.  If the caller wishes to cancel the in-flight
- * buffer, it must supply the returned cookie to the cancel routine.
+ * Returns zero if the message was successfully queued, or a negative errno
+ * otherwise.
  */
-static void *message_send(struct greybus_host_device *hd, u16 cport_id,
+static int message_send(struct greybus_host_device *hd, u16 cport_id,
 			struct gb_message *message, gfp_t gfp_mask)
 {
 	struct es1_ap_dev *es1 = hd_to_es1(hd);
@@ -211,6 +214,7 @@ static void *message_send(struct greybus_host_device *hd, u16 cport_id,
 	size_t buffer_size;
 	int retval;
 	struct urb *urb;
+	unsigned long flags;
 
 	/*
 	 * The data actually transferred will include an indication
@@ -219,13 +223,17 @@ static void *message_send(struct greybus_host_device *hd, u16 cport_id,
 	 */
 	if (!cport_id_valid(cport_id)) {
 		pr_err("invalid destination cport 0x%02x\n", cport_id);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	/* Find a free urb */
 	urb = next_free_urb(es1, gfp_mask);
 	if (!urb)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
+
+	spin_lock_irqsave(&es1->cport_out_urb_lock, flags);
+	message->hcpriv = urb;
+	spin_unlock_irqrestore(&es1->cport_out_urb_lock, flags);
 
 	/* Pack the cport id into the message header */
 	gb_message_cport_pack(message->header, cport_id);
@@ -239,30 +247,56 @@ static void *message_send(struct greybus_host_device *hd, u16 cport_id,
 	retval = usb_submit_urb(urb, gfp_mask);
 	if (retval) {
 		pr_err("error %d submitting URB\n", retval);
+
+		spin_lock_irqsave(&es1->cport_out_urb_lock, flags);
+		message->hcpriv = NULL;
+		spin_unlock_irqrestore(&es1->cport_out_urb_lock, flags);
+
 		free_urb(es1, urb);
 		gb_message_cport_clear(message->header);
-		return ERR_PTR(retval);
+
+		return retval;
 	}
 
-	return urb;
+	return 0;
 }
 
 /*
- * The cookie value supplied is the value that message_send()
- * returned to its caller.  It identifies the message that should be
- * canceled.  This function must also handle (which is to say,
- * ignore) a null cookie value.
+ * Can not be called in atomic context.
  */
-static void message_cancel(void *cookie)
+static void message_cancel(struct gb_message *message)
 {
+	struct greybus_host_device *hd = message->operation->connection->hd;
+	struct es1_ap_dev *es1 = hd_to_es1(hd);
+	struct urb *urb;
+	int i;
-	/*
-	 * We really should be defensive and track all outstanding
-	 * (sent) messages rather than trusting the cookie provided
-	 * is valid.  For the time being, this will do.
-	 */
-	if (cookie)
-		usb_kill_urb(cookie);
+
+	might_sleep();
+
+	spin_lock_irq(&es1->cport_out_urb_lock);
+	urb = message->hcpriv;
+
+	/* Prevent dynamically allocated urb from being deallocated. */
+	usb_get_urb(urb);
+
+	/* Prevent pre-allocated urb from being reused. */
+	for (i = 0; i < NUM_CPORT_OUT_URB; ++i) {
+		if (urb == es1->cport_out_urb[i]) {
+			es1->cport_out_urb_cancelled[i] = true;
+			break;
+		}
+	}
+	spin_unlock_irq(&es1->cport_out_urb_lock);
+
+	usb_kill_urb(urb);
+
+	if (i < NUM_CPORT_OUT_URB) {
+		spin_lock_irq(&es1->cport_out_urb_lock);
+		es1->cport_out_urb_cancelled[i] = false;
+		spin_unlock_irq(&es1->cport_out_urb_lock);
+	}
+
+	usb_free_urb(urb);
 }
 
 static struct greybus_host_driver es1_driver = {
@@ -418,6 +452,7 @@ static void cport_out_callback(struct urb *urb)
 	struct greybus_host_device *hd = message->operation->connection->hd;
 	struct es1_ap_dev *es1 = hd_to_es1(hd);
 	int status = check_urb_status(urb);
+	unsigned long flags;
 
 	gb_message_cport_clear(message->header);
 
@@ -427,6 +462,10 @@ static void cport_out_callback(struct urb *urb)
 	 */
 	greybus_message_sent(hd, message, status);
 
+	spin_lock_irqsave(&es1->cport_out_urb_lock, flags);
+	message->hcpriv = NULL;
+	spin_unlock_irqrestore(&es1->cport_out_urb_lock, flags);
+
 	free_urb(es1, urb);
 }


@@ -94,6 +94,8 @@ struct es1_cport_out {
  * @cport_out_urb: array of urbs for the CPort out messages
  * @cport_out_urb_busy: array of flags to see if the @cport_out_urb is busy or
  *			not.
+ * @cport_out_urb_cancelled: array of flags indicating whether the
+ *			corresponding @cport_out_urb is being cancelled
  * @cport_out_urb_lock: locks the @cport_out_urb_busy "list"
  */
 struct es1_ap_dev {
@@ -111,6 +113,7 @@ struct es1_ap_dev {
 	struct es1_cport_out cport_out[NUM_BULKS];
 	struct urb *cport_out_urb[NUM_CPORT_OUT_URB];
 	bool cport_out_urb_busy[NUM_CPORT_OUT_URB];
+	bool cport_out_urb_cancelled[NUM_CPORT_OUT_URB];
 	spinlock_t cport_out_urb_lock;
 
 	int cport_to_ep[CPORT_MAX];
@@ -224,7 +227,8 @@ static struct urb *next_free_urb(struct es1_ap_dev *es1, gfp_t gfp_mask)
 	/* Look in our pool of allocated urbs first, as that's the "fastest" */
 	for (i = 0; i < NUM_CPORT_OUT_URB; ++i) {
-		if (es1->cport_out_urb_busy[i] == false) {
+		if (es1->cport_out_urb_busy[i] == false &&
+				es1->cport_out_urb_cancelled[i] == false) {
 			es1->cport_out_urb_busy[i] = true;
 			urb = es1->cport_out_urb[i];
 			break;
@@ -292,11 +296,10 @@ static u16 gb_message_cport_unpack(struct gb_operation_msg_hdr *header)
 }
 
 /*
- * Returns an opaque cookie value if successful, or a pointer coded
- * error otherwise.  If the caller wishes to cancel the in-flight
- * buffer, it must supply the returned cookie to the cancel routine.
+ * Returns zero if the message was successfully queued, or a negative errno
+ * otherwise.
  */
-static void *message_send(struct greybus_host_device *hd, u16 cport_id,
+static int message_send(struct greybus_host_device *hd, u16 cport_id,
 			struct gb_message *message, gfp_t gfp_mask)
 {
 	struct es1_ap_dev *es1 = hd_to_es1(hd);
@@ -305,6 +308,7 @@ static void *message_send(struct greybus_host_device *hd, u16 cport_id,
 	int retval;
 	struct urb *urb;
 	int bulk_ep_set;
+	unsigned long flags;
 
 	/*
 	 * The data actually transferred will include an indication
@@ -313,13 +317,17 @@ static void *message_send(struct greybus_host_device *hd, u16 cport_id,
 	 */
 	if (!cport_id_valid(cport_id)) {
 		pr_err("invalid destination cport 0x%02x\n", cport_id);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	/* Find a free urb */
 	urb = next_free_urb(es1, gfp_mask);
 	if (!urb)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
+
+	spin_lock_irqsave(&es1->cport_out_urb_lock, flags);
+	message->hcpriv = urb;
+	spin_unlock_irqrestore(&es1->cport_out_urb_lock, flags);
 
 	/* Pack the cport id into the message header */
 	gb_message_cport_pack(message->header, cport_id);
@@ -335,30 +343,56 @@ static void *message_send(struct greybus_host_device *hd, u16 cport_id,
 	retval = usb_submit_urb(urb, gfp_mask);
 	if (retval) {
 		pr_err("error %d submitting URB\n", retval);
+
+		spin_lock_irqsave(&es1->cport_out_urb_lock, flags);
+		message->hcpriv = NULL;
+		spin_unlock_irqrestore(&es1->cport_out_urb_lock, flags);
+
 		free_urb(es1, urb);
 		gb_message_cport_clear(message->header);
-		return ERR_PTR(retval);
+
+		return retval;
 	}
 
-	return urb;
+	return 0;
 }
 
 /*
- * The cookie value supplied is the value that message_send()
- * returned to its caller.  It identifies the message that should be
- * canceled.  This function must also handle (which is to say,
- * ignore) a null cookie value.
+ * Can not be called in atomic context.
  */
-static void message_cancel(void *cookie)
+static void message_cancel(struct gb_message *message)
 {
+	struct greybus_host_device *hd = message->operation->connection->hd;
+	struct es1_ap_dev *es1 = hd_to_es1(hd);
+	struct urb *urb;
+	int i;
-	/*
-	 * We really should be defensive and track all outstanding
-	 * (sent) messages rather than trusting the cookie provided
-	 * is valid.  For the time being, this will do.
-	 */
-	if (cookie)
-		usb_kill_urb(cookie);
+
+	might_sleep();
+
+	spin_lock_irq(&es1->cport_out_urb_lock);
+	urb = message->hcpriv;
+
+	/* Prevent dynamically allocated urb from being deallocated. */
+	usb_get_urb(urb);
+
+	/* Prevent pre-allocated urb from being reused. */
+	for (i = 0; i < NUM_CPORT_OUT_URB; ++i) {
+		if (urb == es1->cport_out_urb[i]) {
+			es1->cport_out_urb_cancelled[i] = true;
+			break;
		}
+	}
+	spin_unlock_irq(&es1->cport_out_urb_lock);
+
+	usb_kill_urb(urb);
+
+	if (i < NUM_CPORT_OUT_URB) {
+		spin_lock_irq(&es1->cport_out_urb_lock);
+		es1->cport_out_urb_cancelled[i] = false;
+		spin_unlock_irq(&es1->cport_out_urb_lock);
+	}
+
+	usb_free_urb(urb);
 }
 
 static struct greybus_host_driver es1_driver = {
@@ -518,6 +552,7 @@ static void cport_out_callback(struct urb *urb)
 	struct greybus_host_device *hd = message->operation->connection->hd;
 	struct es1_ap_dev *es1 = hd_to_es1(hd);
 	int status = check_urb_status(urb);
+	unsigned long flags;
 
 	gb_message_cport_clear(message->header);
 
@@ -527,6 +562,10 @@ static void cport_out_callback(struct urb *urb)
 	 */
 	greybus_message_sent(hd, message, status);
 
+	spin_lock_irqsave(&es1->cport_out_urb_lock, flags);
+	message->hcpriv = NULL;
+	spin_unlock_irqrestore(&es1->cport_out_urb_lock, flags);
+
 	free_urb(es1, urb);
 }


@@ -81,9 +81,9 @@ struct svc_msg;
 struct greybus_host_driver {
 	size_t	hd_priv_size;
 
-	void *(*message_send)(struct greybus_host_device *hd, u16 dest_cport_id,
+	int (*message_send)(struct greybus_host_device *hd, u16 dest_cport_id,
 			struct gb_message *message, gfp_t gfp_mask);
-	void (*message_cancel)(void *cookie);
+	void (*message_cancel)(struct gb_message *message);
 	int (*submit_svc)(struct svc_msg *svc_msg,
 			struct greybus_host_device *hd);
 };
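
With the new signatures, a host-controller driver's ops table then looks
roughly like the es1 one (an illustrative sketch; only message_send and
message_cancel are touched by this patch, and the submit_svc assignment
is assumed):

	static struct greybus_host_driver es1_driver = {
		.hd_priv_size	= sizeof(struct es1_ap_dev),
		.message_send	= message_send,	/* now returns int */
		.message_cancel	= message_cancel, /* now takes the message */
		.submit_svc	= submit_svc,
	};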


@@ -23,9 +23,6 @@ static struct kmem_cache *gb_message_cache;
 /* Workqueue to handle Greybus operation completions. */
 static struct workqueue_struct *gb_operation_workqueue;
 
-/* Protects the cookie representing whether a message is in flight */
-static DEFINE_MUTEX(gb_message_mutex);
-
 /*
  * Protects access to connection operations lists, as well as
  * updates to operation->errno.
@@ -135,21 +132,11 @@ gb_operation_find(struct gb_connection *connection, u16 operation_id)
 static int gb_message_send(struct gb_message *message)
 {
 	struct gb_connection *connection = message->operation->connection;
-	int ret = 0;
-	void *cookie;
 
-	mutex_lock(&gb_message_mutex);
-	cookie = connection->hd->driver->message_send(connection->hd,
+	return connection->hd->driver->message_send(connection->hd,
 					connection->hd_cport_id,
 					message,
 					GFP_KERNEL);
-	if (IS_ERR(cookie))
-		ret = PTR_ERR(cookie);
-	else
-		message->cookie = cookie;
-	mutex_unlock(&gb_message_mutex);
-
-	return ret;
 }
 
 /*
@@ -157,14 +144,9 @@ static int gb_message_send(struct gb_message *message)
  */
 static void gb_message_cancel(struct gb_message *message)
 {
-	mutex_lock(&gb_message_mutex);
-	if (message->cookie) {
-		struct greybus_host_device *hd;
+	struct greybus_host_device *hd = message->operation->connection->hd;
 
-		hd = message->operation->connection->hd;
-		hd->driver->message_cancel(message->cookie);
-	}
-	mutex_unlock(&gb_message_mutex);
+	hd->driver->message_cancel(message);
 }
 
 static void gb_operation_request_handle(struct gb_operation *operation)
@@ -719,9 +701,6 @@ void greybus_message_sent(struct greybus_host_device *hd,
 {
 	struct gb_operation *operation;
 
-	/* Get the message and record that it is no longer in flight */
-	message->cookie = NULL;
-
 	/*
 	 * If the message was a response, we just need to drop our
 	 * reference to the operation.  If an error occurred, report


@@ -73,19 +73,20 @@ struct gb_operation_msg_hdr {
 #define GB_OPERATION_MESSAGE_SIZE_MAX	U16_MAX
 
 /*
- * Protocol code should only examine the payload and payload_size
- * fields.  All other fields are intended to be private to the
- * operations core code.
+ * Protocol code should only examine the payload and payload_size fields, and
+ * host-controller drivers may use the hcpriv field.  All other fields are
+ * intended to be private to the operations core code.
  */
 struct gb_message {
 	struct gb_operation	*operation;
-	void			*cookie;
 	struct gb_operation_msg_hdr *header;
 
 	void			*payload;
	size_t			payload_size;
 
 	void			*buffer;
+
+	void			*hcpriv;
 };
 
 /*