This pull request covers what's left for 4.6. Notably, it includes a
significant 3D performance improvement and a fix to HDMI hotplug
detection for the Pi2/3.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABCgAGBQJW5vwnAAoJELXWKTbR/J7odK8P/iz8IxxPBwKUHvROxH1nZxbJ
 1NMs6VlJGed+B7EzKKEDGKGYrvdJaLMjJdpUBB7lbywvgEMW8r7h9RQr8YWiNv7f
 ZRjEJ3DL2tegwO4JtaNicKSWFqvUFKlEzg65iMiL25tbW0YDvEvjKXQjp8VDbTsX
 dgdFWchUbS70JHkIOKQQtKzzSMfmtgfI7DZ2a0N78RDwq7JShgELzeNeX89m0KAO
 5g3C/khpEqvTRcEuLgq96WnlibBJw42amSbv8fjtiCtDJVCQg3cS3iicrTil3nie
 OK7P7YsJlCiNTNu0U2ZhYpNaem1hCQFH/qc57Fx/jlEFpgg4spyFSHCHQ7yY1Z5A
 RIlM+wN4U3LzEqHoMC8vXWrUXlAHadiHUn4yVK3BELLDprCUTHZ20mgl9AP5jm34
 wk1bNJ7hpWUiaHCIyptirj9I961+lrtMJO7Y8tFk/7xYW4Y49baHbPpBOnzB59ab
 iMumgVS+8Kv4e5BATkXfLLKyH4iU5IxK63F3VA1AayVc0L5hELSuCbC14A7dXbTZ
 ZVblIK0bGQ7BnyIzDPYzCGF8Iv5VTH89NHFXPtNy/bvfyZk7Qh7EpPzwoEplYwLL
 d4yr4oqyhnqOT7WUzp+YJoI7C2dBOWYm7jveLfevGomdg4PSR9m21muq4JQgM7Jj
 YMcldIFgeEfT5GkOMtPY
 =SJqo
 -----END PGP SIGNATURE-----

Merge tag 'drm-vc4-next-2016-03-14' of github.com:anholt/linux into drm-next

This pull request covers what's left for 4.6.  Notably, it includes a
significant 3D performance improvement and a fix to HDMI hotplug
detection for the Pi2/3.

* tag 'drm-vc4-next-2016-03-14' of github.com:anholt/linux:
  drm/vc4: Recognize a more specific compatible string for V3D.
  dt-bindings: Add binding docs for V3D.
  drm/vc4: Return -EFAULT on copy_from_user() failure
  drm/vc4: Respect GPIO_ACTIVE_LOW on HDMI HPD if set in the devicetree.
  drm/vc4: Let gpiolib know that we're OK with sleeping for HPD.
  drm/vc4: improve throughput by pipelining binning and rendering jobs
This commit is contained in:
Dave Airlie 2016-03-15 09:49:19 +10:00
commit 211afd577a
7 changed files with 194 additions and 58 deletions

View file

@ -35,6 +35,12 @@ Optional properties for HDMI:
as an interrupt/status bit in the HDMI controller
itself). See bindings/pinctrl/brcm,bcm2835-gpio.txt
Required properties for V3D:
- compatible: Should be "brcm,bcm2835-v3d"
- reg: Physical base address and length of the V3D's registers
- interrupts: The interrupt number
See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
Example:
pixelvalve@7e807000 {
compatible = "brcm,bcm2835-pixelvalve2";
@ -60,6 +66,12 @@ hdmi: hdmi@7e902000 {
clock-names = "pixel", "hdmi";
};
v3d: v3d@7ec00000 {
compatible = "brcm,bcm2835-v3d";
reg = <0x7ec00000 0x1000>;
interrupts = <1 10>;
};
vc4: gpu {
compatible = "brcm,bcm2835-vc4";
};

View file

@ -499,11 +499,12 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(bo))
return PTR_ERR(bo);
ret = copy_from_user(bo->base.vaddr,
if (copy_from_user(bo->base.vaddr,
(void __user *)(uintptr_t)args->data,
args->size);
if (ret != 0)
args->size)) {
ret = -EFAULT;
goto fail;
}
/* Clear the rest of the memory from allocating from the BO
* cache.
*/

View file

@ -52,7 +52,7 @@ struct vc4_dev {
/* Protects bo_cache and the BO stats. */
struct mutex bo_lock;
/* Sequence number for the last job queued in job_list.
/* Sequence number for the last job queued in bin_job_list.
* Starts at 0 (no jobs emitted).
*/
uint64_t emit_seqno;
@ -62,11 +62,19 @@ struct vc4_dev {
*/
uint64_t finished_seqno;
/* List of all struct vc4_exec_info for jobs to be executed.
* The first job in the list is the one currently programmed
* into ct0ca/ct1ca for execution.
/* List of all struct vc4_exec_info for jobs to be executed in
* the binner. The first job in the list is the one currently
* programmed into ct0ca for execution.
*/
struct list_head job_list;
struct list_head bin_job_list;
/* List of all struct vc4_exec_info for jobs that have
* completed binning and are ready for rendering. The first
* job in the list is the one currently programmed into ct1ca
* for execution.
*/
struct list_head render_job_list;
/* List of the finished vc4_exec_infos waiting to be freed by
* job_done_work.
*/
@ -296,11 +304,20 @@ struct vc4_exec_info {
};
static inline struct vc4_exec_info *
vc4_first_job(struct vc4_dev *vc4)
vc4_first_bin_job(struct vc4_dev *vc4)
{
if (list_empty(&vc4->job_list))
if (list_empty(&vc4->bin_job_list))
return NULL;
return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
return list_first_entry(&vc4->bin_job_list, struct vc4_exec_info, head);
}
static inline struct vc4_exec_info *
vc4_first_render_job(struct vc4_dev *vc4)
{
if (list_empty(&vc4->render_job_list))
return NULL;
return list_first_entry(&vc4->render_job_list,
struct vc4_exec_info, head);
}
/**
@ -414,7 +431,9 @@ int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
void vc4_submit_next_job(struct drm_device *dev);
void vc4_submit_next_bin_job(struct drm_device *dev);
void vc4_submit_next_render_job(struct drm_device *dev);
void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
uint64_t timeout_ns, bool interruptible);
void vc4_job_handle_completed(struct vc4_dev *vc4);

View file

@ -141,10 +141,10 @@ vc4_save_hang_state(struct drm_device *dev)
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_vc4_get_hang_state *state;
struct vc4_hang_state *kernel_state;
struct vc4_exec_info *exec;
struct vc4_exec_info *exec[2];
struct vc4_bo *bo;
unsigned long irqflags;
unsigned int i, unref_list_count;
unsigned int i, j, unref_list_count, prev_idx;
kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
if (!kernel_state)
@ -153,37 +153,55 @@ vc4_save_hang_state(struct drm_device *dev)
state = &kernel_state->user_state;
spin_lock_irqsave(&vc4->job_lock, irqflags);
exec = vc4_first_job(vc4);
if (!exec) {
exec[0] = vc4_first_bin_job(vc4);
exec[1] = vc4_first_render_job(vc4);
if (!exec[0] && !exec[1]) {
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
return;
}
unref_list_count = 0;
list_for_each_entry(bo, &exec->unref_list, unref_head)
unref_list_count++;
/* Get the bos from both binner and renderer into hang state. */
state->bo_count = 0;
for (i = 0; i < 2; i++) {
if (!exec[i])
continue;
unref_list_count = 0;
list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
unref_list_count++;
state->bo_count += exec[i]->bo_count + unref_list_count;
}
kernel_state->bo = kcalloc(state->bo_count,
sizeof(*kernel_state->bo), GFP_ATOMIC);
state->bo_count = exec->bo_count + unref_list_count;
kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
GFP_ATOMIC);
if (!kernel_state->bo) {
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
return;
}
for (i = 0; i < exec->bo_count; i++) {
drm_gem_object_reference(&exec->bo[i]->base);
kernel_state->bo[i] = &exec->bo[i]->base;
prev_idx = 0;
for (i = 0; i < 2; i++) {
if (!exec[i])
continue;
for (j = 0; j < exec[i]->bo_count; j++) {
drm_gem_object_reference(&exec[i]->bo[j]->base);
kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
}
list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
drm_gem_object_reference(&bo->base.base);
kernel_state->bo[j + prev_idx] = &bo->base.base;
j++;
}
prev_idx = j + 1;
}
list_for_each_entry(bo, &exec->unref_list, unref_head) {
drm_gem_object_reference(&bo->base.base);
kernel_state->bo[i] = &bo->base.base;
i++;
}
state->start_bin = exec->ct0ca;
state->start_render = exec->ct1ca;
if (exec[0])
state->start_bin = exec[0]->ct0ca;
if (exec[1])
state->start_render = exec[1]->ct1ca;
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
@ -267,13 +285,15 @@ vc4_hangcheck_elapsed(unsigned long data)
struct vc4_dev *vc4 = to_vc4_dev(dev);
uint32_t ct0ca, ct1ca;
unsigned long irqflags;
struct vc4_exec_info *exec;
struct vc4_exec_info *bin_exec, *render_exec;
spin_lock_irqsave(&vc4->job_lock, irqflags);
exec = vc4_first_job(vc4);
bin_exec = vc4_first_bin_job(vc4);
render_exec = vc4_first_render_job(vc4);
/* If idle, we can stop watching for hangs. */
if (!exec) {
if (!bin_exec && !render_exec) {
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
return;
}
@ -284,9 +304,12 @@ vc4_hangcheck_elapsed(unsigned long data)
/* If we've made any progress in execution, rearm the timer
* and wait.
*/
if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) {
exec->last_ct0ca = ct0ca;
exec->last_ct1ca = ct1ca;
if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
(render_exec && ct1ca != render_exec->last_ct1ca)) {
if (bin_exec)
bin_exec->last_ct0ca = ct0ca;
if (render_exec)
render_exec->last_ct1ca = ct1ca;
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
vc4_queue_hangcheck(dev);
return;
@ -386,11 +409,13 @@ vc4_flush_caches(struct drm_device *dev)
* The job_lock should be held during this.
*/
void
vc4_submit_next_job(struct drm_device *dev)
vc4_submit_next_bin_job(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_exec_info *exec = vc4_first_job(vc4);
struct vc4_exec_info *exec;
again:
exec = vc4_first_bin_job(vc4);
if (!exec)
return;
@ -400,11 +425,40 @@ vc4_submit_next_job(struct drm_device *dev)
V3D_WRITE(V3D_BPOA, 0);
V3D_WRITE(V3D_BPOS, 0);
if (exec->ct0ca != exec->ct0ea)
/* Either put the job in the binner if it uses the binner, or
* immediately move it to the to-be-rendered queue.
*/
if (exec->ct0ca != exec->ct0ea) {
submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
} else {
vc4_move_job_to_render(dev, exec);
goto again;
}
}
void
vc4_submit_next_render_job(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_exec_info *exec = vc4_first_render_job(vc4);
if (!exec)
return;
submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}
void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
bool was_empty = list_empty(&vc4->render_job_list);
list_move_tail(&exec->head, &vc4->render_job_list);
if (was_empty)
vc4_submit_next_render_job(dev);
}
static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
@ -443,14 +497,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
exec->seqno = seqno;
vc4_update_bo_seqnos(exec, seqno);
list_add_tail(&exec->head, &vc4->job_list);
list_add_tail(&exec->head, &vc4->bin_job_list);
/* If no job was executing, kick ours off. Otherwise, it'll
* get started when the previous job's frame done interrupt
* get started when the previous job's flush done interrupt
* occurs.
*/
if (vc4_first_job(vc4) == exec) {
vc4_submit_next_job(dev);
if (vc4_first_bin_job(vc4) == exec) {
vc4_submit_next_bin_job(dev);
vc4_queue_hangcheck(dev);
}
@ -859,7 +913,8 @@ vc4_gem_init(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
INIT_LIST_HEAD(&vc4->job_list);
INIT_LIST_HEAD(&vc4->bin_job_list);
INIT_LIST_HEAD(&vc4->render_job_list);
INIT_LIST_HEAD(&vc4->job_done_list);
INIT_LIST_HEAD(&vc4->seqno_cb_list);
spin_lock_init(&vc4->job_lock);

View file

@ -47,6 +47,7 @@ struct vc4_hdmi {
void __iomem *hdmicore_regs;
void __iomem *hd_regs;
int hpd_gpio;
bool hpd_active_low;
struct clk *pixel_clock;
struct clk *hsm_clock;
@ -166,7 +167,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
struct vc4_dev *vc4 = to_vc4_dev(dev);
if (vc4->hdmi->hpd_gpio) {
if (gpio_get_value(vc4->hdmi->hpd_gpio))
if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio) ^
vc4->hdmi->hpd_active_low)
return connector_status_connected;
else
return connector_status_disconnected;
@ -517,11 +519,17 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
* we'll use the HDMI core's register.
*/
if (of_find_property(dev->of_node, "hpd-gpios", &value)) {
hdmi->hpd_gpio = of_get_named_gpio(dev->of_node, "hpd-gpios", 0);
enum of_gpio_flags hpd_gpio_flags;
hdmi->hpd_gpio = of_get_named_gpio_flags(dev->of_node,
"hpd-gpios", 0,
&hpd_gpio_flags);
if (hdmi->hpd_gpio < 0) {
ret = hdmi->hpd_gpio;
goto err_unprepare_hsm;
}
hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW;
}
vc4->hdmi = hdmi;

View file

@ -30,6 +30,10 @@
* disables that specific interrupt, and 0s written are ignored
* (reading either one returns the set of enabled interrupts).
*
* When we take a binning flush done interrupt, we need to submit the
* next frame for binning and move the finished frame to the render
* thread.
*
* When we take a render frame interrupt, we need to wake the
* processes waiting for some frame to be done, and get the next frame
* submitted ASAP (so the hardware doesn't sit idle when there's work
@ -44,6 +48,7 @@
#include "vc4_regs.h"
#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
V3D_INT_FLDONE | \
V3D_INT_FRDONE)
DECLARE_WAIT_QUEUE_HEAD(render_wait);
@ -77,7 +82,7 @@ vc4_overflow_mem_work(struct work_struct *work)
unsigned long irqflags;
spin_lock_irqsave(&vc4->job_lock, irqflags);
current_exec = vc4_first_job(vc4);
current_exec = vc4_first_bin_job(vc4);
if (current_exec) {
vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
list_add_tail(&vc4->overflow_mem->unref_head,
@ -98,17 +103,43 @@ vc4_overflow_mem_work(struct work_struct *work)
}
static void
vc4_irq_finish_job(struct drm_device *dev)
vc4_irq_finish_bin_job(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_exec_info *exec = vc4_first_job(vc4);
struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
if (!exec)
return;
vc4_move_job_to_render(dev, exec);
vc4_submit_next_bin_job(dev);
}
static void
vc4_cancel_bin_job(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
if (!exec)
return;
list_move_tail(&exec->head, &vc4->bin_job_list);
vc4_submit_next_bin_job(dev);
}
static void
vc4_irq_finish_render_job(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_exec_info *exec = vc4_first_render_job(vc4);
if (!exec)
return;
vc4->finished_seqno++;
list_move_tail(&exec->head, &vc4->job_done_list);
vc4_submit_next_job(dev);
vc4_submit_next_render_job(dev);
wake_up_all(&vc4->job_wait_queue);
schedule_work(&vc4->job_done_work);
@ -125,9 +156,10 @@ vc4_irq(int irq, void *arg)
barrier();
intctl = V3D_READ(V3D_INTCTL);
/* Acknowledge the interrupts we're handling here. The render
* frame done interrupt will be cleared, while OUTOMEM will
* stay high until the underlying cause is cleared.
/* Acknowledge the interrupts we're handling here. The binner
* last flush / render frame done interrupt will be cleared,
* while OUTOMEM will stay high until the underlying cause is
* cleared.
*/
V3D_WRITE(V3D_INTCTL, intctl);
@ -138,9 +170,16 @@ vc4_irq(int irq, void *arg)
status = IRQ_HANDLED;
}
if (intctl & V3D_INT_FLDONE) {
spin_lock(&vc4->job_lock);
vc4_irq_finish_bin_job(dev);
spin_unlock(&vc4->job_lock);
status = IRQ_HANDLED;
}
if (intctl & V3D_INT_FRDONE) {
spin_lock(&vc4->job_lock);
vc4_irq_finish_job(dev);
vc4_irq_finish_render_job(dev);
spin_unlock(&vc4->job_lock);
status = IRQ_HANDLED;
}
@ -205,6 +244,7 @@ void vc4_irq_reset(struct drm_device *dev)
V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
spin_lock_irqsave(&vc4->job_lock, irqflags);
vc4_irq_finish_job(dev);
vc4_cancel_bin_job(dev);
vc4_irq_finish_render_job(dev);
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

View file

@ -268,6 +268,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev)
}
static const struct of_device_id vc4_v3d_dt_match[] = {
{ .compatible = "brcm,bcm2835-v3d" },
{ .compatible = "brcm,vc4-v3d" },
{}
};