From 045da4e55581d9b4de135bbdbdd1b7fa98dc18a9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 29 Oct 2011 00:22:49 +1000 Subject: [PATCH] drm/nvc0/pm: initial engine reclocking Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_pm.h | 2 + drivers/gpu/drm/nouveau/nouveau_state.c | 4 + drivers/gpu/drm/nouveau/nvc0_pm.c | 237 ++++++++++++++++++++++++ 3 files changed, 243 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.h b/drivers/gpu/drm/nouveau/nouveau_pm.h index 7e0cc2eeb307..2f8e14fbcff8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_pm.h +++ b/drivers/gpu/drm/nouveau/nouveau_pm.h @@ -72,6 +72,8 @@ int nva3_pm_clocks_set(struct drm_device *, void *); /* nvc0_pm.c */ int nvc0_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *); +void *nvc0_pm_clocks_pre(struct drm_device *, struct nouveau_pm_level *); +int nvc0_pm_clocks_set(struct drm_device *, void *); /* nouveau_temp.c */ void nouveau_temp_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index 57ccda47a70b..f5e98910d17f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -417,6 +417,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->vram.flags_valid = nvc0_vram_flags_valid; engine->pm.temp_get = nv84_temp_get; engine->pm.clocks_get = nvc0_pm_clocks_get; + engine->pm.clocks_pre = nvc0_pm_clocks_pre; + engine->pm.clocks_set = nvc0_pm_clocks_set; engine->pm.voltage_get = nouveau_voltage_gpio_get; engine->pm.voltage_set = nouveau_voltage_gpio_set; engine->pm.pwm_get = nv50_pm_pwm_get; @@ -468,6 +470,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->vram.flags_valid = nvc0_vram_flags_valid; engine->pm.temp_get = nv84_temp_get; engine->pm.clocks_get = nvc0_pm_clocks_get; + engine->pm.clocks_pre = nvc0_pm_clocks_pre; + engine->pm.clocks_set = nvc0_pm_clocks_set; engine->pm.voltage_get = nouveau_voltage_gpio_get; engine->pm.voltage_set = nouveau_voltage_gpio_set; break; diff --git a/drivers/gpu/drm/nouveau/nvc0_pm.c b/drivers/gpu/drm/nouveau/nvc0_pm.c index 929aded35cb5..e9992f62c1c0 100644 --- a/drivers/gpu/drm/nouveau/nvc0_pm.c +++ b/drivers/gpu/drm/nouveau/nvc0_pm.c @@ -153,3 +153,240 @@ nvc0_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl) perflvl->vdec = read_clk(dev, 0x0e); return 0; } + +struct nvc0_pm_clock { + u32 freq; + u32 ssel; + u32 mdiv; + u32 dsrc; + u32 ddiv; + u32 coef; +}; + +struct nvc0_pm_state { + struct nvc0_pm_clock eng[16]; +}; + +static u32 +calc_div(struct drm_device *dev, int clk, u32 ref, u32 freq, u32 *ddiv) +{ + u32 div = min((ref * 2) / freq, (u32)65); + if (div < 2) + div = 2; + + *ddiv = div - 2; + return (ref * 2) / div; +} + +static u32 +calc_src(struct drm_device *dev, int clk, u32 freq, u32 *dsrc, u32 *ddiv) +{ + u32 sclk; + + /* use one of the fixed frequencies if possible */ + *ddiv = 0x00000000; + switch (freq) { + case 27000: + case 108000: + *dsrc = 0x00000000; + if (freq == 108000) + *dsrc |= 0x00030000; + return freq; + case 100000: + *dsrc = 0x00000002; + return freq; + default: + *dsrc = 0x00000003; + break; + } + + /* otherwise, calculate the closest divider */ + sclk = read_vco(dev, clk); + if (clk < 7) + sclk = calc_div(dev, clk, sclk, freq, ddiv); + return sclk; +} + +static u32 +calc_pll(struct drm_device *dev, int clk, u32 freq, u32 *coef) +{ + struct pll_lims limits; + int N, M, P, ret; + + ret = get_pll_limits(dev, 0x137000 + (clk * 0x20), &limits); + if (ret) + return 0; + + limits.refclk = read_div(dev, clk, 0x137120, 0x137140); + if (!limits.refclk) + return 0; + + ret = nva3_calc_pll(dev, &limits, freq, &N, NULL, &M, &P); + if (ret <= 0) + return 0; + + *coef = (P << 16) | (N << 8) | M; + return ret; +} + +/* A (likely rather simplified and incomplete) view of the clock tree + * + * Key: + * + * S: source select + * D: divider + * P: pll + * F: switch + * + * Engine clocks: + * + * 137250(D) ---- 137100(F0) ---- 137160(S)/1371d0(D) ------------------- ref + * (F1) ---- 1370X0(P) ---- 137120(S)/137140(D) ---- ref + * + * Not all registers exist for all clocks. For example: clocks >= 8 don't + * have their own PLL (all tied to clock 7's PLL when in PLL mode), nor do + * they have the divider at 1371d0, though the source selection at 137160 + * still exists. You must use the divider at 137250 for these instead. + * + * Memory clock: + * + * TBD, read_mem() above is likely very wrong... + * + */ + +static int +calc_clk(struct drm_device *dev, int clk, struct nvc0_pm_clock *info, u32 freq) +{ + u32 src0, div0, div1D, div1P = 0; + u32 clk0, clk1 = 0; + + /* invalid clock domain */ + if (!freq) + return 0; + + /* first possible path, using only dividers */ + clk0 = calc_src(dev, clk, freq, &src0, &div0); + clk0 = calc_div(dev, clk, clk0, freq, &div1D); + + /* see if we can get any closer using PLLs */ + if (clk0 != freq) { + if (clk < 7) + clk1 = calc_pll(dev, clk, freq, &info->coef); + else + clk1 = read_pll(dev, 0x1370e0); + clk1 = calc_div(dev, clk, clk1, freq, &div1P); + } + + /* select the method which gets closest to target freq */ + if (abs((int)freq - clk0) <= abs((int)freq - clk1)) { + info->dsrc = src0; + if (div0) { + info->ddiv |= 0x80000000; + info->ddiv |= div0 << 8; + info->ddiv |= div0; + } + if (div1D) { + info->mdiv |= 0x80000000; + info->mdiv |= div1D; + } + info->ssel = 0; + info->freq = clk0; + } else { + if (div1P) { + info->mdiv |= 0x80000000; + info->mdiv |= div1P << 8; + } + info->ssel = (1 << clk); + info->freq = clk1; + } + + return 0; +} + +void * +nvc0_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nvc0_pm_state *info; + int ret; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + /* NFI why this is still in the performance table, the ROPCs appear + * to get their clock from clock 2 ("hub07", actually hub05 on this + * chip, but, anyway...) as well. nvatiming confirms hub05 and ROP + * are always the same freq with the binary driver even when the + * performance table says they should differ. + */ + if (dev_priv->chipset == 0xd9) + perflvl->rop = 0; + + if ((ret = calc_clk(dev, 0x00, &info->eng[0x00], perflvl->shader)) || + (ret = calc_clk(dev, 0x01, &info->eng[0x01], perflvl->rop)) || + (ret = calc_clk(dev, 0x02, &info->eng[0x02], perflvl->hub07)) || + (ret = calc_clk(dev, 0x07, &info->eng[0x07], perflvl->hub06)) || + (ret = calc_clk(dev, 0x08, &info->eng[0x08], perflvl->hub01)) || + (ret = calc_clk(dev, 0x09, &info->eng[0x09], perflvl->copy)) || + (ret = calc_clk(dev, 0x0c, &info->eng[0x0c], perflvl->daemon)) || + (ret = calc_clk(dev, 0x0e, &info->eng[0x0e], perflvl->vdec))) { + kfree(info); + return ERR_PTR(ret); + } + + return info; +} + +static void +prog_clk(struct drm_device *dev, int clk, struct nvc0_pm_clock *info) +{ + /* program dividers at 137160/1371d0 first */ + if (clk < 7 && !info->ssel) { + nv_mask(dev, 0x1371d0 + (clk * 0x04), 0x80003f3f, info->ddiv); + nv_wr32(dev, 0x137160 + (clk * 0x04), info->dsrc); + } + + /* switch clock to non-pll mode */ + nv_mask(dev, 0x137100, (1 << clk), 0x00000000); + nv_wait(dev, 0x137100, (1 << clk), 0x00000000); + + /* reprogram pll */ + if (clk < 7) { + /* make sure it's disabled first... */ + u32 base = 0x137000 + (clk * 0x20); + u32 ctrl = nv_rd32(dev, base + 0x00); + if (ctrl & 0x00000001) { + nv_mask(dev, base + 0x00, 0x00000004, 0x00000000); + nv_mask(dev, base + 0x00, 0x00000001, 0x00000000); + } + /* program it to new values, if necessary */ + if (info->ssel) { + nv_wr32(dev, base + 0x04, info->coef); + nv_mask(dev, base + 0x00, 0x00000001, 0x00000001); + nv_wait(dev, base + 0x00, 0x00020000, 0x00020000); + nv_mask(dev, base + 0x00, 0x00020004, 0x00000004); + } + } + + /* select pll/non-pll mode, and program final clock divider */ + nv_mask(dev, 0x137100, (1 << clk), info->ssel); + nv_wait(dev, 0x137100, (1 << clk), info->ssel); + nv_mask(dev, 0x137250 + (clk * 0x04), 0x00003f3f, info->mdiv); +} + +int +nvc0_pm_clocks_set(struct drm_device *dev, void *data) +{ + struct nvc0_pm_state *info = data; + int i; + + for (i = 0; i < 16; i++) { + if (!info->eng[i].freq) + continue; + prog_clk(dev, i, &info->eng[i]); + } + + kfree(info); + return 0; +}