drm/i915: Update memory bandwidth formulae
The formulae have been updated to include more variables. Make
sure the code carries the same changes.

Bspec: 64631, 54023

v2: Make GEN11 follow the default route and fix calculation of
    maxdebw (RK)
v3: Fix div by zero in the default case
    Correct indent for fallthrough (Jani)
v4: Fix div by zero on gen11.
v5: Fix 0 max_numchannels case
v6:
    - Split gen11/gen12 algorithms
    - Fix RKL deburst value
    - Fix difference between ICL and TGL algorithms
    - Protect deinterleave from being 0
    - Warn when numchannels exceeds max_numchannels
    - Fix scaling of clk_max from different units
    - s/deinterleave/channelwidth/ in calculating peakbw
    - Fix off-by-one for num_planes on TGL+
    - Fix SAGV check
v7: Fix div by zero error on gen11
v8: Even though the gen11 algorithm says that we need to return the
    derated bw for a qgv point whose plane count is less than the
    number of active planes, we return 0 for deratedbw when only one
    plane is allowed. Modify the algorithm to accommodate the case
    where the number of active planes is the same as the minimum
    number of planes supported by a qgv point.
v9: Fix dclk scaling for dg1
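
Below is a minimal user-space sketch of the updated dclk scaling,
assuming the raw QGV dclk is reported in multiples of 100/6 MHz
(~16667 kHz). The qgv_dclk_to_mhz() helper and the sample value are
hypothetical, for illustration only; the patch itself applies
DIV_ROUND_UP() to sp->dclk in place.

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /*
     * Convert a raw QGV dclk (in 100/6 MHz steps, ~16667 kHz) to MHz;
     * display version 12+ adds a +500 bias before rounding up to MHz.
     */
    static unsigned int qgv_dclk_to_mhz(unsigned int raw, int gen12_plus)
    {
            return DIV_ROUND_UP(16667u * raw + (gen12_plus ? 500u : 0u), 1000u);
    }

    int main(void)
    {
            printf("%u\n", qgv_dclk_to_mhz(162, 1)); /* prints 2701 (MHz) */
            return 0;
    }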

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Suggested-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Reviewed-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211015210041.16858-1-radhakrishna.sripada@intel.com
rkinvictus authored and mattrope committed Nov 4, 2021
1 parent a14fef8 commit c64a9a7
211 changes: 179 additions & 32 deletions drivers/gpu/drm/i915/display/intel_bw.c
@@ -27,6 +27,9 @@ struct intel_qgv_info {
u8 num_points;
u8 num_psf_points;
u8 t_bl;
u8 max_numchannels;
u8 channel_width;
u8 deinterleave;
};

static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
@@ -42,7 +45,7 @@ static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */
else
dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */
sp->dclk = dclk_ratio * dclk_reference;
sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + 500, 1000);

val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
if (val & DG1_GEAR_TYPE)
@@ -69,6 +72,7 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
int point)
{
u32 val = 0, val2 = 0;
u16 dclk;
int ret;

ret = sandybridge_pcode_read(dev_priv,
@@ -78,7 +82,8 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
if (ret)
return ret;

sp->dclk = val & 0xffff;
dclk = val & 0xffff;
sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000);
sp->t_rp = (val & 0xff0000) >> 16;
sp->t_rcd = (val & 0xff000000) >> 24;

@@ -133,7 +138,8 @@ int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
}

static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
struct intel_qgv_info *qi)
struct intel_qgv_info *qi,
bool is_y_tile)
{
const struct dram_info *dram_info = &dev_priv->dram_info;
int i, ret;
@@ -144,17 +150,41 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
if (DISPLAY_VER(dev_priv) == 12)
switch (dram_info->type) {
case INTEL_DRAM_DDR4:
qi->t_bl = 4;
qi->t_bl = is_y_tile ? 8 : 4;
qi->max_numchannels = 2;
qi->channel_width = 64;
qi->deinterleave = is_y_tile ? 1 : 2;
break;
case INTEL_DRAM_DDR5:
qi->t_bl = 8;
qi->t_bl = is_y_tile ? 16 : 8;
qi->max_numchannels = 4;
qi->channel_width = 32;
qi->deinterleave = is_y_tile ? 1 : 2;
break;
case INTEL_DRAM_LPDDR4:
if (IS_ROCKETLAKE(dev_priv)) {
qi->t_bl = 8;
qi->max_numchannels = 4;
qi->channel_width = 32;
qi->deinterleave = 2;
break;
}
fallthrough;
case INTEL_DRAM_LPDDR5:
qi->t_bl = 16;
qi->max_numchannels = 8;
qi->channel_width = 16;
qi->deinterleave = is_y_tile ? 2 : 4;
break;
default:
qi->t_bl = 16;
qi->max_numchannels = 1;
break;
}
else if (DISPLAY_VER(dev_priv) == 11)
else if (DISPLAY_VER(dev_priv) == 11) {
qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8;
qi->max_numchannels = 1;
}

if (drm_WARN_ON(&dev_priv->drm,
qi->num_points > ARRAY_SIZE(qi->points)))
@@ -193,12 +223,6 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
return 0;
}

static int icl_calc_bw(int dclk, int num, int den)
{
/* multiples of 16.666MHz (100/6) */
return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
}

static int adl_calc_psf_bw(int clk)
{
/*
@@ -240,7 +264,7 @@ static const struct intel_sa_info tgl_sa_info = {
};

static const struct intel_sa_info rkl_sa_info = {
.deburst = 16,
.deburst = 8,
.deprogbwlimit = 20, /* GB/s */
.displayrtids = 128,
.derating = 10,
@@ -265,35 +289,130 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel
struct intel_qgv_info qi = {};
bool is_y_tile = true; /* assume y tile may be used */
int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
int deinterleave;
int ipqdepth, ipqdepthpch;
int ipqdepth, ipqdepthpch = 16;
int dclk_max;
int maxdebw;
int num_groups = ARRAY_SIZE(dev_priv->max_bw);
int i, ret;

ret = icl_get_qgv_points(dev_priv, &qi);
ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
if (ret) {
drm_dbg_kms(&dev_priv->drm,
"Failed to get memory subsystem information, ignoring bandwidth limits");
return ret;
}

deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
dclk_max = icl_sagv_max_dclk(&qi);
maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10);
ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);

for (i = 0; i < num_groups; i++) {
struct intel_bw_info *bi = &dev_priv->max_bw[i];
int clpchgroup;
int j;

clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
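/*
 * Illustrative arithmetic (hypothetical numbers): with deburst = 8,
 * deinterleave = 1 and 2 channels, clpchgroup = (8 * 1 / 2) << i,
 * i.e. 4 for the first BW group, so with ipqdepth = 16 that group
 * supports (16 - 4) / 4 + 1 = 4 planes; each following group doubles
 * clpchgroup and supports fewer planes.
 */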

bi->num_qgv_points = qi.num_points;
bi->num_psf_gv_points = qi.num_psf_points;

for (j = 0; j < qi.num_points; j++) {
const struct intel_qgv_point *sp = &qi.points[j];
int ct, bw;

/*
* Max row cycle time
*
* FIXME what is the logic behind the
* assumed burst length?
*/
ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
(clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
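/*
 * sp->dclk is stored in MHz now (scaled at readout with
 * DIV_ROUND_UP()), so the bandwidth below no longer needs the old
 * icl_calc_bw() 100/6 conversion from 16.666 MHz units.
 */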
bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

ipqdepthpch = 16;
bi->deratedbw[j] = min(maxdebw,
bw * (100 - sa->derating) / 100);

drm_dbg_kms(&dev_priv->drm,
"BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
i, j, bi->num_planes, bi->deratedbw[j]);
}
}
/*
* In case SAGV is disabled in BIOS, we always get 1
* SAGV point, but we can't send PCode commands to restrict it
* as it will fail and is pointless anyway.
*/
if (qi.num_points == 1)
dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
else
dev_priv->sagv_status = I915_SAGV_ENABLED;

return 0;
}

static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
struct intel_qgv_info qi = {};
const struct dram_info *dram_info = &dev_priv->dram_info;
bool is_y_tile = true; /* assume y tile may be used */
int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
int ipqdepth, ipqdepthpch = 16;
int dclk_max;
int maxdebw, peakbw;
int clperchgroup;
int num_groups = ARRAY_SIZE(dev_priv->max_bw);
int i, ret;

ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
if (ret) {
drm_dbg_kms(&dev_priv->drm,
"Failed to get memory subsystem information, ignoring bandwidth limits");
return ret;
}

if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5)
num_channels *= 2;

qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);

if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);

if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.");
if (qi.max_numchannels != 0)
num_channels = min_t(u8, num_channels, qi.max_numchannels);

dclk_max = icl_sagv_max_dclk(&qi);

peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * dclk_max;
maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */

maxdebw = min(sa->deprogbwlimit * 1000,
icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
/*
* clperchgroup = 4kpagespermempage * clperchperblock,
* clperchperblock = 8 / num_channels * interleave
*/
clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave;
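/*
 * Illustrative arithmetic with hypothetical values: num_channels = 4,
 * channel_width = 16 bits (2 bytes/clock) and dclk_max = 2700 MHz give
 * peakbw = 4 * 2 * 2700 = 21600, so maxdebw = min(deprogbwlimit * 1000,
 * 21600 * 6 / 10 = 12960); with deinterleave = 1,
 * clperchgroup = 4 * DIV_ROUND_UP(8, 4) * 1 = 8.
 */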

for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
for (i = 0; i < num_groups; i++) {
struct intel_bw_info *bi = &dev_priv->max_bw[i];
struct intel_bw_info *bi_next;
int clpchgroup;
int j;

clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
if (i < num_groups - 1)
bi_next = &dev_priv->max_bw[i + 1];

clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;

if (i < num_groups - 1 && clpchgroup < clperchgroup)
bi_next->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
else
bi_next->num_planes = 0;

bi->num_qgv_points = qi.num_points;
bi->num_psf_gv_points = qi.num_psf_points;
@@ -310,7 +429,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel
*/
ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
(clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

bi->deratedbw[j] = min(maxdebw,
bw * (100 - sa->derating) / 100);
@@ -329,9 +448,6 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel
"BW%d / PSF GV %d: num_planes=%d bw=%u\n",
i, j, bi->num_planes, bi->psf_bw[j]);
}

if (bi->num_planes == 1)
break;
}

/*
@@ -395,6 +511,34 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
return 0;
}

static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
int num_planes, int qgv_point)
{
int i;

/*
* Let's return max bw for 0 planes
*/
num_planes = max(1, num_planes);

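/*
 * Search from the last BW group towards the first: the first group
 * whose num_planes covers the request provides the derated limit,
 * with group 0 as the final fallback.
 */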
for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) {
const struct intel_bw_info *bi =
&dev_priv->max_bw[i];

/*
* Pcode will not expose all QGV points when
* SAGV is forced to off/min/med/max.
*/
if (qgv_point >= bi->num_qgv_points)
return UINT_MAX;

if (num_planes <= bi->num_planes)
return bi->deratedbw[qgv_point];
}

return dev_priv->max_bw[0].deratedbw[qgv_point];
}

static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv,
int psf_gv_point)
{
@@ -412,13 +556,13 @@ void intel_bw_init_hw(struct drm_i915_private *dev_priv)
if (IS_DG2(dev_priv))
dg2_get_bw_info(dev_priv);
else if (IS_ALDERLAKE_P(dev_priv))
icl_get_bw_info(dev_priv, &adlp_sa_info);
tgl_get_bw_info(dev_priv, &adlp_sa_info);
else if (IS_ALDERLAKE_S(dev_priv))
icl_get_bw_info(dev_priv, &adls_sa_info);
tgl_get_bw_info(dev_priv, &adls_sa_info);
else if (IS_ROCKETLAKE(dev_priv))
icl_get_bw_info(dev_priv, &rkl_sa_info);
tgl_get_bw_info(dev_priv, &rkl_sa_info);
else if (DISPLAY_VER(dev_priv) == 12)
icl_get_bw_info(dev_priv, &tgl_sa_info);
tgl_get_bw_info(dev_priv, &tgl_sa_info);
else if (DISPLAY_VER(dev_priv) == 11)
icl_get_bw_info(dev_priv, &icl_sa_info);
}
@@ -746,7 +890,10 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
for (i = 0; i < num_qgv_points; i++) {
unsigned int max_data_rate;

max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
if (DISPLAY_VER(dev_priv) > 11)
max_data_rate = tgl_max_bw(dev_priv, num_active_planes, i);
else
max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
/*
* We need to know which qgv point gives us
* maximum bandwidth in order to disable SAGV
