⛏️ index : haiku.git

/*
 * Copyright 2024, Haiku, Inc. All Rights Reserved.
 * Distributed under the terms of the MIT License.
 */

#include "TigerLakePLL.h"

#include "accelerant.h"

#include <math.h>


#undef TRACE
#define TRACE_PLL
#ifdef TRACE_PLL
#   define TRACE(x...) _sPrintf("intel_extreme: " x)
#else
#   define TRACE(x...)
#endif

#define ERROR(x...) _sPrintf("intel_extreme: " x)
#define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)


/**
 * Compute the best PLL parameters for a given symbol clock frequency for a DVI or HDMI port.
 *
 * This is the algorithm documented in Intel Documentation: IHD-OS-TGL-Vol 12-12.21, page 182
 *
 * The clock generation on Tiger Lake is in two steps: first, a DCO generates a fractional
 * multiplication of the reference clock (in the GHz range). Then, 3 dividers bring this back into
 * the symbol clock frequency range.
 *
 * Reference clock (24 or 19.2MHz, as defined in DSSM Reference Frequency register)
 *             ||
 *             vv
 * DCO (multiply by non-integer value defined in DPLL_CFGCR0 register)
 *             ||
 *             vv
 * "DCO frequency" in the range 7998 - 10000 MHz
 *             ||
 *             vv
 * Divide by P, Q, and K
 *             ||
 *             vv
 * AFE clock (PLL output)
 *             ||
 *             vv
 * Divide by 5 (fixed)
 *             ||
 *             vv
 * Symbol clock (same as Pixel clock for 24-bit RGB)
 *
 * The algorithm to configure this is:
 * - Iterate over all allowed values for the divider obtained by P, Q and K
 * - Determine the one that results in the DCO frequency being as close as possible to 8999MHz
 * - Compute the corresponding values for P, Q and K and the DCO multiplier
 *
 * Since the DCO is a fractional multiplier (it can multiply by non-integer values), it will always
 * be possible to set the DCO to a "close enough" value in its available range. The only constraint
 * is getting it as close as possible to the midpoint (8999MHz), and at least have it in the
 * allowed range (7998 to 10000MHz). If this is not possible (too low or too high pixel clock), a
 * different video mode or setup will be needed (for example, enable dual link DVI to divide the
 * clock by two).
 *
 * This also means that this algorithm is independant of the initial reference frequency: there
 * will always be a way to setup the DCO so that it outputs the frequency computed here, no matter
 * what the input clock is.
 *
 * Unlinke in previous hardware generations, there is no need to satisfy multiple constraints at
 * the same time because of several stages of dividers and multipliers each with their own
 * frequency range.
 *
 * DCO multiplier = DCO integer + DCO fraction / 2^15
 * Symbol clock frequency = DCO multiplier * RefFreq in MHz / (5 * Pdiv * Qdiv * Kdiv)
 *
 * The symbol clock is the clock of the DVI/HDMI port. It defines how much time is needed to send
 * one "symbol", which corresponds to 8 bits of useful data for each channel (Red, Green and Blue).
 *
 * In our case (8 bit RGB videomode), the symbol clock is equal to the pixel rate. It would need
 * to be adjusted for 10 and 12-bit modes (more bits per pixel) as well as for YUV420 modes (the U
 * and V parts are sent only for some pixels, reducing the total bandwidth).
 *
 * @param[in] freq Desired symbol clock frequency in kHz
 * @param[out] Pdiv, Qdiv, Kdiv: dividers for the PLL
 * @param[out] bestdco Required DCO frequency, in the range 7998 to 10000, in MHz
 */
bool
ComputeHdmiDpll(int freq, int* Pdiv, int* Qdiv, int* Kdiv, float* bestdco)
{
	int bestdiv = 0;
	float dco = 0, dcocentrality = 0;
	float bestdcocentrality = 999999;

	// The allowed values for the divider depending on the allowed values for P, Q, and K:
	// - P can be 2, 3, 5 or 7
	// - K can be 1, 2, or 3
	// - Q can be 1 to 255 if K = 2. Otherwise, Q must be 1.
	// Not all possible combinations are listed here, more can be added if needed to reach lower
	// resolutions and refresh rates (probably not so interesting, this already allows to reach
	// frequencies low enough for all practical uses in a standard setup).
	const int dividerlist[] = { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30, 32, 36,
		40, 42, 44, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 76, 78, 80, 84, 88, 90, 92,
		96, 98, 100, 102, 3, 5, 7, 9, 15, 21 };
	const float dcomin = 7998;
	const float dcomax = 10000;
	const float dcomid = (dcomin + dcomax) / 2;

	float afeclk = (5 * freq) / 1000;

	for (size_t i = 0; i < B_COUNT_OF(dividerlist); i++) {
		int div = dividerlist[i];
		dco = afeclk * div;
		if (dco <= dcomax && dco >= dcomin) {
			dcocentrality = fabs(dco - dcomid);
			if (dcocentrality < bestdcocentrality) {
				bestdcocentrality = dcocentrality;
				bestdiv = div;
				*bestdco = dco;
			}
		}
	}

	if (bestdiv != 0) {
		// Good divider found
		if (bestdiv % 2 == 0) {
			// Divider is even
			if (bestdiv == 2) {
				*Pdiv = 2;
				*Qdiv = 1;
				*Kdiv = 1;
			} else if (bestdiv % 4 == 0) {
				*Pdiv = 2;
				*Qdiv = bestdiv / 4;
				*Kdiv = 2;
			} else if (bestdiv % 6 == 0) {
				*Pdiv = 3;
				*Qdiv = bestdiv / 6;
				*Kdiv = 2;
			} else if (bestdiv % 5 == 0) {
				*Pdiv = 5;
				*Qdiv = bestdiv / 10;
				*Kdiv = 2;
			} else if (bestdiv % 14 == 0) {
				*Pdiv = 7;
				*Qdiv = bestdiv / 14;
				*Kdiv = 2;
			}
		} else {
			// Divider is odd
			if (bestdiv == 3 || bestdiv == 5 || bestdiv == 7) {
				*Pdiv = bestdiv;
				*Qdiv = 1;
				*Kdiv = 1;
			} else {
				// Divider is 9, 15, or 21
				*Pdiv = bestdiv / 3;
				*Qdiv = 1;
				*Kdiv = 3;
			}
		}

		// SUCCESS
		return true;
	} else {
		// No good divider found
		// FAIL, try a different frequency (different video mode)
		return false;
	}
}


/*! In the case of DisplayPort, the interface between the computer and the display is not just a
 * stream of pixels, but instead a packetized link. This means the interface does not need to be
 * running in sync with the pixel clock. Instead, a selection of well-defined frequencies are used.
 *
 * This also would allow to use a "spread spectrum" clock, reducing interferences without degrading
 * picture quality.
 *
 * Here we just set it to the isecond lowest predefined frequency of 2.7GHz, which will be enough
 * for displays up to full HD, and a little more.
 *
 * TODO decide when we have to use one of the higher frequencies. See "DisplayPort Mode PLL values"
 * in IHD-OS-TGL-Vol 12-12.21, page 178. However, my machine uses a slightly different value than
 * what's in Intel datasheets (with Intel values, bestdco should be 8100 and not 8090). I'm not
 * sure why that is so, possibly they shift the fractional value in the CFGR0 register by 9 bits
 * instead of 10? But replicating what my BIOS does here allows me to skip the PLL
 * programming, a good idea, because it seems we don't yet know how to properly disable and
 * re-train displayport once it is switched off.
 */
bool
ComputeDisplayPortDpll(int freq, int* Pdiv, int* Qdiv, int* Kdiv, float* bestdco)
{
	*Pdiv = 3;
	*Qdiv = 1;
	*Kdiv = 2;
	*bestdco = 8090;

	return true;
}


/*! Actually program the computed values (from the functions above) into the PLL, and start it.
 *
 * TODO: detect if the PLL is already running at the right frequency, and in that case, skip
 * reprogramming it altogether. In the case of DisplayPort, most often there is no need to change
 * anything once the clock has been initially set.
 */
status_t
ProgramPLL(int which, int Pdiv, int Qdiv, int Kdiv, float dco)
{
	// Set up the registers for PLL access for the requested PLL
	uint32 DPLL_CFGCR0;
	uint32 DPLL_CFGCR1;
	uint32 DPLL_ENABLE;
	uint32 DPLL_SPREAD_SPECTRUM;

	switch (which) {
		case 0:
			DPLL_ENABLE = TGL_DPLL0_ENABLE;
			DPLL_SPREAD_SPECTRUM = TGL_DPLL0_SPREAD_SPECTRUM;
			DPLL_CFGCR0 = TGL_DPLL0_CFGCR0;
			DPLL_CFGCR1 = TGL_DPLL0_CFGCR1;
			break;
		case 1:
			DPLL_ENABLE = TGL_DPLL1_ENABLE;
			DPLL_SPREAD_SPECTRUM = TGL_DPLL1_SPREAD_SPECTRUM;
			DPLL_CFGCR0 = TGL_DPLL1_CFGCR0;
			DPLL_CFGCR1 = TGL_DPLL1_CFGCR1;
			break;
		case 4:
			DPLL_ENABLE = TGL_DPLL4_ENABLE;
			DPLL_SPREAD_SPECTRUM = TGL_DPLL4_SPREAD_SPECTRUM;
			DPLL_CFGCR0 = TGL_DPLL4_CFGCR0;
			DPLL_CFGCR1 = TGL_DPLL4_CFGCR1;
			break;
		default:
			return B_BAD_VALUE;
	}

	// Find the reference frequency (24 or 19.2MHz)
	int ref_khz = gInfo->shared_info->pll_info.reference_frequency;

	// There is an automatic divide-by-two in this case
	if (ref_khz == 38400)
		ref_khz = 19200;

	float ref = ref_khz / 1000.0f;

	// Compute the DCO divider integer and fractional parts
	uint32 dco_int = (uint32)floorf(dco / ref);
	uint32 dco_frac = (uint32)ceilf((dco / ref - dco_int) * (1 << 15));

	int32 dco_reg = dco_int | (dco_frac << TGL_DPLL_DCO_FRACTION_SHIFT);

	int32 dividers = 0;
	switch (Pdiv) {
		case 2:
			dividers |= TGL_DPLL_PDIV_2;
			break;
		case 3:
			dividers |= TGL_DPLL_PDIV_3;
			break;
		case 5:
			dividers |= TGL_DPLL_PDIV_5;
			break;
		case 7:
			dividers |= TGL_DPLL_PDIV_7;
			break;
		default:
			return B_BAD_VALUE;
	}
	switch (Kdiv) {
		case 1:
			dividers |= TGL_DPLL_KDIV_1;
			break;
		case 2:
			dividers |= TGL_DPLL_KDIV_2;
			break;
		case 3:
			dividers |= TGL_DPLL_KDIV_3;
			break;
		default:
			return B_BAD_VALUE;
	}
	if (Qdiv != 1)
		dividers |= (Qdiv << TGL_DPLL_QDIV_RATIO_SHIFT) | TGL_DPLL_QDIV_ENABLE;

	int32 initialState = read32(DPLL_ENABLE);
	TRACE("DPLL_ENABLE(%" B_PRIx32 ") initial value = %" B_PRIx32 "\n", DPLL_ENABLE, initialState);

	if (initialState & TGL_DPLL_LOCK) {
		int32 oldDCO = read32(DPLL_CFGCR0);
		int32 oldDividers = read32(DPLL_CFGCR1);
		TRACE("DPLL already locked, checking current settings: DCO %" B_PRIx32 " -> %" B_PRIx32
				", dividers %" B_PRIx32 " -> %" B_PRIx32 "\n",
			oldDCO, dco_reg, oldDividers, dividers);

		if ((oldDCO == dco_reg) && (oldDividers == dividers)) {
			TRACE("DPLL already configured at the right frequency, no changes needed\n");
			return B_OK;
		}
	}

	// Before we start, disable the PLL
	write32(DPLL_ENABLE, read32(DPLL_ENABLE) & ~TGL_DPLL_ENABLE);
	while ((read32(DPLL_ENABLE) & TGL_DPLL_LOCK) != 0);
	TRACE("PLL is unlocked\n");

	// Enable PLL power
	write32(DPLL_ENABLE, read32(DPLL_ENABLE) | TGL_DPLL_POWER_ENABLE);

	// Wait for PLL to be powered up
	while ((read32(DPLL_ENABLE) & TGL_DPLL_POWER_STATE) == 0);
	TRACE("PLL is powered on\n");

	// Deactivate spread spectrum
	write32(DPLL_SPREAD_SPECTRUM, read32(DPLL_SPREAD_SPECTRUM) & ~TGL_DPLL_SSC_ENABLE);

	// Configure DCO
	write32(DPLL_CFGCR0, dco_reg);

	// Configure dividers
	write32(DPLL_CFGCR1, dividers);
	TRACE("DFGCR0(%" B_PRIx32 ") = %" B_PRIx32 ", CFGCR1(%" B_PRIx32 ") = %" B_PRIx32
			" (int = %" B_PRId32 ", frac = %" B_PRId32 ")\n", DPLL_CFGCR0, dco_reg,
		DPLL_CFGCR1, dividers, dco_int, dco_frac);

	// Read to make sure all writes are flushed to the hardware
	read32(DPLL_CFGCR1);

	// TODO Display voltage frequency switching?

	// Enable PLL
	write32(DPLL_ENABLE, read32(DPLL_ENABLE) | TGL_DPLL_ENABLE);
	TRACE("DPLL_ENABLE(%" B_PRIx32 ") = %" B_PRIx32 "\n", DPLL_ENABLE, read32(DPLL_ENABLE));

	// Wait for PLL to be enabled
	while ((read32(DPLL_ENABLE) & TGL_DPLL_LOCK) == 0);
	TRACE("PLL is locked\n");

	// TODO Display voltage frequency switching?

	return B_OK;
}