From 36da893d43c80a97420d312f05c6921b7af5d105 Mon Sep 17 00:00:00 2001 From: Xilin Wu Date: Mon, 6 Apr 2026 10:59:29 +0800 Subject: [PATCH 01/11] clk: qcom: clk-rcg2: fix clk_rcg2_calc_mnd() producing wrong M/N/pre_div Fix three related bugs in clk_rcg2_calc_mnd() that cause the GP clock MND divider to produce incorrect frequencies and duty cycles: 1) n_max and n_candidate are declared as u16 but can hold values exceeding 65535 during computation. When mnd_width is 16 and mnd_max is 65535, even m=1 gives n_max=65536 which overflows u16 to 0, making "n_candidate < n_max" always false. Similarly n_candidate overflows on intermediate products (e.g., 15360 * 5 = 76800 wraps to 11264), causing the wrong value to be accepted as n. Fix by changing both n_max and n_candidate from u16 to u32. 2) n_max is computed as (m + mnd_max), which only accounts for the N register constraint (n - m must fit in mnd_width bits). However, the D register shares the same mnd_width bits but must store values up to 2*n for duty cycle control. When n > mnd_max/2, the D register cannot represent high duty cycles, silently clamping the maximum achievable duty cycle (e.g. to 85% instead of 99%). Fix by computing n_max as (mnd_max + 1) / 2, ensuring 2*n always fits within the D register's bit width. 3) When no pre-division is needed (pre_div stays at its initial value of 1), "pre_div > 1 ? pre_div : 0" sets f->pre_div to 0. The subsequent convert_to_reg_val() computes (0 * 2 - 1) which underflows the u8 pre_div field to 255, programming a 128x pre-divider into hardware. Fix by assigning pre_div unconditionally. Since pre_div is initialized to 1 and only multiplied, it is always >= 1. A value of 1 correctly converts to register value 1 via convert_to_reg_val(), which means no pre-division in calc_rate(). 
Example with parent=19.2 MHz XO, requesting 25 kHz: Before: m=1, n=48, pre_div=15 -> 26666 Hz (6.7% error) After: m=1, n=768, pre_div=1 -> 25000 Hz (exact) Fixes: 898b72fa44f5 ("clk: qcom: gcc-sdm845: Add general purpose clock ops") Signed-off-by: Xilin Wu --- drivers/clk/qcom/clk-rcg2.c | 77 ++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index fae1c07982aba..618aac10f70de 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -279,7 +279,82 @@ static int clk_rcg2_determine_floor_rate(struct clk_hw *hw, static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f, u32 *_cfg) { - u32 cfg, mask, d_val, not2d_val, n_minus_m; + int i = 2; + unsigned int pre_div = 1; + unsigned long rates_gcd, scaled_parent_rate; + u32 n_max, n_candidate = 1; + u16 m, n = 1; + + rates_gcd = gcd(parent_rate, rate); + m = div64_u64(rate, rates_gcd); + scaled_parent_rate = div64_u64(parent_rate, rates_gcd); + + /* + * Limit n so that the D register can represent the full duty cycle + * range. The D register stores values up to 2*(n-m) using mnd_width + * bits. Since m >= 1, n <= (mnd_max + 1) / 2 guarantees + * 2*(n-m) <= mnd_max - 1. + */ + n_max = (mnd_max + 1) / 2; + + while (scaled_parent_rate > (unsigned long)n_max * pre_div_max) { + // we're exceeding divisor's range, trying lower scale. + if (m > 1) { + m--; + scaled_parent_rate = mult_frac(scaled_parent_rate, m, (m + 1)); + } else { + // cannot lower scale, just set max divisor values. 
+ f->n = n_max; + f->pre_div = pre_div_max; + f->m = m; + return; + } + } + + while (scaled_parent_rate > 1) { + while (scaled_parent_rate % i == 0) { + n_candidate *= i; + if (n_candidate < n_max) + n = n_candidate; + else if (pre_div * i < pre_div_max) + pre_div *= i; + else + clk_rcg2_split_div(i, &pre_div, &n, pre_div_max); + + scaled_parent_rate /= i; + } + i++; + } + + f->m = m; + f->n = n; + f->pre_div = pre_div; +} + +static int clk_rcg2_determine_gp_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_rcg2 *rcg = to_clk_rcg2(hw); + struct freq_tbl f_tbl = {}, *f = &f_tbl; + int mnd_max = BIT(rcg->mnd_width) - 1; + int hid_max = BIT(rcg->hid_width) - 1; + struct clk_hw *parent; + u64 parent_rate; + + parent = clk_hw_get_parent(hw); + parent_rate = clk_get_rate(parent->clk); + if (!parent_rate) + return -EINVAL; + + clk_rcg2_calc_mnd(parent_rate, req->rate, f, mnd_max, hid_max / 2); + convert_to_reg_val(f); + req->rate = calc_rate(parent_rate, f->m, f->n, f->n, f->pre_div); + + return 0; +} + +static int __clk_rcg2_configure_parent(struct clk_rcg2 *rcg, u8 src, u32 *_cfg) +{ struct clk_hw *hw = &rcg->clkr.hw; int ret, index = qcom_find_src_index(hw, rcg->parent_map, f->src); From 777afcc80959097a261e863b05b3fe7112a84b6d Mon Sep 17 00:00:00 2001 From: Xilin Wu Date: Mon, 6 Apr 2026 15:32:11 +0800 Subject: [PATCH 02/11] clk: qcom: clk-rcg2: use 64-bit arithmetic in set_duty_cycle() The duty cycle calculation in clk_rcg2_set_duty_cycle() computes "n * duty->num * 2" using u32 arithmetic. When n is large and duty->num is also large, the intermediate result overflows u32. For example, requesting 50% duty on a 1 kHz output derived from a 19.2 MHz parent gives n=19200, duty->num=500000, duty->den=1000000: 19200 * 500000 * 2 = 19,200,000,000 > U32_MAX (4,294,967,295) The truncated result produces a completely wrong duty cycle (5.26% instead of the requested 50%). Use DIV_ROUND_CLOSEST_ULL() with an explicit (u64) cast to prevent the overflow. 
Fixes: 7f891faf596e ("clk: qcom: clk-rcg2: Add support for duty-cycle for RCG") Signed-off-by: Xilin Wu --- drivers/clk/qcom/clk-rcg2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 618aac10f70de..44cb766a55ad5 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -531,7 +531,7 @@ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) duty_per = (duty->num * 100) / duty->den; /* Calculate 2d value */ - d = DIV_ROUND_CLOSEST(n * duty_per * 2, 100); + d = DIV_ROUND_CLOSEST_ULL((u64)n * duty->num * 2, duty->den); /* * Check bit widths of 2d. If D is too big reduce duty cycle. From fc0a9b6c4cdb49b2bc57e0a1e424b3bd42c1e13d Mon Sep 17 00:00:00 2001 From: Xilin Wu Date: Mon, 6 Apr 2026 23:15:41 +0800 Subject: [PATCH 03/11] clk: qcom: clk-branch: calculate timeout based on clock frequency Clock branches with extremely low rates (tens of Hz to low kHz) take much longer to toggle than the fixed 200 us timeout allows. A 1 kHz clock needs at least 3 ms (3 cycles) to toggle. Instead of increasing the timeout to a huge fixed value for all clocks, dynamically compute the required timeout based on the current clock rate, accounting for 3 cycles at the current clock rate. 
Based on a downstream patch by Mike Tipton: https://git.codelinaro.org/clo/la/kernel/qcom/-/commit/aa899c2d1fa31e247f04810f125ac9c60927c901 Fixes: 6e0ad1b6c1c9 ("clk: qcom: Add support for branches/gate clocks") Signed-off-by: Mike Tipton Signed-off-by: Xilin Wu --- drivers/clk/qcom/clk-branch.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/clk-branch.c b/drivers/clk/qcom/clk-branch.c index e213a0284c21c..5632f45131d98 100644 --- a/drivers/clk/qcom/clk-branch.c +++ b/drivers/clk/qcom/clk-branch.c @@ -60,9 +60,27 @@ static bool clk_branch2_check_halt(const struct clk_branch *br, bool enabling) return (val & CBCR_CLK_OFF) == (invert ? 0 : CBCR_CLK_OFF); } +static int get_branch_timeout(const struct clk_branch *br) +{ + unsigned long rate; + int timeout; + + /* + * The time it takes a clock branch to toggle is roughly 3 clock cycles. + */ + rate = clk_hw_get_rate(&br->clkr.hw); + if (!rate) + return 200; + + timeout = 3 * (USEC_PER_SEC / rate); + + return max(timeout, 200); +} + static int clk_branch_wait(const struct clk_branch *br, bool enabling, bool (check_halt)(const struct clk_branch *, bool)) { + int timeout, count; bool voted = br->halt_check & BRANCH_VOTED; const char *name = clk_hw_get_name(&br->clkr.hw); @@ -78,9 +96,9 @@ static int clk_branch_wait(const struct clk_branch *br, bool enabling, } else if (br->halt_check == BRANCH_HALT_ENABLE || br->halt_check == BRANCH_HALT || (enabling && voted)) { - int count = 200; + timeout = get_branch_timeout(br); - while (count-- > 0) { + for (count = timeout; count > 0; count--) { if (check_halt(br, enabling)) return 0; udelay(1); From 85cf38888f41ad97d49ed5102c70ac22efe6009b Mon Sep 17 00:00:00 2001 From: Xilin Wu Date: Mon, 6 Apr 2026 23:16:02 +0800 Subject: [PATCH 04/11] clk: qcom: clk-rcg2: calculate timeout based on clock frequency RCGs with extremely low rates (tens of Hz to low kHz) take much longer to update than the fixed 500 us timeout allows. 
A 1 kHz clock needs at least 3 ms (3 cycles) for the configuration handshake. Instead of increasing the timeout to a huge fixed value for all clocks, dynamically compute the required timeout based on both the old and new clock rates, accounting for 3 cycles at each rate. Based on a downstream patch by Mike Tipton: https://git.codelinaro.org/clo/la/kernel/qcom/-/commit/aa899c2d1fa31e247f04810f125ac9c60927c901 Fixes: bcd61c0f535a ("clk: qcom: Add support for root clock generators (RCGs)") Signed-off-by: Mike Tipton Signed-off-by: Xilin Wu --- drivers/clk/qcom/clk-rcg.h | 2 ++ drivers/clk/qcom/clk-rcg2.c | 63 +++++++++++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg.h b/drivers/clk/qcom/clk-rcg.h index 7d0f925960559..dc64e57b1da62 100644 --- a/drivers/clk/qcom/clk-rcg.h +++ b/drivers/clk/qcom/clk-rcg.h @@ -141,6 +141,7 @@ extern const struct clk_ops clk_dyn_rcg_ops; * @clkr: regmap clock handle * @cfg_off: defines the cfg register offset from the CMD_RCGR + CFG_REG * @parked_cfg: cached value of the CFG register for parked RCGs + * @configured_freq: last configured frequency, used for timeout calculation * @hw_clk_ctrl: whether to enable hardware clock control */ struct clk_rcg2 { @@ -153,6 +154,7 @@ struct clk_rcg2 { struct clk_regmap clkr; u8 cfg_off; u32 parked_cfg; + unsigned long configured_freq; bool hw_clk_ctrl; }; diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 44cb766a55ad5..780b825ef83d9 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -108,9 +108,27 @@ static u8 clk_rcg2_get_parent(struct clk_hw *hw) return __clk_rcg2_get_parent(hw, cfg); } +static int get_update_timeout(const struct clk_rcg2 *rcg) +{ + int timeout = 0; + unsigned long current_freq; + + /* + * The time it takes an RCG to update is roughly 3 clock cycles of the + * old and new clock rates. 
+ */ + current_freq = clk_hw_get_rate(&rcg->clkr.hw); + if (current_freq) + timeout += 3 * (USEC_PER_SEC / current_freq); + if (rcg->configured_freq) + timeout += 3 * (USEC_PER_SEC / rcg->configured_freq); + + return max(timeout, 500); +} + static int update_config(struct clk_rcg2 *rcg) { - int count, ret; + int timeout, count, ret; u32 cmd; struct clk_hw *hw = &rcg->clkr.hw; const char *name = clk_hw_get_name(hw); @@ -120,8 +138,10 @@ static int update_config(struct clk_rcg2 *rcg) if (ret) return ret; + timeout = get_update_timeout(rcg); + /* Wait for update to take effect */ - for (count = 500; count > 0; count--) { + for (count = timeout; count > 0; count--) { ret = regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CMD_REG, &cmd); if (ret) return ret; @@ -420,6 +440,28 @@ static int clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f) if (ret) return ret; + rcg->configured_freq = f->freq; + + return update_config(rcg); +} + +static int clk_rcg2_configure_gp(struct clk_rcg2 *rcg, const struct freq_tbl *f) +{ + u32 cfg; + int ret; + + ret = regmap_read(rcg->clkr.regmap, RCG_CFG_OFFSET(rcg), &cfg); + if (ret) + return ret; + + ret = __clk_rcg2_configure_mnd(rcg, f, &cfg); + if (ret) + return ret; + + ret = regmap_write(rcg->clkr.regmap, RCG_CFG_OFFSET(rcg), cfg); + if (ret) + return ret; + return update_config(rcg); } @@ -452,6 +494,23 @@ static int clk_rcg2_set_rate(struct clk_hw *hw, unsigned long rate, return __clk_rcg2_set_rate(hw, rate, CEIL); } +static int clk_rcg2_set_gp_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_rcg2 *rcg = to_clk_rcg2(hw); + int mnd_max = BIT(rcg->mnd_width) - 1; + int hid_max = BIT(rcg->hid_width) - 1; + struct freq_tbl f_tbl = {}, *f = &f_tbl; + int ret; + + clk_rcg2_calc_mnd(parent_rate, rate, f, mnd_max, hid_max / 2); + convert_to_reg_val(f); + rcg->configured_freq = rate; + ret = clk_rcg2_configure_gp(rcg, f); + + return ret; +} + static int clk_rcg2_set_floor_rate(struct clk_hw 
*hw, unsigned long rate, unsigned long parent_rate) { From 9dab4f041aabd9b45360ddc77bcca606c65b0575 Mon Sep 17 00:00:00 2001 From: Xilin Wu Date: Mon, 6 Apr 2026 18:10:53 +0800 Subject: [PATCH 05/11] clk: qcom: clk-rcg2: fix set_duty_cycle() integer overflow in boundary checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The duty cycle boundary checks in clk_rcg2_set_duty_cycle() use integer division to compare the 2d value against hardware limits: if ((d / 2) > (n - m)) d = (n - m) * 2; else if ((d / 2) < (m / 2)) d = m; When d is odd, d/2 truncates, allowing values one beyond the hardware maximum to pass. For example with n=7680, m=1, requesting 99.995% duty: d = 15359 (raw 2d value) d / 2 = 7679 (truncated) n - m = 7679 7679 > 7679 → false, check passes But d=15359 exceeds the hardware limit of 2*(n-m)=15358. Writing this invalid value causes the RCG to fail its configuration update, the CMD_UPDATE bit never clears, and the clock output stops entirely. 
The initial D value in __clk_rcg2_configure_mnd() correctly uses direct comparison without division: d_val = clamp_t(u32, d_val, f->m, 2 * (f->n - f->m)); Align set_duty_cycle() with the same bounds by comparing directly: if (d > (n - m) * 2) else if (d < m) Fixes: 7f891faf596e ("clk: qcom: clk-rcg2: Add support for duty-cycle for RCG") Signed-off-by: Xilin Wu --- drivers/clk/qcom/clk-rcg2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 780b825ef83d9..e4d29770aa324 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -598,9 +598,9 @@ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) */ d = clamp_val(d, 1, mask); - if ((d / 2) > (n - m)) + if (d > (n - m) * 2) d = (n - m) * 2; - else if ((d / 2) < (m / 2)) + else if (d < m) d = m; not2d = ~d & mask; From 4263e1ee5c8c34e3466a236c65b016e7aa732eef Mon Sep 17 00:00:00 2001 From: Xilin Wu Date: Mon, 6 Apr 2026 18:21:22 +0800 Subject: [PATCH 06/11] clk: qcom: gcc-sc7280: switch GP clocks to clk_rcg2_gp_ops The GP1/GP2/GP3 clock sources are general-purpose timer/PWM clocks that require runtime-computed MND divider values and duty cycle control. They are currently using clk_rcg2_ops with a frequency table containing only a few fixed entries (50/100/200 MHz), which: - Cannot produce arbitrary frequencies needed for PWM periods - Bypasses the MND divider (m=0, n=0), making duty cycle control impossible (MND is in bypass mode, set_duty_cycle returns -EINVAL) Switch to clk_rcg2_gp_ops which uses clk_rcg2_calc_mnd() to dynamically compute optimal M/N/pre_div values from any requested frequency, and empty the frequency table since it is not used by the GP ops path. 
Signed-off-by: Xilin Wu --- drivers/clk/qcom/gcc-sc7280.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/clk/qcom/gcc-sc7280.c b/drivers/clk/qcom/gcc-sc7280.c index ad7b8463f840e..d601ed989882b 100644 --- a/drivers/clk/qcom/gcc-sc7280.c +++ b/drivers/clk/qcom/gcc-sc7280.c @@ -457,9 +457,6 @@ static struct clk_regmap_mux gcc_usb3_sec_phy_pipe_clk_src = { }; static const struct freq_tbl ftbl_gcc_gp1_clk_src[] = { - F(50000000, P_GCC_GPLL0_OUT_EVEN, 6, 0, 0), - F(100000000, P_GCC_GPLL0_OUT_EVEN, 3, 0, 0), - F(200000000, P_GCC_GPLL0_OUT_ODD, 1, 0, 0), { } }; @@ -473,7 +470,7 @@ static struct clk_rcg2 gcc_gp1_clk_src = { .name = "gcc_gp1_clk_src", .parent_data = gcc_parent_data_4, .num_parents = ARRAY_SIZE(gcc_parent_data_4), - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_gp_ops, }, }; @@ -487,7 +484,7 @@ static struct clk_rcg2 gcc_gp2_clk_src = { .name = "gcc_gp2_clk_src", .parent_data = gcc_parent_data_4, .num_parents = ARRAY_SIZE(gcc_parent_data_4), - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_gp_ops, }, }; @@ -501,7 +498,7 @@ static struct clk_rcg2 gcc_gp3_clk_src = { .name = "gcc_gp3_clk_src", .parent_data = gcc_parent_data_4, .num_parents = ARRAY_SIZE(gcc_parent_data_4), - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_gp_ops, }, }; From 5b920ae68d58739e7a6e06558a3b8d7ccf085345 Mon Sep 17 00:00:00 2001 From: Xilin Wu Date: Mon, 6 Apr 2026 18:22:18 +0800 Subject: [PATCH 07/11] dt-bindings: pwm: clk-pwm: add optional GPIO and pinctrl properties The clk-pwm driver cannot produce constant output levels (0% or 100% duty cycle, or disabled state) through the clock hardware alone - the actual pin level when the clock is off is undefined and hardware-dependent. Document optional gpios, pinctrl-names, pinctrl-0, and pinctrl-1 properties that allow the driver to switch the pin between clock function mux (for normal PWM output) and GPIO mode (to drive a deterministic constant level). 
Signed-off-by: Xilin Wu --- .../devicetree/bindings/pwm/clk-pwm.yaml | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pwm/clk-pwm.yaml b/Documentation/devicetree/bindings/pwm/clk-pwm.yaml index ec1768291503c..2a0e3e02d27bf 100644 --- a/Documentation/devicetree/bindings/pwm/clk-pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/clk-pwm.yaml @@ -15,6 +15,11 @@ description: | It's often possible to control duty-cycle of such clocks which makes them suitable for generating PWM signal. + Optionally, a GPIO and pinctrl states can be provided. When a constant + output level is needed (0%, 100%, or disabled), the pin is switched to + GPIO mode to drive the level directly. For normal PWM output the pin is + switched back to its clock function mux. + allOf: - $ref: pwm.yaml# @@ -29,6 +34,26 @@ properties: "#pwm-cells": const: 2 + gpios: + description: + Optional GPIO used to drive a constant level when the PWM output is + disabled or set to 0% / 100% duty cycle. When provided, pinctrl states + "default" (clock mux) and "gpio" must also be defined. + maxItems: 1 + + pinctrl-names: true + + pinctrl-0: + description: Pin configuration for clock function mux (normal PWM). + maxItems: 1 + + pinctrl-1: + description: Pin configuration for GPIO mode (constant level output). 
+ maxItems: 1 + +dependencies: + gpios: [ pinctrl-0, pinctrl-1 ] + unevaluatedProperties: false required: @@ -41,6 +66,15 @@ examples: compatible = "clk-pwm"; #pwm-cells = <2>; clocks = <&gcc 0>; - pinctrl-names = "default"; + }; + + - | + pwm { + compatible = "clk-pwm"; + #pwm-cells = <2>; + clocks = <&gcc 0>; + pinctrl-names = "default", "gpio"; pinctrl-0 = <&pwm_clk_flash_default>; + pinctrl-1 = <&pwm_clk_flash_gpio>; + gpios = <&tlmm 32 0>; }; From bcc8f3b13001367d7253f62e15e52de7769fdf2a Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Mon, 18 Nov 2024 13:15:19 +0300 Subject: [PATCH 08/11] clk: qcom: clk-rcg2: document calc_rate function Update calc_rate docs to reflect that pre_div is not a pure divisor, but a register value, and requires conversion. Signed-off-by: Dzmitry Sankouski Link: https://lore.kernel.org/r/20241118-starqltechn_integration_upstream-v8-1-ac8e36a3aa65@gmail.com Signed-off-by: Bjorn Andersson --- drivers/clk/qcom/clk-rcg2.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index e4d29770aa324..9cf098b8bb665 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -168,12 +168,21 @@ static int clk_rcg2_set_parent(struct clk_hw *hw, u8 index) return update_config(rcg); } -/* - * Calculate m/n:d rate +/** + * calc_rate() - Calculate rate based on m/n:d values + * + * @rate: Parent rate. + * @m: Multiplier. + * @n: Divisor. + * @mode: Use zero to ignore m/n calculation. + * @hid_div: Pre divisor register value. Pre divisor value + * relates to hid_div as pre_div = (hid_div + 1) / 2. 
+ * + * Return calculated rate according to formula: * * parent_rate m * rate = ----------- x --- - * hid_div n + * pre_div n */ static unsigned long calc_rate(unsigned long rate, u32 m, u32 n, u32 mode, u32 hid_div) From 1a3f854e28882f84db9993d72aa714e589ac677e Mon Sep 17 00:00:00 2001 From: William Norman Date: Fri, 17 Apr 2026 03:48:19 +0000 Subject: [PATCH 09/11] clk: qcom: rcg2: add dedicated GP clock ops Teach the GP RCG path to evaluate all candidate parents, program the selected parent and M/N state in one configuration update, and expose a GP-specific clk_ops instance with set_rate_and_parent support. Signed-off-by: William Norman --- drivers/clk/qcom/clk-rcg.h | 1 + drivers/clk/qcom/clk-rcg2.c | 252 +++++++++++++++++++++++++----------- 2 files changed, 178 insertions(+), 75 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg.h b/drivers/clk/qcom/clk-rcg.h index dc64e57b1da62..8a48b318344aa 100644 --- a/drivers/clk/qcom/clk-rcg.h +++ b/drivers/clk/qcom/clk-rcg.h @@ -170,6 +170,7 @@ struct clk_rcg2_gfx3d { container_of(to_clk_rcg2(_hw), struct clk_rcg2_gfx3d, rcg) extern const struct clk_ops clk_rcg2_ops; +extern const struct clk_ops clk_rcg2_gp_ops; extern const struct clk_ops clk_rcg2_floor_ops; extern const struct clk_ops clk_rcg2_mux_closest_ops; extern const struct clk_ops clk_edp_pixel_ops; diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 9cf098b8bb665..8838a6ff92423 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -307,39 +308,95 @@ static int clk_rcg2_determine_floor_rate(struct clk_hw *hw, static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f, u32 *_cfg) +{ + u32 cfg, mask, d_val, not2d_val, n_minus_m; + struct clk_hw *hw = &rcg->clkr.hw; + int ret, index = qcom_find_src_index(hw, rcg->parent_map, f->src); + + if (index < 0) + return index; + + if (rcg->mnd_width && f->n) { + mask = 
BIT(rcg->mnd_width) - 1; + ret = regmap_update_bits(rcg->clkr.regmap, + RCG_M_OFFSET(rcg), mask, f->m); + if (ret) + return ret; + + ret = regmap_update_bits(rcg->clkr.regmap, + RCG_N_OFFSET(rcg), mask, ~(f->n - f->m)); + if (ret) + return ret; + + /* Calculate 2d value */ + d_val = f->n; + + n_minus_m = f->n - f->m; + n_minus_m *= 2; + + d_val = clamp_t(u32, d_val, f->m, n_minus_m); + not2d_val = ~d_val & mask; + + ret = regmap_update_bits(rcg->clkr.regmap, + RCG_D_OFFSET(rcg), mask, not2d_val); + if (ret) + return ret; + } + + mask = BIT(rcg->hid_width) - 1; + mask |= CFG_SRC_SEL_MASK | CFG_MODE_MASK | CFG_HW_CLK_CTRL_MASK; + cfg = f->pre_div << CFG_SRC_DIV_SHIFT; + cfg |= rcg->parent_map[index].cfg << CFG_SRC_SEL_SHIFT; + if (rcg->mnd_width && f->n && (f->m != f->n)) + cfg |= CFG_MODE_DUAL_EDGE; + + *_cfg &= ~mask; + *_cfg |= cfg; + + return 0; +} + +static void convert_to_reg_val(struct freq_tbl *f) +{ + if (f->pre_div) + f->pre_div = 2 * f->pre_div - 1; +} + +static inline void clk_rcg2_split_div(int multiplier, unsigned int *pre_div, + u16 *n, unsigned int pre_div_max) +{ + *n = mult_frac(multiplier * *n, *pre_div, pre_div_max); + *pre_div = pre_div_max; +} + +static void clk_rcg2_calc_mnd(u64 parent_rate, u64 rate, struct freq_tbl *f, + unsigned int mnd_max, unsigned int pre_div_max) { int i = 2; unsigned int pre_div = 1; unsigned long rates_gcd, scaled_parent_rate; - u32 n_max, n_candidate = 1; - u16 m, n = 1; + u16 m, n = 1, n_candidate = 1, n_max; + + if (!parent_rate || !rate) + return; rates_gcd = gcd(parent_rate, rate); m = div64_u64(rate, rates_gcd); scaled_parent_rate = div64_u64(parent_rate, rates_gcd); - - /* - * Limit n so that the D register can represent the full duty cycle - * range. The D register stores values up to 2*(n-m) using mnd_width - * bits. Since m >= 1, n <= (mnd_max + 1) / 2 guarantees - * 2*(n-m) <= mnd_max - 1. 
- */ - n_max = (mnd_max + 1) / 2; - - while (scaled_parent_rate > (unsigned long)n_max * pre_div_max) { - // we're exceeding divisor's range, trying lower scale. + while (scaled_parent_rate > (mnd_max + m) * pre_div_max) { if (m > 1) { m--; scaled_parent_rate = mult_frac(scaled_parent_rate, m, (m + 1)); } else { - // cannot lower scale, just set max divisor values. - f->n = n_max; + f->n = mnd_max + m; f->pre_div = pre_div_max; f->m = m; return; } } + n_max = m + mnd_max; + while (scaled_parent_rate > 1) { while (scaled_parent_rate % i == 0) { n_candidate *= i; @@ -364,70 +421,67 @@ static int clk_rcg2_determine_gp_rate(struct clk_hw *hw, struct clk_rate_request *req) { struct clk_rcg2 *rcg = to_clk_rcg2(hw); - struct freq_tbl f_tbl = {}, *f = &f_tbl; int mnd_max = BIT(rcg->mnd_width) - 1; int hid_max = BIT(rcg->hid_width) - 1; - struct clk_hw *parent; - u64 parent_rate; - - parent = clk_hw_get_parent(hw); - parent_rate = clk_get_rate(parent->clk); - if (!parent_rate) - return -EINVAL; - - clk_rcg2_calc_mnd(parent_rate, req->rate, f, mnd_max, hid_max / 2); - convert_to_reg_val(f); - req->rate = calc_rate(parent_rate, f->m, f->n, f->n, f->pre_div); - - return 0; -} - -static int __clk_rcg2_configure_parent(struct clk_rcg2 *rcg, u8 src, u32 *_cfg) -{ - struct clk_hw *hw = &rcg->clkr.hw; - int ret, index = qcom_find_src_index(hw, rcg->parent_map, f->src); - - if (index < 0) - return index; - - if (rcg->mnd_width && f->n) { - mask = BIT(rcg->mnd_width) - 1; - ret = regmap_update_bits(rcg->clkr.regmap, - RCG_M_OFFSET(rcg), mask, f->m); - if (ret) - return ret; + int num_parents = clk_hw_get_num_parents(hw); + struct clk_hw *best_parent = NULL, *best_over_parent = NULL; + u64 best_parent_rate = 0, best_over_parent_rate = 0; + unsigned long best_rate = 0, best_over_rate = ULONG_MAX; + int i; - ret = regmap_update_bits(rcg->clkr.regmap, - RCG_N_OFFSET(rcg), mask, ~(f->n - f->m)); - if (ret) - return ret; + /* + * Iterate over all candidate parents and pick the closest 
achievable + * rate. Prefer the best rate at or below the request. If no parent can + * satisfy the request without overshooting, fall back to the smallest + * achievable rate above the request. + */ + for (i = 0; i < num_parents; i++) { + struct freq_tbl f_tbl = {}, *f = &f_tbl; + struct clk_hw *parent = clk_hw_get_parent_by_index(hw, i); + u64 prate; + unsigned long achieved; - /* Calculate 2d value */ - d_val = f->n; + if (!parent) + continue; + prate = clk_hw_get_rate(parent); + if (!prate || req->rate > prate) + continue; - n_minus_m = f->n - f->m; - n_minus_m *= 2; + clk_rcg2_calc_mnd(prate, req->rate, f, mnd_max, hid_max / 2); + if (!f->m || !f->n || f->m > f->n) + continue; + convert_to_reg_val(f); + achieved = calc_rate(prate, f->m, f->n, f->n, f->pre_div); + if (!achieved || achieved > prate) + continue; - d_val = clamp_t(u32, d_val, f->m, n_minus_m); - not2d_val = ~d_val & mask; + if (achieved <= req->rate) { + if (!best_parent || achieved > best_rate || + (achieved == best_rate && prate < best_parent_rate)) { + best_parent = parent; + best_parent_rate = prate; + best_rate = achieved; + } + } else if (!best_over_parent || achieved < best_over_rate || + (achieved == best_over_rate && prate < best_over_parent_rate)) { + best_over_parent = parent; + best_over_parent_rate = prate; + best_over_rate = achieved; + } + } - ret = regmap_update_bits(rcg->clkr.regmap, - RCG_D_OFFSET(rcg), mask, not2d_val); - if (ret) - return ret; + if (!best_parent) { + best_parent = best_over_parent; + best_parent_rate = best_over_parent_rate; + best_rate = best_over_rate; } - mask = BIT(rcg->hid_width) - 1; - mask |= CFG_SRC_SEL_MASK | CFG_MODE_MASK | CFG_HW_CLK_CTRL_MASK; - cfg = f->pre_div << CFG_SRC_DIV_SHIFT; - cfg |= rcg->parent_map[index].cfg << CFG_SRC_SEL_SHIFT; - if (rcg->mnd_width && f->n && (f->m != f->n)) - cfg |= CFG_MODE_DUAL_EDGE; - if (rcg->hw_clk_ctrl) - cfg |= CFG_HW_CLK_CTRL_MASK; + if (!best_parent) + return -EINVAL; - *_cfg &= ~mask; - *_cfg |= cfg; + 
req->best_parent_hw = best_parent; + req->best_parent_rate = best_parent_rate; + req->rate = best_rate; return 0; } @@ -463,7 +517,7 @@ static int clk_rcg2_configure_gp(struct clk_rcg2 *rcg, const struct freq_tbl *f) if (ret) return ret; - ret = __clk_rcg2_configure_mnd(rcg, f, &cfg); + ret = __clk_rcg2_configure(rcg, f, &cfg); if (ret) return ret; @@ -510,9 +564,46 @@ static int clk_rcg2_set_gp_rate(struct clk_hw *hw, unsigned long rate, int mnd_max = BIT(rcg->mnd_width) - 1; int hid_max = BIT(rcg->hid_width) - 1; struct freq_tbl f_tbl = {}, *f = &f_tbl; + u8 index; int ret; + if (!parent_rate || rate > parent_rate) + return -EINVAL; + clk_rcg2_calc_mnd(parent_rate, rate, f, mnd_max, hid_max / 2); + if (!f->m || !f->n || f->m > f->n) + return -EINVAL; + + index = clk_rcg2_get_parent(hw); + if (index >= clk_hw_get_num_parents(hw)) + return -EINVAL; + f->src = rcg->parent_map[index].src; + convert_to_reg_val(f); + rcg->configured_freq = rate; + ret = clk_rcg2_configure_gp(rcg, f); + + return ret; +} + +static int clk_rcg2_set_gp_rate_and_parent(struct clk_hw *hw, + unsigned long rate, unsigned long parent_rate, u8 index) +{ + struct clk_rcg2 *rcg = to_clk_rcg2(hw); + int mnd_max = BIT(rcg->mnd_width) - 1; + int hid_max = BIT(rcg->hid_width) - 1; + struct freq_tbl f_tbl = {}, *f = &f_tbl; + int ret; + + if (!parent_rate || rate > parent_rate) + return -EINVAL; + if (index >= clk_hw_get_num_parents(hw)) + return -EINVAL; + + clk_rcg2_calc_mnd(parent_rate, rate, f, mnd_max, hid_max / 2); + if (!f->m || !f->n || f->m > f->n) + return -EINVAL; + + f->src = rcg->parent_map[index].src; convert_to_reg_val(f); rcg->configured_freq = rate; ret = clk_rcg2_configure_gp(rcg, f); @@ -577,7 +668,7 @@ static int clk_rcg2_get_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) { struct clk_rcg2 *rcg = to_clk_rcg2(hw); - u32 notn_m, n, m, d, not2d, mask, duty_per, cfg; + u32 notn_m, n, m, d, not2d, mask, 
cfg; int ret; /* Duty-cycle cannot be modified for non-MND RCGs */ @@ -596,8 +687,6 @@ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) n = (~(notn_m) + m) & mask; - duty_per = (duty->num * 100) / duty->den; - /* Calculate 2d value */ d = DIV_ROUND_CLOSEST_ULL((u64)n * duty->num * 2, duty->den); @@ -607,9 +696,9 @@ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) */ d = clamp_val(d, 1, mask); - if (d > (n - m) * 2) + if ((d / 2) > (n - m)) d = (n - m) * 2; - else if (d < m) + else if ((d / 2) < (m / 2)) d = m; not2d = ~d & mask; @@ -635,6 +724,19 @@ const struct clk_ops clk_rcg2_ops = { }; EXPORT_SYMBOL_GPL(clk_rcg2_ops); +const struct clk_ops clk_rcg2_gp_ops = { + .is_enabled = clk_rcg2_is_enabled, + .get_parent = clk_rcg2_get_parent, + .set_parent = clk_rcg2_set_parent, + .recalc_rate = clk_rcg2_recalc_rate, + .determine_rate = clk_rcg2_determine_gp_rate, + .set_rate = clk_rcg2_set_gp_rate, + .set_rate_and_parent = clk_rcg2_set_gp_rate_and_parent, + .get_duty_cycle = clk_rcg2_get_duty_cycle, + .set_duty_cycle = clk_rcg2_set_duty_cycle, +}; +EXPORT_SYMBOL_GPL(clk_rcg2_gp_ops); + const struct clk_ops clk_rcg2_floor_ops = { .is_enabled = clk_rcg2_is_enabled, .get_parent = clk_rcg2_get_parent, From 668fa2743bd8d23e7b5afca0f9ea329e8411f181 Mon Sep 17 00:00:00 2001 From: William Norman Date: Fri, 17 Apr 2026 03:50:37 +0000 Subject: [PATCH 10/11] clk: qcom: gcc-sa8775p: wire GP clocks to clk_rcg2_gp_ops Switch the SA8775P GP clock sources over to the dedicated GP RCG ops and full parent map so they can take advantage of parent-aware rate selection instead of relying on the previous fixed-rate table. 
Signed-off-by: William Norman --- drivers/clk/qcom/gcc-sa8775p.c | 42 ++++++++++++++++------------------ 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/clk/qcom/gcc-sa8775p.c b/drivers/clk/qcom/gcc-sa8775p.c index 11c72a10ba698..a5e574e0c7167 100644 --- a/drivers/clk/qcom/gcc-sa8775p.c +++ b/drivers/clk/qcom/gcc-sa8775p.c @@ -721,8 +721,6 @@ static struct clk_rcg2 gcc_emac1_rgmii_clk_src = { }; static const struct freq_tbl ftbl_gcc_gp1_clk_src[] = { - F(100000000, P_GCC_GPLL0_OUT_MAIN, 6, 0, 0), - F(200000000, P_GCC_GPLL0_OUT_MAIN, 3, 0, 0), { } }; @@ -730,13 +728,13 @@ static struct clk_rcg2 gcc_gp1_clk_src = { .cmd_rcgr = 0x70004, .mnd_width = 16, .hid_width = 5, - .parent_map = gcc_parent_map_2, + .parent_map = gcc_parent_map_4, .freq_tbl = ftbl_gcc_gp1_clk_src, .clkr.hw.init = &(const struct clk_init_data){ .name = "gcc_gp1_clk_src", - .parent_data = gcc_parent_data_2, - .num_parents = ARRAY_SIZE(gcc_parent_data_2), - .ops = &clk_rcg2_shared_ops, + .parent_data = gcc_parent_data_4, + .num_parents = ARRAY_SIZE(gcc_parent_data_4), + .ops = &clk_rcg2_gp_ops, }, }; @@ -744,13 +742,13 @@ static struct clk_rcg2 gcc_gp2_clk_src = { .cmd_rcgr = 0x71004, .mnd_width = 16, .hid_width = 5, - .parent_map = gcc_parent_map_2, + .parent_map = gcc_parent_map_4, .freq_tbl = ftbl_gcc_gp1_clk_src, .clkr.hw.init = &(const struct clk_init_data){ .name = "gcc_gp2_clk_src", - .parent_data = gcc_parent_data_2, - .num_parents = ARRAY_SIZE(gcc_parent_data_2), - .ops = &clk_rcg2_shared_ops, + .parent_data = gcc_parent_data_4, + .num_parents = ARRAY_SIZE(gcc_parent_data_4), + .ops = &clk_rcg2_gp_ops, }, }; @@ -758,13 +756,13 @@ static struct clk_rcg2 gcc_gp3_clk_src = { .cmd_rcgr = 0x62004, .mnd_width = 16, .hid_width = 5, - .parent_map = gcc_parent_map_2, + .parent_map = gcc_parent_map_4, .freq_tbl = ftbl_gcc_gp1_clk_src, .clkr.hw.init = &(const struct clk_init_data){ .name = "gcc_gp3_clk_src", - .parent_data = gcc_parent_data_2, - .num_parents = 
ARRAY_SIZE(gcc_parent_data_2), - .ops = &clk_rcg2_shared_ops, + .parent_data = gcc_parent_data_4, + .num_parents = ARRAY_SIZE(gcc_parent_data_4), + .ops = &clk_rcg2_gp_ops, }, }; @@ -772,13 +770,13 @@ static struct clk_rcg2 gcc_gp4_clk_src = { .cmd_rcgr = 0x1e004, .mnd_width = 16, .hid_width = 5, - .parent_map = gcc_parent_map_2, + .parent_map = gcc_parent_map_4, .freq_tbl = ftbl_gcc_gp1_clk_src, .clkr.hw.init = &(const struct clk_init_data){ .name = "gcc_gp4_clk_src", - .parent_data = gcc_parent_data_2, - .num_parents = ARRAY_SIZE(gcc_parent_data_2), - .ops = &clk_rcg2_shared_ops, + .parent_data = gcc_parent_data_4, + .num_parents = ARRAY_SIZE(gcc_parent_data_4), + .ops = &clk_rcg2_gp_ops, }, }; @@ -786,13 +784,13 @@ static struct clk_rcg2 gcc_gp5_clk_src = { .cmd_rcgr = 0x1f004, .mnd_width = 16, .hid_width = 5, - .parent_map = gcc_parent_map_2, + .parent_map = gcc_parent_map_4, .freq_tbl = ftbl_gcc_gp1_clk_src, .clkr.hw.init = &(const struct clk_init_data){ .name = "gcc_gp5_clk_src", - .parent_data = gcc_parent_data_2, - .num_parents = ARRAY_SIZE(gcc_parent_data_2), - .ops = &clk_rcg2_shared_ops, + .parent_data = gcc_parent_data_4, + .num_parents = ARRAY_SIZE(gcc_parent_data_4), + .ops = &clk_rcg2_gp_ops, }, }; From 1f371aad7e83fb84308969e9d26051b324c65f5d Mon Sep 17 00:00:00 2001 From: William Norman Date: Fri, 17 Apr 2026 03:52:10 +0000 Subject: [PATCH 11/11] pwm: clk: reprogram clocks safely for 50% duty Disable the backing clock before changing its rate and duty cycle, then re-enable it after the new configuration is in place. When the request is already for an exact 50% duty cycle, skip the redundant clk_set_duty_cycle() call and leave the clock's default 50% output untouched. 
Signed-off-by: William Norman --- drivers/pwm/pwm-clk.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/pwm/pwm-clk.c b/drivers/pwm/pwm-clk.c index 9dd88b386907c..7fb6a16038012 100644 --- a/drivers/pwm/pwm-clk.c +++ b/drivers/pwm/pwm-clk.c @@ -39,30 +39,30 @@ static int pwm_clk_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { struct pwm_clk_chip *pcchip = to_pwm_clk_chip(chip); + bool was_enabled = pwm->state.enabled; + bool needs_duty_program; int ret; u32 rate; u64 period = state->period; u64 duty_cycle = state->duty_cycle; if (!state->enabled) { - if (pwm->state.enabled) { + if (was_enabled) { clk_disable(pcchip->clk); pcchip->clk_enabled = false; } return 0; - } else if (!pwm->state.enabled) { - ret = clk_enable(pcchip->clk); - if (ret) - return ret; - pcchip->clk_enabled = true; } /* - * We have to enable the clk before setting the rate and duty_cycle, - * that however results in a window where the clk is on with a - * (potentially) different setting. Also setting period and duty_cycle - * are two separate calls, so that probably isn't atomic either. + * Some clock providers cannot safely update their rate while the output + * is running. Quiesce the clock first, program rate and duty cycle, and + * then re-enable it with the new settings. */ + if (was_enabled) { + clk_disable(pcchip->clk); + pcchip->clk_enabled = false; + } rate = DIV64_U64_ROUND_UP(NSEC_PER_SEC, period); ret = clk_set_rate(pcchip->clk, rate); @@ -72,7 +72,24 @@ static int pwm_clk_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (state->polarity == PWM_POLARITY_INVERSED) duty_cycle = period - duty_cycle; - return clk_set_duty_cycle(pcchip->clk, duty_cycle, period); + /* + * clk_set_rate() programs Qualcomm GP RCGs with a default 50% duty + * cycle. Avoid a redundant second update when the request is already + * exactly 50%, because some instances time out on that extra reprogram. 
+ */ + needs_duty_program = duty_cycle != period - duty_cycle; + if (needs_duty_program) { + ret = clk_set_duty_cycle(pcchip->clk, duty_cycle, period); + if (ret) + return ret; + } + + ret = clk_enable(pcchip->clk); + if (ret) + return ret; + + pcchip->clk_enabled = true; + return 0; } static const struct pwm_ops pwm_clk_ops = {