mirror of
https://github.com/lingble/meta-tegra.git
synced 2025-10-30 03:52:41 +00:00
196 lines
6.6 KiB
Diff
196 lines
6.6 KiB
Diff
From 417688bed63554f235fdc4b03c078a8ee2efcbc8 Mon Sep 17 00:00:00 2001
|
|
From: Sumit Gupta <sumitg@nvidia.com>
|
|
Date: Wed, 4 Oct 2023 19:35:37 +0530
|
|
Subject: [PATCH 4/9] cpufreq: tegra194: use refclk delta based loop instead of
|
|
udelay
|
|
|
|
Use a reference clock count based loop instead of "udelay()" for
|
|
sampling of counters to improve the accuracy of re-generated CPU
|
|
frequency. "udelay()" internally calls "WFE" which stops the
|
|
counters and results in a bigger delta between the last set freq
|
|
and the re-generated value from counters. The counter sampling
|
|
window used in the loop is the minimum number of reference clock
|
|
cycles which is known to give a stable value of CPU frequency.
|
|
The change also helps to reduce the sampling window from "500us"
|
|
to "<50us".
|
|
|
|
Upstream-Status: Pending
|
|
Suggested-by: Antti Miettinen <amiettinen@nvidia.com>
|
|
Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
|
|
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
|
|
---
|
|
drivers/cpufreq/tegra194-cpufreq.c | 72 +++++++++++++++++++++++-------
|
|
1 file changed, 55 insertions(+), 17 deletions(-)
|
|
|
|
diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
|
|
index f6a8e6cf6d94..9dae6195e0e7 100644
|
|
--- a/drivers/cpufreq/tegra194-cpufreq.c
|
|
+++ b/drivers/cpufreq/tegra194-cpufreq.c
|
|
@@ -5,7 +5,6 @@
|
|
|
|
#include <linux/cpu.h>
|
|
#include <linux/cpufreq.h>
|
|
-#include <linux/delay.h>
|
|
#include <linux/dma-mapping.h>
|
|
#include <linux/module.h>
|
|
#include <linux/of.h>
|
|
@@ -21,10 +20,11 @@
|
|
|
|
#define KHZ 1000
|
|
#define REF_CLK_MHZ 408 /* 408 MHz */
|
|
-#define US_DELAY 500
|
|
#define CPUFREQ_TBL_STEP_HZ (50 * KHZ * KHZ)
|
|
#define MAX_CNT ~0U
|
|
|
|
+#define MAX_DELTA_KHZ 115200
|
|
+
|
|
#define NDIV_MASK 0x1FF
|
|
|
|
#define CORE_OFFSET(cpu) (cpu * 8)
|
|
@@ -68,6 +68,7 @@ struct tegra_cpufreq_soc {
|
|
int maxcpus_per_cluster;
|
|
unsigned int num_clusters;
|
|
phys_addr_t actmon_cntr_base;
|
|
+ u32 refclk_delta_min;
|
|
};
|
|
|
|
struct tegra194_cpufreq_data {
|
|
@@ -149,6 +150,8 @@ static void tegra234_read_counters(struct tegra_cpu_ctr *c)
|
|
{
|
|
struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
|
|
void __iomem *actmon_reg;
|
|
+ u32 delta_refcnt;
|
|
+ int cnt = 0;
|
|
u64 val;
|
|
|
|
actmon_reg = CORE_ACTMON_CNTR_REG(data, data->cpu_data[c->cpu].clusterid,
|
|
@@ -157,10 +160,25 @@ static void tegra234_read_counters(struct tegra_cpu_ctr *c)
|
|
val = readq(actmon_reg);
|
|
c->last_refclk_cnt = upper_32_bits(val);
|
|
c->last_coreclk_cnt = lower_32_bits(val);
|
|
- udelay(US_DELAY);
|
|
- val = readq(actmon_reg);
|
|
- c->refclk_cnt = upper_32_bits(val);
|
|
- c->coreclk_cnt = lower_32_bits(val);
|
|
+
|
|
+ /*
|
|
+ * The sampling window is based on the minimum number of reference
|
|
+ * clock cycles which is known to give a stable value of CPU frequency.
|
|
+ */
|
|
+ do {
|
|
+ val = readq(actmon_reg);
|
|
+ c->refclk_cnt = upper_32_bits(val);
|
|
+ c->coreclk_cnt = lower_32_bits(val);
|
|
+ if (c->refclk_cnt < c->last_refclk_cnt)
|
|
+ delta_refcnt = c->refclk_cnt + (MAX_CNT - c->last_refclk_cnt);
|
|
+ else
|
|
+ delta_refcnt = c->refclk_cnt - c->last_refclk_cnt;
|
|
+ if (++cnt >= 0xFFFF) {
|
|
+ pr_warn("cpufreq: problem with refclk on cpu:%d, delta_refcnt:%u, cnt:%d\n",
|
|
+ c->cpu, delta_refcnt, cnt);
|
|
+ break;
|
|
+ }
|
|
+ } while (delta_refcnt < data->soc->refclk_delta_min);
|
|
}
|
|
|
|
static struct tegra_cpufreq_ops tegra234_cpufreq_ops = {
|
|
@@ -175,6 +193,7 @@ static const struct tegra_cpufreq_soc tegra234_cpufreq_soc = {
|
|
.actmon_cntr_base = 0x9000,
|
|
.maxcpus_per_cluster = 4,
|
|
.num_clusters = 3,
|
|
+ .refclk_delta_min = 16000,
|
|
};
|
|
|
|
static const struct tegra_cpufreq_soc tegra239_cpufreq_soc = {
|
|
@@ -182,6 +201,7 @@ static const struct tegra_cpufreq_soc tegra239_cpufreq_soc = {
|
|
.actmon_cntr_base = 0x4000,
|
|
.maxcpus_per_cluster = 8,
|
|
.num_clusters = 1,
|
|
+ .refclk_delta_min = 16000,
|
|
};
|
|
|
|
static void tegra194_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid)
|
|
@@ -222,15 +242,33 @@ static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response
|
|
|
|
static void tegra194_read_counters(struct tegra_cpu_ctr *c)
|
|
{
|
|
+ struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
|
|
+ u32 delta_refcnt;
|
|
+ int cnt = 0;
|
|
u64 val;
|
|
|
|
val = read_freq_feedback();
|
|
c->last_refclk_cnt = lower_32_bits(val);
|
|
c->last_coreclk_cnt = upper_32_bits(val);
|
|
- udelay(US_DELAY);
|
|
- val = read_freq_feedback();
|
|
- c->refclk_cnt = lower_32_bits(val);
|
|
- c->coreclk_cnt = upper_32_bits(val);
|
|
+
|
|
+ /*
|
|
+ * The sampling window is based on the minimum number of reference
|
|
+ * clock cycles which is known to give a stable value of CPU frequency.
|
|
+ */
|
|
+ do {
|
|
+ val = read_freq_feedback();
|
|
+ c->refclk_cnt = lower_32_bits(val);
|
|
+ c->coreclk_cnt = upper_32_bits(val);
|
|
+ if (c->refclk_cnt < c->last_refclk_cnt)
|
|
+ delta_refcnt = c->refclk_cnt + (MAX_CNT - c->last_refclk_cnt);
|
|
+ else
|
|
+ delta_refcnt = c->refclk_cnt - c->last_refclk_cnt;
|
|
+ if (++cnt >= 0xFFFF) {
|
|
+ pr_warn("cpufreq: problem with refclk on cpu:%d, delta_refcnt:%u, cnt:%d\n",
|
|
+ c->cpu, delta_refcnt, cnt);
|
|
+ break;
|
|
+ }
|
|
+ } while (delta_refcnt < data->soc->refclk_delta_min);
|
|
}
|
|
|
|
static void tegra_read_counters(struct work_struct *work)
|
|
@@ -288,9 +326,8 @@ static unsigned int tegra194_calculate_speed(u32 cpu)
|
|
u32 rate_mhz;
|
|
|
|
/*
|
|
- * udelay() is required to reconstruct cpu frequency over an
|
|
- * observation window. Using workqueue to call udelay() with
|
|
- * interrupts enabled.
|
|
+ * Reconstruct cpu frequency over an observation/sampling window.
|
|
+ * Using workqueue to keep interrupts enabled during the interval.
|
|
*/
|
|
read_counters_work.c.cpu = cpu;
|
|
INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
|
|
@@ -372,9 +409,9 @@ static unsigned int tegra194_get_speed(u32 cpu)
|
|
if (pos->driver_data != ndiv)
|
|
continue;
|
|
|
|
- if (abs(pos->frequency - rate) > 115200) {
|
|
- pr_warn("cpufreq: cpu%d,cur:%u,set:%u,set ndiv:%llu\n",
|
|
- cpu, rate, pos->frequency, ndiv);
|
|
+ if (abs(pos->frequency - rate) > MAX_DELTA_KHZ) {
|
|
+ pr_warn("cpufreq: cpu%d,cur:%u,set:%u,delta:%d,set ndiv:%llu\n",
|
|
+ cpu, rate, pos->frequency, abs(rate - pos->frequency), ndiv);
|
|
} else {
|
|
rate = pos->frequency;
|
|
}
|
|
@@ -568,6 +605,7 @@ static const struct tegra_cpufreq_soc tegra194_cpufreq_soc = {
|
|
.ops = &tegra194_cpufreq_ops,
|
|
.maxcpus_per_cluster = 2,
|
|
.num_clusters = 4,
|
|
+ .refclk_delta_min = 16000,
|
|
};
|
|
|
|
static void tegra194_cpufreq_free_resources(void)
|
|
@@ -684,7 +722,7 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev)
|
|
|
|
soc = of_device_get_match_data(&pdev->dev);
|
|
|
|
- if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters) {
|
|
+ if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters && soc->refclk_delta_min) {
|
|
data->soc = soc;
|
|
} else {
|
|
dev_err(&pdev->dev, "soc data missing\n");
|
|
--
|
|
2.25.1
|
|
|