Files
OpenCellular/common/thermal.c
Bill Richardson fcce7223a5 Completely new thermal/fan implementation
Problems with existing thermal control loop:
* Not multi-board friendly. thermal.c only supports Link and needs
  refactoring. Temp thresholds and fan speeds are hard-coded.
* Only the PECI temp is used to determine the fan speed. Other temp sensors
  are ignored.
* Has confusing data structures. Values in the CPU temp thresholds array mix
  ACPI thresholds with fan step values.

With this change, the thermal task monitors all temp sensors in order to
perform two completely independent functions:

Function one: Determine if the host needs to be throttled by or informed of
              any thermal events.

For thermal events, each temp sensor will have three threshold levels.

TEMP_HOST_WARN
* When any sensor goes above this level, host_throttle_cpu(1) will be called
  to ask the CPU to slow itself down.
* When all sensors drop below this level, host_throttle_cpu(0) will be called.
* Exactly AT this level, nothing happens (this provides hysteresis).

TEMP_HOST_HIGH
* When any sensor goes above this level, chipset_throttle_cpu(1) will be
  called to slow the CPU down whether it wants to or not.
* When all sensors drop below this level, chipset_throttle_cpu(0) will be
  called.
* Exactly AT this level, nothing happens (this provides hysteresis).

TEMP_HOST_SHUTDOWN
* When any sensor is above this level, chipset_force_shutdown() will be
  called to halt the CPU.
* Nothing turns the CPU back on again - the user just has to wait for things
  to cool off. Pressing the power button too soon will just trigger shutdown
  again as soon as the EC can read the host temp.

Function two: Determine the amount of fan cooling needed

For fan cooling, each temp sensor will have two levels.

TEMP_FAN_OFF
* At or below this temperature, no active cooling is needed.

TEMP_FAN_MAX
* At or above this temperature, active cooling should be running at maximum.

The highest level of all temp sensors will be used to request the amount of
active cooling needed. The function pwm_fan_percent_to_rpm() is invoked to
convert the amount of cooling to the target fan RPM.

The default pwm_fan_percent_to_rpm() function converts smoothly between the
configured CONFIG_PWM_FAN_RPM_MIN and CONFIG_PWM_FAN_RPM_MAX for percentages
between 1 and 100. 0% means "off".

The default function probably provides the smoothest and quietest behavior,
but individual boards can provide their own pwm_fan_percent_to_rpm() to
implement whatever curves, hysteresis, feedback, or other hackery they wish.

BUG=chrome-os-partner:20805
BRANCH=none
TEST=manual

Compile-time test with

  make BOARD=falco runtests

On the EC console, the existing fan commands should work correctly:

  faninfo       - display the fan state
  fanduty NUM   - force the fan PWM to the specified percentage (0-100)
  fanset RPM    - force the fan to the specified RPM
  fanset NUM%   - force the fan to the specified percentage (0-100) between
                  its configured minimum and maximum speeds from board.h
                  (CONFIG_PWM_FAN_RPM_MIN and CONFIG_PWM_FAN_RPM_MAX)
  fanauto       - let the EC control the fan automatically

You can test the default pwm_fan_percent_to_rpm() with

  fanset 1%
  faninfo

The fan should be turning at CONFIG_PWM_FAN_RPM_MIN. Let the EC control it
automatically again with

  fanauto

Also on the EC console, the thermal settings can be examined or changed:

  > temps
  PECI                : 327 K = 54 C
  ECInternal          : 320 K = 47 C
  G781Internal        : 319 K = 46 C
  G781External        : 318 K = 45 C
  >
  > thermalget
  sensor  warn  high  shutdown   fan_off fan_max   name
    0      373   387    383        333     363     PECI
    1        0     0      0          0       0     ECInternal
    2        0     0      0          0       0     G781Internal
    3        0     0      0          0       0     G781External
  >
  > help thermalset
  Usage: thermalset sensor warn [high [shutdown [fan_off [fan_max]]]]
  set thermal parameters (-1 to skip)
  >
  > thermalset 2 -1 -1 999
  sensor  warn  high  shutdown   fan_off fan_max   name
    0      373   387    383        333     363     PECI
    1        0     0      0          0       0     ECInternal
    2        0     0    999          0       0     G781Internal
    3        0     0      0          0       0     G781External
  >

From the host, ectool can be used to get and set these parameters with
nearly identical commands:

  ectool thermalget
  ectool thermalset 2 -1 -1 999

Change-Id: Idb27977278f766826045fb7d41929953ec6b1cca
Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Reviewed-on: https://gerrit.chromium.org/gerrit/66688
Reviewed-by: Randall Spangler <rspangler@chromium.org>
2013-08-23 10:38:36 -07:00

268 lines
6.9 KiB
C

/* Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
/* NEW thermal engine module for Chrome EC. This is a completely different
* implementation from the original version that shipped on Link.
*/
#include "chipset.h"
#include "common.h"
#include "console.h"
#include "fan.h"
#include "hooks.h"
#include "host_command.h"
#include "temp_sensor.h"
#include "thermal.h"
#include "timer.h"
#include "util.h"
/* Console output macros. Both wrappers send their output to the CC_THERMAL
 * console channel: CPUTS() takes a plain string, CPRINTF() takes a format
 * string plus arguments.
 */
#define CPUTS(outstr) cputs(CC_THERMAL, outstr)
#define CPRINTF(format, args...) cprintf(CC_THERMAL, format, ## args)
/* Report that no temperature sensor could be read.
 *
 * Prints a warning on the thermal console channel and then raises
 * EC_HOST_EVENT_THERMAL via host_set_single_event(). thermal_control()
 * calls this when every temp_sensor_read() attempt in a pass has failed.
 *
 * NOTE(review): test_mockable_static presumably lets unit tests replace
 * this function with a mock - confirm against its definition.
 */
test_mockable_static void smi_sensor_failure_warning(void)
{
CPRINTF("[%T can't read any temp sensors!]\n");
host_set_single_event(EC_HOST_EVENT_THERMAL);
}
/* Convert a temperature into the percentage of fan cooling it calls for.
 *
 * @param low   Temperature at or below which no cooling is needed (0%).
 * @param high  Temperature at or above which full cooling is needed (100%).
 * @param cur   Current temperature, in the same units as low and high.
 *
 * @return 0 if cur is below low, 100 if cur is at or above high, otherwise
 *         the linear interpolation between the two, truncated to an integer.
 *
 * If low and high are configured to the same value, a reading exactly at
 * that value returns 100 rather than dividing by zero.
 */
static int fan_percent(int low, int high, int cur)
{
	if (cur < low)
		return 0;

	/* Use >= rather than >: the result at cur == high is 100 either way,
	 * and it guarantees (high - low) > 0 below, so a degenerate
	 * low == high configuration cannot cause a division by zero.
	 */
	if (cur >= high)
		return 100;

	/* Here low <= cur < high, so the divisor is strictly positive. */
	return 100 * (cur - low) / (high - low);
}
/* The logic below is hard-coded for only three thresholds: WARN, HIGH, HALT.
 * This is just a sanity check to be sure we catch any changes in thermal.h:
 * the build fails if the number of thresholds is ever anything other than 3.
 */
BUILD_ASSERT(EC_TEMP_THRESH_COUNT == 3);
/* Keep track of which thresholds have triggered. There is one condition per
 * threshold level, indexed by EC_TEMP_THRESH_*; thermal_control() sets or
 * clears each one and then acts on its transitions.
 */
static cond_t cond_hot[EC_TEMP_THRESH_COUNT];
/* Periodic thermal policy, run from the HOOK_SECOND hook.
 *
 * Reads every temperature sensor and does two independent things with the
 * results:
 *   1. Aggregates the per-sensor host thresholds (WARN / HIGH / HALT) and
 *      throttles or shuts down the host on transitions.
 *   2. Computes the largest fan percentage requested by any sensor and
 *      hands it to pwm_fan_set_percent_needed().
 *
 * A threshold or fan limit of zero means "not configured" and is skipped.
 */
static void thermal_control(void)
{
	int over[EC_TEMP_THRESH_COUNT] = { 0 };       /* sensors above limit */
	int under[EC_TEMP_THRESH_COUNT] = { 0 };      /* sensors below limit */
	int configured[EC_TEMP_THRESH_COUNT] = { 0 }; /* sensors with a limit */
	int sensors_ok = 0;
	int fan_needed = 0;
	int sensor, level, temp, pct;

	for (sensor = 0; sensor < TEMP_SENSOR_COUNT; ++sensor) {
		/* Sensors that can't be read are left out of every count. */
		if (temp_sensor_read(sensor, &temp) != EC_SUCCESS)
			continue;
		sensors_ok++;

		/* Compare this reading against each configured threshold.
		 * A reading exactly at the limit counts as neither over nor
		 * under, which gives a little hysteresis.
		 */
		for (level = 0; level < EC_TEMP_THRESH_COUNT; level++) {
			int limit = thermal_params[sensor].temp_host[level];

			if (!limit)
				continue;

			configured[level]++;
			if (temp > limit)
				over[level]++;
			else if (temp < limit)
				under[level]++;
		}

		/* Fan demand needs both endpoints configured. */
		if (!thermal_params[sensor].temp_fan_off ||
		    !thermal_params[sensor].temp_fan_max)
			continue;

		pct = fan_percent(thermal_params[sensor].temp_fan_off,
				  thermal_params[sensor].temp_fan_max,
				  temp);
		if (pct > fan_needed)
			fan_needed = pct;
	}

	if (sensors_ok == 0) {
		/* With no readings at all, just warn and leave the throttle
		 * and fan state untouched, hoping things improve.
		 * FIXME: What *should* we do?
		 */
		smi_sensor_failure_warning();
		return;
	}

	/* Aggregate: a single sensor over its limit makes the level hot, but
	 * every sensor with a limit must be under it before the level is
	 * considered cool again.
	 */
	for (level = 0; level < EC_TEMP_THRESH_COUNT; level++) {
		if (over[level])
			cond_set_true(&cond_hot[level]);
		else if (under[level] == configured[level])
			cond_set_false(&cond_hot[level]);
	}

	/* Act on transitions (hard-coded for exactly three levels). */

	if (cond_went_true(&cond_hot[EC_TEMP_THRESH_HALT])) {
		CPRINTF("[%T thermal SHUTDOWN]\n");
		chipset_force_shutdown();
	} else if (cond_went_false(&cond_hot[EC_TEMP_THRESH_HALT])) {
		/* Nothing restarts the host automatically; the user has to
		 * press the power button. We probably can't even observe
		 * this transition until then, but the check is still needed
		 * so that the cond_t gets cleared.
		 */
		CPRINTF("[%T thermal no longer shutdown]\n");
	}

	if (cond_went_true(&cond_hot[EC_TEMP_THRESH_HIGH])) {
		CPRINTF("[%T thermal HIGH]\n");
		chipset_throttle_cpu(1);
	} else if (cond_went_false(&cond_hot[EC_TEMP_THRESH_HIGH])) {
		CPRINTF("[%T thermal no longer high]\n");
		chipset_throttle_cpu(0);
	}

	if (cond_went_true(&cond_hot[EC_TEMP_THRESH_WARN])) {
		CPRINTF("[%T thermal WARN]\n");
		host_throttle_cpu(1);
	} else if (cond_went_false(&cond_hot[EC_TEMP_THRESH_WARN])) {
		CPRINTF("[%T thermal no longer warn]\n");
		host_throttle_cpu(0);
	}

	/* The hottest sensor's request decides the fan speed. */
	pwm_fan_set_percent_needed(fan_needed);
}
/* Wait until after the sensors have been read */
DECLARE_HOOK(HOOK_SECOND, thermal_control, HOOK_PRIO_TEMP_SENSOR + 1);
/*****************************************************************************/
/* Console commands */
/* Console command "thermalget": print a table with one row per temperature
 * sensor showing its three host thresholds, its two fan limits and its name.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return EC_SUCCESS always.
 */
static int command_thermalget(int argc, char **argv)
{
	int sensor = 0;

	/* Column header */
	ccprintf("sensor warn high halt fan_off fan_max name\n");

	while (sensor < TEMP_SENSOR_COUNT) {
		ccprintf(" %2d %3d %3d %3d %3d %3d %s\n",
			 sensor,
			 thermal_params[sensor].temp_host[EC_TEMP_THRESH_WARN],
			 thermal_params[sensor].temp_host[EC_TEMP_THRESH_HIGH],
			 thermal_params[sensor].temp_host[EC_TEMP_THRESH_HALT],
			 thermal_params[sensor].temp_fan_off,
			 thermal_params[sensor].temp_fan_max,
			 temp_sensors[sensor].name);
		sensor++;
	}

	return EC_SUCCESS;
}
DECLARE_CONSOLE_COMMAND(thermalget, command_thermalget,
			NULL,
			"Print thermal parameters (degrees Kelvin)",
			NULL);
static int command_thermalset(int argc, char **argv)
{
unsigned int n;
int i, val;
char *e;
if (argc < 3 || argc > 7)
return EC_ERROR_PARAM_COUNT;
n = (unsigned int)strtoi(argv[1], &e, 0);
if (*e)
return EC_ERROR_PARAM1;
for (i = 2; i < argc; i++) {
val = strtoi(argv[i], &e, 0);
if (*e)
return EC_ERROR_PARAM1 + i - 1;
if (val < 0)
continue;
switch (i) {
case 2:
thermal_params[n].temp_host[EC_TEMP_THRESH_WARN] = val;
break;
case 3:
thermal_params[n].temp_host[EC_TEMP_THRESH_HIGH] = val;
break;
case 4:
thermal_params[n].temp_host[EC_TEMP_THRESH_HALT] = val;
break;
case 5:
thermal_params[n].temp_fan_off = val;
break;
case 6:
thermal_params[n].temp_fan_max = val;
break;
}
}
command_thermalget(0, 0);
return EC_SUCCESS;
}
DECLARE_CONSOLE_COMMAND(thermalset, command_thermalset,
"sensor warn [high [shutdown [fan_off [fan_max]]]]",
"Set thermal parameters (degrees Kelvin)."
" Use -1 to skip.",
NULL);
/*****************************************************************************/
/* Host commands. We'll reuse the host command number, but this is version 1,
* not version 0. Different structs, different meanings.
*/
/* Host command handler (version 1 of EC_CMD_THERMAL_SET_THRESHOLD):
 * replace the whole thermal configuration of one sensor.
 *
 * @param args  Host command arguments; args->params is read as a
 *              struct ec_params_thermal_set_threshold_v1.
 * @return EC_RES_SUCCESS, or EC_RES_INVALID_PARAM if the requested sensor
 *         number is not below TEMP_SENSOR_COUNT.
 */
static int thermal_command_set_threshold(struct host_cmd_handler_args *args)
{
	const struct ec_params_thermal_set_threshold_v1 *req = args->params;

	if (req->sensor_num < TEMP_SENSOR_COUNT) {
		thermal_params[req->sensor_num] = req->cfg;
		return EC_RES_SUCCESS;
	}

	return EC_RES_INVALID_PARAM;
}
DECLARE_HOST_COMMAND(EC_CMD_THERMAL_SET_THRESHOLD,
		     thermal_command_set_threshold,
		     EC_VER_MASK(1));
/* Host command handler (version 1 of EC_CMD_THERMAL_GET_THRESHOLD):
 * return the whole thermal configuration of one sensor.
 *
 * @param args  Host command arguments; args->params is read as a
 *              struct ec_params_thermal_get_threshold_v1 and the reply is
 *              written to args->response as a struct ec_thermal_config.
 * @return EC_RES_SUCCESS with args->response_size set, or
 *         EC_RES_INVALID_PARAM if the requested sensor number is not below
 *         TEMP_SENSOR_COUNT.
 */
static int thermal_command_get_threshold(struct host_cmd_handler_args *args)
{
	const struct ec_params_thermal_get_threshold_v1 *req = args->params;
	struct ec_thermal_config *out = args->response;

	if (req->sensor_num < TEMP_SENSOR_COUNT) {
		*out = thermal_params[req->sensor_num];
		args->response_size = sizeof(*out);
		return EC_RES_SUCCESS;
	}

	return EC_RES_INVALID_PARAM;
}
DECLARE_HOST_COMMAND(EC_CMD_THERMAL_GET_THRESHOLD,
		     thermal_command_get_threshold,
		     EC_VER_MASK(1));