Files
OpenCellular/test/thermal_old.c
Bill Richardson fcce7223a5 Completely new thermal/fan implementation
Problems with existing thermal control loop:
* Not multi-board friendly. thermal.c only supports Link and needs
  refactoring. Temp thresholds and fan speeds are hard-coded.
* Only the PECI temp is used to determine the fan speed. Other temp sensors
  are ignored.
* Has confusing data structures. Values in the CPU temp thresholds array mix
  ACPI thresholds with fan step values.

With this change, the thermal task monitors all temp sensors in order to
perform two completely independent functions:

Function one: Determine if the host needs to be throttled by or informed of
              any thermal events.

For thermal events, each temp sensor will have three threshold levels.

TEMP_HOST_WARN
* When any sensor goes above this level, host_throttle_cpu(1) will be called
  to ask the CPU to slow itself down.
* When all sensors drop below this level, host_throttle_cpu(0) will be called.
* Exactly AT this level, nothing happens (this provides hysteresis).

TEMP_HOST_HIGH
* When any sensor goes above this level, chipset_throttle_cpu(1) will be
  called to slow the CPU down whether it wants to or not.
* When all sensors drop below this level, chipset_throttle_cpu(0) will be
  called.
* Exactly AT this level, nothing happens (this provides hysteresis).

TEMP_HOST_SHUTDOWN
* When any sensor is above this level, chipset_force_shutdown() will be
  called to halt the CPU.
* Nothing turns the CPU back on again - the user just has to wait for things
  to cool off. Pressing the power button too soon will just trigger shutdown
  again as soon as the EC can read the host temp.

Function two: Determine the amount of fan cooling needed

For fan cooling, each temp sensor will have two levels.

TEMP_FAN_OFF
* At or below this temperature, no active cooling is needed.

TEMP_FAN_MAX
* At or above this temperature, active cooling should be running at maximum.

The highest level of all temp sensors will be used to request the amount of
active cooling needed. The function pwm_fan_percent_to_rpm() is invoked to
convert the amount of cooling to the target fan RPM.

The default pwm_fan_percent_to_rpm() function converts smoothly between the
configured CONFIG_PWM_FAN_RPM_MIN and CONFIG_PWM_FAN_RPM_MAX for percentages
between 1 and 100. 0% means "off".

The default function probably provides the smoothest and quietest behavior,
but individual boards can provide their own pwm_fan_percent_to_rpm() to
implement whatever curves, hysteresis, feedback, or other hackery they wish.

BUG=chrome-os-partner:20805
BRANCH=none
TEST=manual

Compile-time test with

  make BOARD=falco runtests

On the EC console, the existing fan commands should work correctly:

  faninfo       - display the fan state
  fanduty NUM   - force the fan PWM to the specified percentage (0-100)
  fanset RPM    - force the fan to the specified RPM
  fanset NUM%   - force the fan to the specified percentage (0-100) between
                  its configured minimum and maximum speeds from board.h
                  (CONFIG_PWM_FAN_RPM_MIN and CONFIG_PWM_FAN_RPM_MAX)
  fanauto       - let the EC control the fan automatically

You can test the default pwm_fan_percent_to_rpm() with

  fanset 1%
  faninfo

The fan should be turning at CONFIG_PWM_FAN_RPM_MIN. Let the EC control it
automatically again with

  fanauto

Also on the EC console, the thermal settings can be examined or changed:

  > temps
  PECI                : 327 K = 54 C
  ECInternal          : 320 K = 47 C
  G781Internal        : 319 K = 46 C
  G781External        : 318 K = 45 C
  >
  > thermalget
  sensor  warn  high  shutdown   fan_off fan_max   name
    0      373   387    383        333     363     PECI
    1        0     0      0          0       0     ECInternal
    2        0     0      0          0       0     G781Internal
    3        0     0      0          0       0     G781External
  >
  > help thermalset
  Usage: thermalset sensor warn [high [shutdown [fan_off [fan_max]]]]
  set thermal parameters (-1 to skip)
  >
  > thermalset 2 -1 -1 999
  sensor  warn  high  shutdown   fan_off fan_max   name
    0      373   387    383        333     363     PECI
    1        0     0      0          0       0     ECInternal
    2        0     0    999          0       0     G781Internal
    3        0     0      0          0       0     G781External
  >

From the host, ectool can be used to get and set these parameters with
nearly identical commands:

  ectool thermalget
  ectool thermalset 2 -1 -1 999

Change-Id: Idb27977278f766826045fb7d41929953ec6b1cca
Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Reviewed-on: https://gerrit.chromium.org/gerrit/66688
Reviewed-by: Randall Spangler <rspangler@chromium.org>
2013-08-23 10:38:36 -07:00

427 lines
10 KiB
C

/* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*
* Test thermal engine.
*/
#include "common.h"
#include "console.h"
#include "hooks.h"
#include "host_command.h"
#include "printf.h"
#include "temp_sensor.h"
#include "test_util.h"
#include "thermal.h"
#include "timer.h"
#include "util.h"
/*
 * Mock state shared between the mock functions below and the tests.
 */
/*
 * Reading served by temp_sensor_read() for each sensor. A negative entry
 * makes temp_sensor_read() return its negation as an error code instead.
 */
static int mock_temp[TEMP_SENSOR_COUNT];
/* Last value passed to pwm_set_fan_target_rpm(). */
static int fan_rpm;
/* Last value passed to pwm_set_fan_rpm_mode(); starts out as 1. */
static int fan_rpm_mode = 1;
/* Last value passed to chipset_throttle_cpu(). */
static int cpu_throttled;
/* Set to 1 by chipset_force_shutdown(); the tests clear it by hand. */
static int cpu_down;
/* Defined outside this file (presumably by the thermal module under test). */
extern struct thermal_config_t thermal_config[TEMP_SENSOR_TYPE_COUNT];
extern const int fan_speed[THERMAL_FAN_STEPS + 1];
/*****************************************************************************/
/* Mock functions */
/*
 * Mock sensor read: serve the entry stored in mock_temp[] for sensor `id`.
 *
 * A non-negative entry is written through temp_ptr and EC_SUCCESS is
 * returned. A negative entry stands for a failure: its negation is returned
 * as the error code and *temp_ptr is left untouched.
 */
int temp_sensor_read(enum temp_sensor_id id, int *temp_ptr)
{
	const int reading = mock_temp[id];

	if (reading < 0)
		return -reading;

	*temp_ptr = reading;
	return EC_SUCCESS;
}
/* Mock: record the requested fan RPM mode in fan_rpm_mode for later checks. */
void pwm_set_fan_rpm_mode(int rpm_mode)
{
fan_rpm_mode = rpm_mode;
}
/* Mock: record the requested fan target RPM in fan_rpm for later checks. */
void pwm_set_fan_target_rpm(int rpm)
{
fan_rpm = rpm;
}
/*
 * Mock: latch cpu_down to 1. Nothing here clears it again; tests reset it
 * themselves after checking it.
 */
void chipset_force_shutdown(void)
{
cpu_down = 1;
}
/* Mock: record the most recent throttle request in cpu_throttled. */
void chipset_throttle_cpu(int throttled)
{
cpu_throttled = throttled;
}
/*****************************************************************************/
/* Test utilities */
/* Test shorthands */
/*
 * Short sensor names. They are used both as sensor IDs (index into
 * mock_temp[] and temp_sensors[]) and as sensor types (index into
 * thermal_config[]); check_assumption() verifies the two numberings agree.
 */
#define T_CPU TEMP_SENSOR_CPU
#define T_BOARD TEMP_SENSOR_BOARD
#define T_CASE TEMP_SENSOR_CASE
/* Threshold number y configured for sensor type x. */
#define THRESHOLD(x, y) (thermal_config[x].thresholds[y])
/*
 * Fan step y for sensor type x. Fan steps live in the same thresholds[]
 * array, starting at index THRESHOLD_COUNT.
 */
#define FAN_THRESHOLD(x, y) THRESHOLD(x, THRESHOLD_COUNT + (y))
static void reset_mock_temp(void)
{
int i;
enum temp_sensor_type type;
for (i = 0; i < TEMP_SENSOR_COUNT; ++i) {
type = temp_sensors[i].type;
mock_temp[i] = FAN_THRESHOLD(type, 0) - 1;
}
}
/*
 * Poll fan_rpm until it equals `rpm`.
 *
 * For a non-negative timeout_secs the value is checked timeout_secs + 1
 * times, with a usleep(SECOND) after every failed check (including the last
 * one).
 *
 * Returns 1 as soon as a check matches, 0 if none did.
 */
static int wait_fan_rpm(int rpm, int timeout_secs)
{
	int retries_left = timeout_secs;

	for (;;) {
		if (fan_rpm == rpm)
			return 1;

		usleep(SECOND);

		/* Test the old count, then decrement, as a do/while would. */
		if (!retries_left--)
			return 0;
	}
}
/*
 * Poll *v until it equals `target`.
 *
 * For a non-negative timeout_secs the value is checked timeout_secs + 1
 * times, with a usleep(SECOND) after every failed check (including the last
 * one).
 *
 * Returns 1 as soon as a check matches, 0 if none did.
 */
static int wait_value(int *v, int target, int timeout_secs)
{
	int retries_left = timeout_secs;

	for (;;) {
		if (*v == target)
			return 1;

		usleep(SECOND);

		/* Test the old count, then decrement, as a do/while would. */
		if (!retries_left--)
			return 0;
	}
}
/* Wait for *v to become 1; returns what wait_value() returns (1 hit, 0 not). */
static int wait_set(int *v, int timeout_secs)
{
return wait_value(v, 1, timeout_secs);
}
/* Wait for *v to become 0; returns what wait_value() returns (1 hit, 0 not). */
static int wait_clear(int *v, int timeout_secs)
{
return wait_value(v, 0, timeout_secs);
}
/*****************************************************************************/
/* Tests */
/*
 * Check the state before any test has touched the mocks: the CPU is neither
 * throttled nor shut down, and neither the thermal-overload nor the
 * thermal-shutdown host event is set.
 */
static int test_init_val(void)
{
/* Initial mock temperature values are all zero (mock_temp[] is static). */
TEST_ASSERT(cpu_throttled == 0);
TEST_ASSERT(cpu_down == 0);
TEST_ASSERT(!(host_get_events() &
EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL_OVERLOAD)));
TEST_ASSERT(!(host_get_events() &
EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL_SHUTDOWN)));
return EC_SUCCESS;
}
/*
 * Drive the mocked CPU temperature across the fan steps and check the fan
 * target RPM recorded by the pwm mock: stepping up, staying put one unit
 * below a step, and not reacting before the sensor's action delay is over.
 */
static int test_cpu_fan(void)
{
reset_mock_temp();
/*
 * Increase CPU temperature to first fan step and check if
 * the fan comes up.
 */
mock_temp[T_CPU] = FAN_THRESHOLD(T_CPU, 0);
TEST_ASSERT(wait_fan_rpm(fan_speed[1], 11));
/* Increase CPU temperature to second fan step */
mock_temp[T_CPU] = FAN_THRESHOLD(T_CPU, 1);
TEST_ASSERT(wait_fan_rpm(fan_speed[2], 11));
/*
 * Test threshold hysteresis: one unit below the second step, the fan
 * must still be at the second speed 15 seconds later.
 */
mock_temp[T_CPU]--;
usleep(15 * SECOND);
TEST_ASSERT(fan_rpm == fan_speed[2]);
/*
 * Test action delay: jump to fan step 4 but look one second before
 * action_delay_sec has elapsed; the fan target must not have moved yet.
 */
mock_temp[T_CPU] = FAN_THRESHOLD(T_CPU, 4);
usleep((temp_sensors[T_CPU].action_delay_sec - 1) * SECOND);
TEST_ASSERT(fan_rpm == fan_speed[2]);
/* Leave the CPU at the first fan step for the tests that follow. */
mock_temp[T_CPU] = FAN_THRESHOLD(T_CPU, 0);
return EC_SUCCESS;
}
/*
 * Walk the mocked CPU temperature through the warning, CPU_DOWN and
 * POWER_DOWN thresholds and check the throttle flag, the shutdown flag and
 * the matching host events.
 */
static int test_safety(void)
{
reset_mock_temp();
/* Trigger CPU throttling */
mock_temp[T_CPU] = THRESHOLD(T_CPU, THRESHOLD_WARNING);
TEST_ASSERT(wait_set(&cpu_throttled, 11));
TEST_ASSERT(host_get_events() &
EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL_OVERLOAD));
/* Lower temperature. CPU not throttled anymore. */
mock_temp[T_CPU] = THRESHOLD(T_CPU, THRESHOLD_WARNING) - 5;
TEST_ASSERT(wait_clear(&cpu_throttled, 2));
/* Thermal shutdown */
mock_temp[T_CPU] = THRESHOLD(T_CPU, THRESHOLD_CPU_DOWN);
TEST_ASSERT(wait_set(&cpu_down, 11));
TEST_ASSERT(host_get_events() &
EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL_SHUTDOWN));
/*
 * Cool down and wait a second before clearing the latched flag by hand
 * (the chipset_force_shutdown() mock never clears cpu_down itself).
 */
mock_temp[T_CPU] = 0;
usleep(SECOND);
cpu_down = 0;
/* The POWER_DOWN threshold is expected to force a shutdown as well. */
mock_temp[T_CPU] = THRESHOLD(T_CPU, THRESHOLD_POWER_DOWN);
TEST_ASSERT(wait_set(&cpu_down, 11));
TEST_ASSERT(host_get_events() &
EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL_SHUTDOWN));
mock_temp[T_CPU] = 0;
cpu_down = 0;
return EC_SUCCESS;
}
/*
 * Make the mocked CPU sensor fail (a negative mock_temp[] entry turns into
 * an error code in temp_sensor_read()) and check which failures raise
 * EC_HOST_EVENT_THERMAL.
 */
static int test_sensor_failure(void)
{
reset_mock_temp();
/* Failure due to sensor not powered should be ignored */
mock_temp[T_CPU] = -EC_ERROR_NOT_POWERED;
usleep(5 * SECOND);
TEST_ASSERT(!(host_get_events() &
EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL)));
/* Other failure should be pumped up to host */
mock_temp[T_CPU] = -EC_ERROR_UNKNOWN;
usleep(5 * SECOND);
TEST_ASSERT(host_get_events() &
EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL));
return EC_SUCCESS;
}
/*
 * Query EC_CMD_TEMP_SENSOR_GET_INFO for every sensor and check the reported
 * name and type against temp_sensors[]; then check that an out-of-range
 * sensor ID is rejected.
 *
 * Returns EC_SUCCESS when every check passes (a failing TEST_ASSERT leaves
 * the function early).
 */
static int test_sensor_info(void)
{
	struct ec_params_temp_sensor_get_info params;
	struct ec_response_temp_sensor_get_info resp;
	int i;

	for (i = 0; i < TEMP_SENSOR_COUNT; ++i) {
		params.id = i;
		TEST_ASSERT(test_send_host_command(
				EC_CMD_TEMP_SENSOR_GET_INFO,
				0, &params, sizeof(params),
				&resp, sizeof(resp)) == EC_RES_SUCCESS);
		/*
		 * Compare through the terminating NUL of the response.
		 * Using only strlen(resp.sensor_name) would let an empty or
		 * truncated name pass, since just that prefix is compared.
		 * NOTE(review): assumes configured names fit in
		 * resp.sensor_name without truncation - confirm.
		 */
		TEST_ASSERT_ARRAY_EQ(resp.sensor_name,
				     temp_sensors[i].name,
				     strlen(resp.sensor_name) + 1);
		TEST_ASSERT(resp.sensor_type == temp_sensors[i].type);
	}

	/* One past the last valid sensor ID must be rejected. */
	params.id = TEMP_SENSOR_COUNT;
	TEST_ASSERT(test_send_host_command(
			EC_CMD_TEMP_SENSOR_GET_INFO,
			0, &params, sizeof(params),
			&resp, sizeof(resp)) != EC_RES_SUCCESS);

	return EC_SUCCESS;
}
static int set_threshold(int type, int threshold_id, int val)
{
struct ec_params_thermal_set_threshold params;
params.sensor_type = type;
params.threshold_id = threshold_id;
params.value = val;
return test_send_host_command(EC_CMD_THERMAL_SET_THRESHOLD, 0, &params,
sizeof(params), NULL, 0);
}
static int get_threshold(int type, int threshold_id, int *val)
{
struct ec_params_thermal_get_threshold params;
struct ec_response_thermal_get_threshold resp;
int rv;
params.sensor_type = type;
params.threshold_id = threshold_id;
rv = test_send_host_command(EC_CMD_THERMAL_GET_THRESHOLD, 0, &params,
sizeof(params), &resp, sizeof(resp));
if (rv != EC_RES_SUCCESS)
return rv;
*val = resp.value;
return EC_RES_SUCCESS;
}
/*
 * Read a threshold back through the host command and compare it with `val`.
 *
 * Returns 1 only when the read succeeds and the value matches, 0 otherwise.
 */
static int verify_threshold(int type, int threshold_id, int val)
{
	int reported;
	int status = get_threshold(type, threshold_id, &reported);

	/* `reported` is only read when the host command succeeded. */
	return status == EC_RES_SUCCESS && reported == val;
}
/*
 * Check that thresholds read back through the host command match
 * thermal_config[], then lower the CPU warning and CPU_DOWN thresholds via
 * host command and check that throttling and shutdown trigger at the new
 * values. The lowered thresholds are not restored afterwards.
 */
static int test_threshold_hostcmd(void)
{
reset_mock_temp();
/* Verify thresholds */
TEST_ASSERT(verify_threshold(T_CPU, THRESHOLD_WARNING,
THRESHOLD(T_CPU, THRESHOLD_WARNING)));
TEST_ASSERT(verify_threshold(T_BOARD, THRESHOLD_WARNING,
THRESHOLD(T_BOARD, THRESHOLD_WARNING)));
TEST_ASSERT(verify_threshold(T_CPU, THRESHOLD_CPU_DOWN,
THRESHOLD(T_CPU, THRESHOLD_CPU_DOWN)));
/* Lower CPU throttling threshold and trigger */
TEST_ASSERT(set_threshold(T_CPU, THRESHOLD_WARNING, 350) ==
EC_RES_SUCCESS);
mock_temp[T_CPU] = 355;
TEST_ASSERT(wait_set(&cpu_throttled, 11));
TEST_ASSERT(host_get_events() &
EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL_OVERLOAD));
/*
 * Lower thermal shutdown threshold below the current mocked reading
 * (still 355), so shutdown should follow without changing the reading.
 */
TEST_ASSERT(set_threshold(T_CPU, THRESHOLD_CPU_DOWN, 353) ==
EC_RES_SUCCESS);
TEST_ASSERT(wait_set(&cpu_down, 11));
TEST_ASSERT(host_get_events() &
EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL_SHUTDOWN));
/* Clear */
mock_temp[T_CPU] = 0;
TEST_ASSERT(wait_clear(&cpu_throttled, 2));
cpu_down = 0;
return EC_SUCCESS;
}
/*
 * Change thermal settings through injected console commands and check that
 * the new values take effect: "thermalconf" lowers the CPU warning
 * threshold, "thermalfan" moves the first CPU fan step.
 *
 * Returns EC_SUCCESS when every check passes (a failing TEST_ASSERT leaves
 * the function early).
 */
static int test_threshold_console_cmd(void)
{
	char buf[100];

	reset_mock_temp();

	/*
	 * Lower CPU threshold and trigger. The size passed to snprintf() is
	 * taken from the buffer itself so the two cannot drift apart.
	 */
	snprintf(buf, sizeof(buf), "thermalconf %d %d 330\n", T_CPU,
		 THRESHOLD_WARNING);
	UART_INJECT(buf);
	/* Pause before changing the temperature (presumably so the console
	 * has handled the injected line - confirm). */
	msleep(100);
	mock_temp[T_CPU] = 335;
	TEST_ASSERT(wait_set(&cpu_throttled, 11));
	TEST_ASSERT(host_get_events() &
		    EC_HOST_EVENT_MASK(EC_HOST_EVENT_THERMAL_OVERLOAD));

	/* Set first fan step to 280 K */
	snprintf(buf, sizeof(buf), "thermalfan %d 0 280\n", T_CPU);
	UART_INJECT(buf);
	msleep(100);
	mock_temp[T_CPU] = 280;
	TEST_ASSERT(wait_fan_rpm(fan_speed[1], 11));

	return EC_SUCCESS;
}
/*
 * Check that the threshold get/set host commands reject an out-of-range
 * sensor type (TEMP_SENSOR_TYPE_COUNT) and an out-of-range threshold index
 * (THRESHOLD_COUNT + THERMAL_FAN_STEPS).
 */
static int test_invalid_hostcmd(void)
{
/* Receives the value on success; never inspected, failure is expected. */
int dummy;
TEST_ASSERT(set_threshold(TEMP_SENSOR_TYPE_COUNT, THRESHOLD_WARNING,
100) != EC_RES_SUCCESS);
TEST_ASSERT(set_threshold(T_CPU, THRESHOLD_COUNT + THERMAL_FAN_STEPS,
100) != EC_RES_SUCCESS);
TEST_ASSERT(get_threshold(TEMP_SENSOR_TYPE_COUNT, THRESHOLD_WARNING,
&dummy) != EC_RES_SUCCESS);
TEST_ASSERT(get_threshold(T_CPU, THRESHOLD_COUNT + THERMAL_FAN_STEPS,
&dummy) != EC_RES_SUCCESS);
return EC_SUCCESS;
}
/*
 * Check that the fan target does not follow the temperature while automatic
 * fan control is disabled, and that it does again once control is re-enabled,
 * first through the EC_CMD_THERMAL_AUTO_FAN_CTRL host command and then
 * through the "autofan" console command.
 */
static int test_auto_fan_ctrl(void)
{
reset_mock_temp();
/* Disable fan control */
pwm_set_fan_rpm_mode(0);
thermal_control_fan(0);
/*
 * Increase CPU temperature to first fan step and check the fan
 * doesn't come up.
 */
mock_temp[T_CPU] = FAN_THRESHOLD(T_CPU, 0);
TEST_ASSERT(!wait_fan_rpm(fan_speed[1], 11));
/* Enable fan control */
TEST_ASSERT(test_send_host_command(EC_CMD_THERMAL_AUTO_FAN_CTRL, 0,
NULL, 0, NULL, 0) == EC_RES_SUCCESS);
TEST_ASSERT(fan_rpm_mode == 1);
TEST_ASSERT(wait_fan_rpm(fan_speed[1], 11));
/* Disable fan control */
pwm_set_fan_rpm_mode(0);
thermal_control_fan(0);
/* Increase CPU temperature to second fan step */
mock_temp[T_CPU] = FAN_THRESHOLD(T_CPU, 1);
TEST_ASSERT(!wait_fan_rpm(fan_speed[2], 11));
/* Enable fan control by console command */
UART_INJECT("autofan\n");
msleep(100);
TEST_ASSERT(fan_rpm_mode == 1);
TEST_ASSERT(wait_fan_rpm(fan_speed[2], 11));
return EC_SUCCESS;
}
/*
 * Verify the preconditions the other tests rely on:
 *  - sensor IDs and sensor types share numeric values, so the T_* shorthands
 *    can index mock_temp[]/temp_sensors[] as well as thermal_config[];
 *  - the CPU sensor has a non-zero action delay (test_cpu_fan() subtracts
 *    one second from it);
 *  - the CPU thermal config has THERMAL_CONFIG_WARNING_ON_FAIL set.
 */
static int check_assumption(void)
{
TEST_ASSERT((int)TEMP_SENSOR_CPU == (int)TEMP_SENSOR_TYPE_CPU);
TEST_ASSERT((int)TEMP_SENSOR_BOARD == (int)TEMP_SENSOR_TYPE_BOARD);
TEST_ASSERT((int)TEMP_SENSOR_CASE == (int)TEMP_SENSOR_TYPE_CASE);
TEST_ASSERT(temp_sensors[T_CPU].action_delay_sec != 0);
TEST_ASSERT(thermal_config[T_CPU].config_flags &
THERMAL_CONFIG_WARNING_ON_FAIL);
return EC_SUCCESS;
}
/*
 * Test entry point: reset the test state, run every test in order and print
 * the result. The order matters: several tests change thresholds or mock
 * state without restoring it, and check_assumption() runs first because the
 * later tests depend on what it verifies.
 */
void run_test(void)
{
test_reset();
/* Test assumptions */
RUN_TEST(check_assumption);
RUN_TEST(test_init_val);
RUN_TEST(test_cpu_fan);
/* No tests for board and case temp sensors as they are ignored. */
RUN_TEST(test_safety);
RUN_TEST(test_sensor_failure);
RUN_TEST(test_auto_fan_ctrl);
RUN_TEST(test_sensor_info);
RUN_TEST(test_threshold_hostcmd);
RUN_TEST(test_invalid_hostcmd);
RUN_TEST(test_threshold_console_cmd);
test_print_result();
}