g: rework DCRYPTO_app_cipher (AES-CTR) for speed

The previous implementation of DCRYPTO_app_cipher
consumed roughly 16ms to cipher a 16kB buffer
(i.e. performance that is far worse than the
hardware is capable of).

This change speeds up the implementation by about
85%, to the tune of roughly 2.2ms for a 16kB buffer.

The gains originate from various sources: loop
unrolling, data-pipelining, eliminating local
variables (to reduce register pressure), eliminating
support for unaligned input/output data, compiling
hot code with -O (rather than the default -Os), and
using the hidden key-ladder, which need only be
setup once per reset.

This change also switches from AES-128 to AES-256.

BRANCH=none
BUG=chrome-os-partner:62260
TEST=make buildall succeeds;
	cipher command succeeds;
        TCG tests pass

Change-Id: I133741be6d9f1353d6ae732d0e863b4b18cc8c9e
Signed-off-by: nagendra modadugu <ngm@google.com>
Reviewed-on: https://chromium-review.googlesource.com/433359
Commit-Ready: Nagendra Modadugu <ngm@google.com>
Tested-by: Nagendra Modadugu <ngm@google.com>
Reviewed-by: Vadim Bendebury <vbendeb@chromium.org>
This commit is contained in:
nagendra modadugu
2017-01-25 22:33:43 -08:00
committed by chrome-bot
parent cf8c12e139
commit 927e01da02
6 changed files with 294 additions and 60 deletions

View File

@@ -27,5 +27,5 @@ void app_compute_hash(uint8_t *p_buf, size_t num_bytes,
/*
 * Cipher size bytes from in to out by forwarding the request to
 * DCRYPTO_app_cipher() and returning its result unchanged.
 *
 * NOTE(review): this diff view lists two return statements without +/-
 * markers; presumably the four-argument call is the removed line and the
 * call passing NVMEM is its replacement -- confirm against the checked-in
 * file.
 */
int app_cipher(const void *salt, void *out, const void *in, size_t size)
{
return DCRYPTO_app_cipher(salt, out, in, size);
return DCRYPTO_app_cipher(NVMEM, salt, out, in, size);
}

View File

@@ -3,33 +3,173 @@
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "common.h"
#include "console.h"
#include "dcrypto.h"
#include "flash.h"
#include "hooks.h"
#include "shared_mem.h"
#include "task.h"
#include "timer.h"
#include "registers.h"
/* The default build options compile for size (-Os); instruct the
 * compiler to optimize for speed here. Incidentally -O produces
 * faster code than -O2!
 *
 * Stream whole 16-byte blocks through the KEYMGR AES FIFOs.
 *
 * For every 16 bytes counted by len, four words are loaded from *in and
 * written to AES_WFIFO_DATA, the code spins until AES_RFIFO_EMPTY reads
 * zero, and four words are then popped from AES_RFIFO_DATA and stored
 * at *out. Both cursors advance by four words per iteration.
 *
 * out: in/out cursor into the destination words; updated before return.
 * in:  in/out cursor into the source words; updated before return.
 * len: number of bytes available at *in.
 *
 * Returns the number of bytes left over (always less than 16).
 *
 * NOTE(review): outer_loop() queues one block before calling this
 * function, so the words popped on each iteration presumably belong to
 * the block queued one step earlier -- confirm against the AES FIFO
 * behaviour.
 */
static int __attribute__((optimize("O")))
inner_loop(uint32_t **out, const uint32_t **in, size_t len)
{
uint32_t *outw = *out;
const uint32_t *inw = *in;
/*
 * NOTE(review): the next line looks like the pre-change
 * DCRYPTO_app_cipher prototype left in place by the diff view; it is
 * presumably not part of this function -- confirm against the real file.
 */
int DCRYPTO_app_cipher(const void *salt, void *out, const void *in, size_t len)
while (len >= 16) {
uint32_t w0, w1, w2, w3;
/* Load the whole input block before touching the write FIFO. */
w0 = inw[0];
w1 = inw[1];
w2 = inw[2];
w3 = inw[3];
GREG32(KEYMGR, AES_WFIFO_DATA) = w0;
GREG32(KEYMGR, AES_WFIFO_DATA) = w1;
GREG32(KEYMGR, AES_WFIFO_DATA) = w2;
GREG32(KEYMGR, AES_WFIFO_DATA) = w3;
/* Busy-wait until the read FIFO reports data. */
while (GREG32(KEYMGR, AES_RFIFO_EMPTY))
;
/* Pop a full result block, then store it to the output. */
w0 = GREG32(KEYMGR, AES_RFIFO_DATA);
w1 = GREG32(KEYMGR, AES_RFIFO_DATA);
w2 = GREG32(KEYMGR, AES_RFIFO_DATA);
w3 = GREG32(KEYMGR, AES_RFIFO_DATA);
outw[0] = w0;
outw[1] = w1;
outw[2] = w2;
outw[3] = w3;
inw += 4;
outw += 4;
len -= 16;
}
/* Hand the advanced cursors back to the caller. */
*in = inw;
*out = outw;
return len;
}
/*
 * Cipher every whole 16-byte block of the input through the AES FIFOs.
 *
 * The first block is queued up front, inner_loop() is handed the rest of
 * the input, and one more result block is collected from the read FIFO
 * afterwards. When fewer than 16 bytes are supplied no register is
 * touched and both cursors keep their values.
 *
 * out: in/out cursor into the destination words.
 * in:  in/out cursor into the source words.
 * len: number of bytes available at *in.
 *
 * Returns the number of bytes that were not processed (less than 16).
 */
static int outer_loop(uint32_t **out, const uint32_t **in, size_t len)
{
	const uint32_t *src = *in;
	uint32_t *dst = *out;

	/* No whole block available: nothing to cipher. */
	if (len < 16)
		return len;

	/* Queue the first block so the engine is busy before looping. */
	GREG32(KEYMGR, AES_WFIFO_DATA) = src[0];
	GREG32(KEYMGR, AES_WFIFO_DATA) = src[1];
	GREG32(KEYMGR, AES_WFIFO_DATA) = src[2];
	GREG32(KEYMGR, AES_WFIFO_DATA) = src[3];
	src += 4;

	/* Remaining whole blocks; the cursors are advanced by the callee. */
	len = inner_loop(&dst, &src, len - 16);

	/* Collect the result block that is still outstanding. */
	while (GREG32(KEYMGR, AES_RFIFO_EMPTY))
		;
	dst[0] = GREG32(KEYMGR, AES_RFIFO_DATA);
	dst[1] = GREG32(KEYMGR, AES_RFIFO_DATA);
	dst[2] = GREG32(KEYMGR, AES_RFIFO_DATA);
	dst[3] = GREG32(KEYMGR, AES_RFIFO_DATA);
	dst += 4;

	*in = src;
	*out = dst;
	return len;
}
/*
 * Prepare the application key and program the KEYMGR AES engine.
 *
 * ctx:   application key context handed to DCRYPTO_appkey_init().
 * appid: application id; selects the hidden-key index
 *        (0x2a0 + appid * 2).
 * iv:    four words loaded into the AES counter registers.
 *
 * Returns 1 on success, 0 if DCRYPTO_appkey_init() fails or
 * HKEY_ERR_FLAGS is non-zero after key expansion.
 */
static int aes_init(struct APPKEY_CTX *ctx, enum dcrypto_appid appid,
const uint32_t iv[4])
{
/* Setup USR-based application key. */
if (!DCRYPTO_appkey_init(appid, ctx))
return 0;
/* Configure AES engine. */
GWRITE_FIELD(KEYMGR, AES_CTRL, RESET, CTRL_NO_SOFT_RESET);
GWRITE_FIELD(KEYMGR, AES_CTRL, KEYSIZE, 2 /* AES-256 */);
GWRITE_FIELD(KEYMGR, AES_CTRL, CIPHER_MODE, CIPHER_MODE_CTR);
GWRITE_FIELD(KEYMGR, AES_CTRL, ENC_MODE, ENCRYPT_MODE);
GWRITE_FIELD(KEYMGR, AES_CTRL, CTR_ENDIAN, CTRL_CTR_BIG_ENDIAN);
/* Enable hidden key usage, each appid gets its own
 * USR, with USR0 starting at 0x2a0.
 */
GWRITE_FIELD(KEYMGR, AES_USE_HIDDEN_KEY, INDEX,
0x2a0 + (appid * 2));
GWRITE_FIELD(KEYMGR, AES_USE_HIDDEN_KEY, ENABLE, 1);
GWRITE_FIELD(KEYMGR, AES_CTRL, ENABLE, CTRL_ENABLE);
/* Start key expansion, then spin until AES_KEY_START reads zero. */
GREG32(KEYMGR, AES_KEY_START) = 1;
while (GREG32(KEYMGR, AES_KEY_START))
;
/* Check for errors (e.g. USR not correctly set up). */
if (GREG32(KEYMGR, HKEY_ERR_FLAGS))
return 0;
/* Set IV. */
GR_KEYMGR_AES_CTR(0) = iv[0];
GR_KEYMGR_AES_CTR(1) = iv[1];
GR_KEYMGR_AES_CTR(2) = iv[2];
GR_KEYMGR_AES_CTR(3) = iv[3];
return 1;
}
/*
 * Cipher len bytes from in to out with the AES engine set up by
 * aes_init() for the given application id.
 *
 * appid: application id passed on to aes_init().
 * salt:  source of the four IV words (sizeof(iv) bytes are copied).
 * out:   destination buffer; rejected unless word-aligned.
 * in:    source buffer; rejected unless word-aligned.
 * len:   number of bytes to cipher.
 *
 * Returns 1 on success and 0 on failure.
 *
 * NOTE(review): this diff view interleaves removed and added lines
 * without +/- markers. The function-scope iv/memcpy, the
 * DCRYPTO_appkey_init(NVMEM, ...) test and the DCRYPTO_aes_ctr() call
 * presumably belong to the previous implementation -- confirm against
 * the checked-in file.
 */
int DCRYPTO_app_cipher(enum dcrypto_appid appid, const void *salt,
void *out, const void *in, size_t len)
{
struct APPKEY_CTX ctx;
uint32_t iv[4];
const uint32_t *inw = in;
uint32_t *outw = out;
memcpy(iv, salt, sizeof(iv));
if (!DCRYPTO_appkey_init(NVMEM, &ctx))
/* Test pointers for word alignment. */
if (((uintptr_t) in & 0x03) || ((uintptr_t) out & 0x03))
return 0;
if (!DCRYPTO_aes_ctr(out, ctx.key, 128, (uint8_t *) iv, in, len))
return 0;
{
/* Initialize key, and AES engine. */
uint32_t iv[4];
memcpy(iv, salt, sizeof(iv));
if (!aes_init(&ctx, appid, iv))
return 0;
}
/* Cipher all whole 16-byte blocks; len becomes the leftover byte count. */
len = outer_loop(&outw, &inw, len);
if (len) {
/* Cipher the final partial block */
uint32_t tmpin[4];
uint32_t tmpout[4];
const uint32_t *tmpinw;
uint32_t *tmpoutw;
tmpinw = tmpin;
tmpoutw = tmpout;
/*
 * Only the first len bytes of tmpin are filled in; a full 16-byte
 * block is run through the engine and only the first len bytes of
 * the result are copied to the caller's buffer.
 */
memcpy(tmpin, inw, len);
outer_loop(&tmpoutw, &tmpinw, 16);
memcpy(outw, tmpout, len);
}
DCRYPTO_appkey_finish(&ctx);
return 1;
}
#ifdef CRYPTO_TEST_SETUP
#include "common.h"
#include "console.h"
#include "hooks.h"
#include "shared_mem.h"
#include "task.h"
#include "timer.h"
/*
* Let's use some odd size to make sure unaligned buffers are handled
* properly.
@@ -58,14 +198,14 @@ static void run_cipher_cmd(void)
DCRYPTO_SHA1_hash((uint8_t *)p, TEST_BLOB_SIZE, sha);
tstamp = get_time().val;
rv = DCRYPTO_app_cipher(&sha, p, p, TEST_BLOB_SIZE);
rv = DCRYPTO_app_cipher(NVMEM, &sha, p, p, TEST_BLOB_SIZE);
tstamp = get_time().val - tstamp;
ccprintf("rv 0x%02x, out data %.16h, time %d us\n",
rv, p, tstamp);
if (rv == 1) {
tstamp = get_time().val;
rv = DCRYPTO_app_cipher(&sha, p, p, TEST_BLOB_SIZE);
rv = DCRYPTO_app_cipher(NVMEM, &sha, p, p, TEST_BLOB_SIZE);
tstamp = get_time().val - tstamp;
ccprintf("rv 0x%02x, orig. data %.16h, time %d us\n",
rv, p, tstamp);
@@ -104,3 +244,4 @@ static int cmd_cipher(int argc, char **argv)
return result;
}
DECLARE_SAFE_CONSOLE_COMMAND(cipher, cmd_cipher, NULL, NULL);
#endif

View File

@@ -6,40 +6,38 @@
#include "internal.h"
#include "endian.h"
#include "registers.h"
#include "console.h"
#include "shared_mem.h"
#include "cryptoc/util.h"
static const char * const dcrypto_app_names[] = {
"NVMEM"
const struct {
const char *name;
/* SHA256(name, strlen(name)) */
const uint32_t digest[SHA256_DIGEST_WORDS];
} dcrypto_app_names[] = {
{
"RESERVED",
{
0x89ef2e22, 0x0032b61a, 0x7b349ab1, 0x3f512449,
0x4cd161dd, 0x2a6cac94, 0x109a045a, 0x23d669ea
}
},
{
"NVMEM",
{
0xd137e92f, 0x0f39686e, 0xd663f548, 0x9b570397,
0x5801c4ce, 0x8e7c7654, 0xa2a13c85, 0x875779b6
}
},
};
/*
 * Initialise the application key context for appid.
 *
 * appid: index into dcrypto_app_names[]; values at or beyond the end of
 *        that table are rejected.
 * ctx:   context to initialise; it is zeroed before any key work.
 *
 * Returns 1 on success and 0 on failure.
 *
 * NOTE(review): this diff view interleaves removed and added lines
 * without +/- markers. The DCRYPTO_ladder_compute_frk2() test and the
 * shared-memory HMAC derivation presumably belong to the previous
 * implementation, with dcrypto_ladder_compute_usr() as the replacement
 * -- confirm against the checked-in file.
 */
int DCRYPTO_appkey_init(enum dcrypto_appid appid, struct APPKEY_CTX *ctx)
{
LITE_HMAC_CTX *hmac_ctx;
/* Reject ids with no entry in dcrypto_app_names[]. */
if (appid >= ARRAY_SIZE(dcrypto_app_names))
return 0;
memset(ctx, 0, sizeof(*ctx));
if (!DCRYPTO_ladder_compute_frk2(0, ctx->key))
if (!dcrypto_ladder_compute_usr(
appid, dcrypto_app_names[appid].digest))
return 0;
if (shared_mem_acquire(sizeof(LITE_HMAC_CTX),
(char **)&hmac_ctx) != EC_SUCCESS) {
return 0;
}
HMAC_SHA256_init(hmac_ctx, ctx->key, sizeof(ctx->key));
HMAC_update(hmac_ctx, dcrypto_app_names[appid],
strlen(dcrypto_app_names[appid]));
memcpy(ctx->key, HMAC_final(hmac_ctx), SHA256_DIGEST_SIZE);
/* Wipe the HMAC state before handing the shared buffer back. */
always_memset(hmac_ctx, 0, sizeof(LITE_HMAC_CTX));
shared_mem_release(hmac_ctx);
return 1;
}

View File

@@ -253,19 +253,20 @@ int DCRYPTO_x509_verify(const uint8_t *cert, size_t len,
*/
int DCRYPTO_equals(const void *a, const void *b, size_t len);
int DCRYPTO_ladder_compute_frk2(size_t major_fw_version, uint8_t *frk2);
/*
* Application key related functions.
* Key-ladder and application key related functions.
*/
enum dcrypto_appid {
NVMEM = 0
RESERVED = 0,
NVMEM = 1
/* This enum value should not exceed 7. */
};
struct APPKEY_CTX {
uint8_t key[SHA256_DIGEST_SIZE];
};
int DCRYPTO_ladder_compute_frk2(size_t major_fw_version, uint8_t *frk2);
int DCRYPTO_appkey_init(enum dcrypto_appid id, struct APPKEY_CTX *ctx);
void DCRYPTO_appkey_finish(struct APPKEY_CTX *ctx);
@@ -278,14 +279,17 @@ BUILD_ASSERT(DCRYPTO_CIPHER_SALT_SIZE == CIPHER_SALT_SIZE);
*
* Encrypt or decrypt the input buffer, and write the correspondingly
* ciphered output to out. The number of bytes produced is equal to
* the number of input bytes.
* the number of input bytes. Note that the input and output pointers
* MUST be word-aligned.
*
* This API is expected to be applied to a single contiguous region. WARNING:
* Presently calling this function more than once with "in" pointing to
* logically different buffers will result in using the same IV value
* internally and as such reduce encryption efficiency. Upcoming changes are
* expected to make proper use of blob_iv.
* This API is expected to be applied to a single contiguous region.
* WARNING: A given salt/"in" pair MUST be unique, i.e. re-using a
* salt with a logically different input buffer is catastrophic. An
* example of a suitable salt is one that is derived from "in", e.g. a
* digest of the input data.
*
* @param appid the application-id of the calling context.
* @param salt pointer to a unique value to be associated with this blob,
* used for derivation of the proper IV, the size of the value
* is as defined by DCRYPTO_CIPHER_SALT_SIZE above.
@@ -294,6 +298,7 @@ BUILD_ASSERT(DCRYPTO_CIPHER_SALT_SIZE == CIPHER_SALT_SIZE);
* @param len Number of bytes to read from in / write to out.
* @return non-zero on success, and zero otherwise.
*/
int DCRYPTO_app_cipher(const void *salt, void *out, const void *in, size_t len);
int DCRYPTO_app_cipher(enum dcrypto_appid appid, const void *salt,
void *out, const void *in, size_t len);
#endif /* ! __EC_CHIP_G_DCRYPTO_DCRYPTO_H */

View File

@@ -118,6 +118,14 @@ void dcrypto_imem_load(size_t offset, const uint32_t *opcodes,
size_t n_opcodes);
void dcrypto_dmem_load(size_t offset, const void *words, size_t n_words);
/*
* Key ladder.
*/
enum dcrypto_appid; /* Forward declaration. */
int dcrypto_ladder_compute_usr(enum dcrypto_appid id,
const uint32_t usr_salt[8]);
/*
* Utility functions.
*/

View File

@@ -19,10 +19,8 @@ static void ladder_init(void)
*/
}
static int ladder_step(uint32_t cert)
static int ladder_step(uint32_t cert, const uint32_t input[8])
{
uint32_t itop;
GREG32(KEYMGR, SHA_ITOP) = 0; /* clear status */
GREG32(KEYMGR, SHA_USE_CERT_INDEX) =
@@ -34,9 +32,21 @@ static int ladder_step(uint32_t cert)
GREG32(KEYMGR, SHA_TRIG) =
GC_KEYMGR_SHA_TRIG_TRIG_GO_MASK;
do {
itop = GREG32(KEYMGR, SHA_ITOP);
} while (!itop);
if (input) {
GREG32(KEYMGR, SHA_INPUT_FIFO) = input[0];
GREG32(KEYMGR, SHA_INPUT_FIFO) = input[1];
GREG32(KEYMGR, SHA_INPUT_FIFO) = input[2];
GREG32(KEYMGR, SHA_INPUT_FIFO) = input[3];
GREG32(KEYMGR, SHA_INPUT_FIFO) = input[4];
GREG32(KEYMGR, SHA_INPUT_FIFO) = input[5];
GREG32(KEYMGR, SHA_INPUT_FIFO) = input[6];
GREG32(KEYMGR, SHA_INPUT_FIFO) = input[7];
GREG32(KEYMGR, SHA_TRIG) = GC_KEYMGR_SHA_TRIG_TRIG_STOP_MASK;
}
while (!GREG32(KEYMGR, SHA_ITOP))
;
GREG32(KEYMGR, SHA_ITOP) = 0; /* clear status */
@@ -48,7 +58,7 @@ static int compute_certs(const uint32_t *certs, size_t num_certs)
int i;
for (i = 0; i < num_certs; i++) {
if (ladder_step(certs[i]))
if (ladder_step(certs[i], NULL))
return 0;
}
@@ -64,6 +74,8 @@ static int compute_certs(const uint32_t *certs, size_t num_certs)
#define KEYMGR_CERT_20 20
#define KEYMGR_CERT_25 25
#define KEYMGR_CERT_26 26
#define KEYMGR_CERT_34 34
#define KEYMGR_CERT_35 35
static const uint32_t FRK2_CERTS_PREFIX[] = {
KEYMGR_CERT_0,
@@ -101,7 +113,7 @@ int DCRYPTO_ladder_compute_frk2(size_t fw_version, uint8_t *frk2)
break;
for (i = 0; i < MAX_MAJOR_FW_VERSION - fw_version; i++) {
if (ladder_step(KEYMGR_CERT_25))
if (ladder_step(KEYMGR_CERT_25, NULL))
break;
}
@@ -118,3 +130,73 @@ int DCRYPTO_ladder_compute_frk2(size_t fw_version, uint8_t *frk2)
dcrypto_release_sha_hw();
return result;
}
/* ISR salt (SHA256("ISR_SALT")) to use for USR generation. */
static const uint32_t ISR_SALT[8] = {
0x6ba1b495, 0x4b7ca214, 0xfe07e922, 0x09735185,
0xfcca43ca, 0xc6d4dfd9, 0x5fc2fcca, 0xaa45400b
};
/* Map of populated USR registers. */
static int usr_ready[8] = {};
/*
 * Populate the USR slot that belongs to an application id.
 *
 * The first successful call for a given id runs the key ladder: the FRK2
 * certificate prefix, MAX_MAJOR_FW_VERSION + 1 steps of cert 25, cert 34
 * fed with ISR_SALT, and finally cert 35 fed with usr_salt while the
 * digest pointer is overridden to 2 * id. Later calls for the same id
 * return immediately because usr_ready[id] is already set.
 *
 * id:       application id; must be a valid index into usr_ready[].
 * usr_salt: eight words fed to the final ladder step.
 *
 * Returns 1 when the USR for id is ready, and 0 on any failure (id out
 * of range, SHA hardware not obtained, a ladder step failing, or
 * HKEY_ERR_FLAGS being set).
 */
int dcrypto_ladder_compute_usr(enum dcrypto_appid id,
			const uint32_t usr_salt[8])
{
	int result = 0;

	/*
	 * Reject ids that do not map onto a usr_ready[] entry; without
	 * this check an out-of-range id would read and write past the
	 * end of the array.
	 */
	if ((size_t)id >= ARRAY_SIZE(usr_ready))
		return 0;

	/* Check for USR readiness. */
	if (usr_ready[id])
		return 1;

	if (!dcrypto_grab_sha_hw())
		return 0;

	do {
		int i;

		/* The previous check performed without lock acquisition. */
		if (usr_ready[id]) {
			result = 1;
			break;
		}

		ladder_init();

		if (!compute_certs(FRK2_CERTS_PREFIX,
					ARRAY_SIZE(FRK2_CERTS_PREFIX)))
			break;

		/* USR generation requires running the key-ladder till
		 * the end (version 0), plus one additional iteration.
		 */
		for (i = 0; i < MAX_MAJOR_FW_VERSION - 0 + 1; i++) {
			if (ladder_step(KEYMGR_CERT_25, NULL))
				break;
		}
		/* A short count means one of the steps above failed. */
		if (i != MAX_MAJOR_FW_VERSION - 0 + 1)
			break;

		if (ladder_step(KEYMGR_CERT_34, ISR_SALT))
			break;

		/* Output goes to USR[appid] (the multiply by 2 is an
		 * artifact of slot addressing).
		 */
		GWRITE_FIELD(KEYMGR, SHA_CERT_OVERRIDE, DIGEST_PTR, 2 * id);
		if (ladder_step(KEYMGR_CERT_35, usr_salt))
			break;

		/* Check for key-ladder errors. */
		if (GREG32(KEYMGR, HKEY_ERR_FLAGS))
			break;

		/* Key deposited in USR[id], and ready to use. */
		usr_ready[id] = 1;
		result = 1;
	} while (0);

	dcrypto_release_sha_hw();
	return result;
}