From 308d359b260d888f024a2d26c76cd4a50789e432 Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Fri, 2 Dec 2016 13:51:54 +0000
Subject: [PATCH 1/2] Introduce unified API to zero memory

Introduce a zeromem_dczva function on AArch64 that can handle unaligned
addresses and makes use of the DC ZVA instruction to zero a whole block
at a time. This zeroing takes place directly in the cache to speed it
up without doing external memory accesses.

Remove the zeromem16 function on AArch64 and replace it with an alias
to zeromem. The zeromem16 function is now deprecated.

Remove the 16-byte alignment constraint on __BSS_START__ in
firmware-design.md as it is no longer mandatory (it used to comply with
zeromem16 requirements).

Change the 16-byte alignment constraint in SP min's linker script to an
8-byte alignment constraint, as the AArch32 zeromem implementation is
now more efficient on 8-byte-aligned addresses.

Introduce zero_normalmem and zeromem helpers in a platform-agnostic
header that are implemented this way:
* AArch32:
    * zero_normalmem: zero using usual data access
    * zeromem: alias for zero_normalmem
* AArch64:
    * zero_normalmem: zero normal memory using the DC ZVA instruction
      (needs the MMU enabled)
    * zeromem: zero using usual data access

Usage guidelines: in most cases, zero_normalmem should be preferred.
There are two scenarios where zeromem (or memset) must be used instead:
* Code that must run with the MMU disabled (which means all memory is
  considered device memory for data accesses).
* Code that fills device memory with null bytes.

Optionally, the following rule can be applied if performance is
important:
* Code zeroing small areas (a few bytes) that are not secrets should
  use memset to take advantage of compiler optimizations.

Note: Code zeroing security-critical information should use
zero_normalmem/zeromem instead of memset, to avoid the zeroing being
removed by compiler optimizations in some cases or by misbehaving
versions of GCC.

Fixes ARM-software/tf-issues#408

Change-Id: Iafd9663fc1070413c3e1904e54091cf60effaa82
Signed-off-by: Douglas Raillard
---
 bl1/bl1.ld.S                               |   5 +-
 bl1/bl1_fwu.c                              |   4 +-
 bl2/aarch64/bl2_entrypoint.S               |   6 +-
 bl2/bl2.ld.S                               |   5 +-
 bl2u/aarch64/bl2u_entrypoint.S             |   4 +-
 bl2u/bl2u.ld.S                             |   5 +-
 bl31/bl31.ld.S                             |   5 +-
 bl32/sp_min/sp_min.ld.S                    |   7 +-
 bl32/tsp/aarch64/tsp_entrypoint.S          |   6 +-
 bl32/tsp/tsp.ld.S                          |   5 +-
 common/bl_common.c                         |   6 +-
 docs/firmware-design.md                    |   2 +-
 include/common/aarch64/el3_common_macros.S |   6 +-
 include/lib/utils.h                        |  31 ++
 lib/aarch32/misc_helpers.S                 | 115 +++++-
 lib/aarch64/misc_helpers.S                 | 375 ++++++++++++++++--
 plat/arm/css/common/css_bl2_setup.c        |   4 +-
 plat/mediatek/mt6795/bl31.ld.S             |   5 +-
 .../tegra/common/drivers/memctrl/memctrl.c |  10 +-
 19 files changed, 525 insertions(+), 81 deletions(-)

diff --git a/bl1/bl1.ld.S b/bl1/bl1.ld.S
index b9554d15cf..b69065ee4b 100644
--- a/bl1/bl1.ld.S
+++ b/bl1/bl1.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -133,7 +133,8 @@ SECTIONS
     /*
      * The .bss section gets initialised to 0 at runtime.
-     * Its base address must be 16-byte aligned.
+     * Its base address should be 16-byte aligned for better performance of the
+     * zero-initialization code.
*/ .bss : ALIGN(16) { __BSS_START__ = .; diff --git a/bl1/bl1_fwu.c b/bl1/bl1_fwu.c index 1cc7daf629..f7fae68231 100644 --- a/bl1/bl1_fwu.c +++ b/bl1/bl1_fwu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -335,7 +335,7 @@ static int bl1_fwu_image_auth(unsigned int image_id, */ if (image_desc->state == IMAGE_STATE_COPIED) { /* Clear the memory.*/ - memset((void *)base_addr, 0, total_size); + zero_normalmem((void *)base_addr, total_size); flush_dcache_range(base_addr, total_size); /* Indicate that image can be copied again*/ diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S index 25363ace84..31f778790e 100644 --- a/bl2/aarch64/bl2_entrypoint.S +++ b/bl2/aarch64/bl2_entrypoint.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -94,12 +94,12 @@ func bl2_entrypoint */ ldr x0, =__BSS_START__ ldr x1, =__BSS_SIZE__ - bl zeromem16 + bl zeromem #if USE_COHERENT_MEM ldr x0, =__COHERENT_RAM_START__ ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__ - bl zeromem16 + bl zeromem #endif /* -------------------------------------------- diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S index fa694de280..b9275f346d 100644 --- a/bl2/bl2.ld.S +++ b/bl2/bl2.ld.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -113,7 +113,8 @@ SECTIONS /* * The .bss section gets initialised to 0 at runtime. - * Its base address must be 16-byte aligned. + * Its base address should be 16-byte aligned for better performance of the + * zero-initialization code. */ .bss : ALIGN(16) { __BSS_START__ = .; diff --git a/bl2u/aarch64/bl2u_entrypoint.S b/bl2u/aarch64/bl2u_entrypoint.S index 1175c6ff11..9fa84bf42b 100644 --- a/bl2u/aarch64/bl2u_entrypoint.S +++ b/bl2u/aarch64/bl2u_entrypoint.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -94,7 +94,7 @@ func bl2u_entrypoint */ ldr x0, =__BSS_START__ ldr x1, =__BSS_SIZE__ - bl zeromem16 + bl zeromem /* -------------------------------------------- * Allocate a stack whose memory will be marked diff --git a/bl2u/bl2u.ld.S b/bl2u/bl2u.ld.S index d72589fcaf..91e8556ede 100644 --- a/bl2u/bl2u.ld.S +++ b/bl2u/bl2u.ld.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -100,7 +100,8 @@ SECTIONS /* * The .bss section gets initialised to 0 at runtime. - * Its base address must be 16-byte aligned. + * Its base address should be 16-byte aligned for better performance of the + * zero-initialization code. */ .bss : ALIGN(16) { __BSS_START__ = .; diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S index 9a05e6c3c0..e5d6232e5d 100644 --- a/bl31/bl31.ld.S +++ b/bl31/bl31.ld.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -158,7 +158,8 @@ SECTIONS /* * The .bss section gets initialised to 0 at runtime. - * Its base address must be 16-byte aligned. + * Its base address should be 16-byte aligned for better performance of the + * zero-initialization code. */ .bss (NOLOAD) : ALIGN(16) { __BSS_START__ = .; diff --git a/bl32/sp_min/sp_min.ld.S b/bl32/sp_min/sp_min.ld.S index e0e23e8f9e..f1d4d0b3f8 100644 --- a/bl32/sp_min/sp_min.ld.S +++ b/bl32/sp_min/sp_min.ld.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -134,9 +134,10 @@ SECTIONS /* * The .bss section gets initialised to 0 at runtime. - * Its base address must be 16-byte aligned. + * Its base address should be 8-byte aligned for better performance of the + * zero-initialization code. */ - .bss (NOLOAD) : ALIGN(16) { + .bss (NOLOAD) : ALIGN(8) { __BSS_START__ = .; *(.bss*) *(COMMON) diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S index 4c296d4a61..bdb882ab88 100644 --- a/bl32/tsp/aarch64/tsp_entrypoint.S +++ b/bl32/tsp/aarch64/tsp_entrypoint.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -120,12 +120,12 @@ func tsp_entrypoint */ ldr x0, =__BSS_START__ ldr x1, =__BSS_SIZE__ - bl zeromem16 + bl zeromem #if USE_COHERENT_MEM ldr x0, =__COHERENT_RAM_START__ ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__ - bl zeromem16 + bl zeromem #endif /* -------------------------------------------- diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S index 7e24f66d7b..d93e3bb081 100644 --- a/bl32/tsp/tsp.ld.S +++ b/bl32/tsp/tsp.ld.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -104,7 +104,8 @@ SECTIONS /* * The .bss section gets initialised to 0 at runtime. - * Its base address must be 16-byte aligned. + * Its base address should be 16-byte aligned for better performance of the + * zero-initialization code. 
      */
    .bss : ALIGN(16) {
        __BSS_START__ = .;

diff --git a/common/bl_common.c b/common/bl_common.c
index 47bdad5a85..1d6653075f 100644
--- a/common/bl_common.c
+++ b/common/bl_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -347,7 +347,7 @@ static int load_auth_image_internal(unsigned int image_id,
 			image_data->image_size);
 	if (rc != 0) {
 		/* Authentication error, zero memory and flush it right away. */
-		memset((void *)image_data->image_base, 0x00,
+		zero_normalmem((void *)image_data->image_base,
 			image_data->image_size);
 		flush_dcache_range(image_data->image_base,
 			image_data->image_size);
@@ -543,7 +543,7 @@ static int load_auth_image_internal(meminfo_t *mem_layout,
 			image_data->image_size);
 	if (rc != 0) {
 		/* Authentication error, zero memory and flush it right away. */
-		memset((void *)image_data->image_base, 0x00,
+		zero_normalmem((void *)image_data->image_base,
 			image_data->image_size);
 		flush_dcache_range(image_data->image_base,
 			image_data->image_size);

diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index bd6e2f6976..6a20659d15 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -1342,7 +1342,7 @@ All BL images share the following requirements:

 The following linker symbols are defined for this purpose:

-* `__BSS_START__` Must be aligned on a 16-byte boundary.
+* `__BSS_START__`
 * `__BSS_SIZE__`
 * `__COHERENT_RAM_START__` Must be aligned on a page-size boundary.
 * `__COHERENT_RAM_END__` Must be aligned on a page-size boundary.

diff --git a/include/common/aarch64/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S
index cbfa6eec76..2e70fc2e87 100644
--- a/include/common/aarch64/el3_common_macros.S
+++ b/include/common/aarch64/el3_common_macros.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -252,12 +252,12 @@
		ldr	x0, =__BSS_START__
		ldr	x1, =__BSS_SIZE__
-		bl	zeromem16
+		bl	zeromem

 #if USE_COHERENT_MEM
		ldr	x0, =__COHERENT_RAM_START__
		ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
-		bl	zeromem16
+		bl	zeromem
 #endif

 #ifdef IMAGE_BL1

diff --git a/include/lib/utils.h b/include/lib/utils.h
index b6bc9af678..69bbb430a2 100644
--- a/include/lib/utils.h
+++ b/include/lib/utils.h
@@ -80,4 +80,35 @@
 # define ULL(_x)	(_x##ull)
 #endif

+/*
+ * C code should be put in this part of the header to avoid breaking ASM files
+ * or linker scripts including it.
+ */
+#if !(defined(__LINKER__) || defined(__ASSEMBLY__))
+
+#include <types.h>
+
+/*
+ * Fill a region of normal memory of size "length" in bytes with zero bytes.
+ *
+ * WARNING: This function can only operate on normal memory. This means that
+ *          the MMU must be enabled when using this function. Otherwise, use
+ *          zeromem.
+ */
+void zero_normalmem(void *mem, u_register_t length);
+
+/*
+ * Fill a region of memory of size "length" in bytes with null bytes.
+ *
+ * Unlike zero_normalmem, this function has no restriction on the type of
+ * memory targeted and can be used for any device memory as well as normal
+ * memory. This function must be used instead of zero_normalmem when the MMU
+ * is disabled.
+ *
+ * NOTE: When the data cache and MMU are enabled, prefer zero_normalmem for
+ *       faster zeroing.
+ */
+void zeromem(void *mem, u_register_t length);
+#endif /* !(defined(__LINKER__) || defined(__ASSEMBLY__)) */
+
 #endif /* __UTILS_H__ */
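For illustration only (not part of the patch), a minimal sketch of how callers are expected to choose between the two helpers declared above, following the usage guidelines in the commit message. The buffer names and sizes here are hypothetical:

    #include <utils.h>

    /* Hypothetical secret living in normal (cacheable) memory. */
    static unsigned char session_key[64];

    void wipe_secrets(void)
    {
    	/*
    	 * The MMU is enabled at this point, so zero_normalmem is the right
    	 * choice: on AArch64 it resolves to the DC ZVA-based fast path.
    	 */
    	zero_normalmem(session_key, sizeof(session_key));
    }

    void clear_device_buffer(void *dev_buf, u_register_t size)
    {
    	/*
    	 * Device memory (or code running with the MMU off): DC ZVA would
    	 * fault here, so use zeromem, which only performs normal data
    	 * accesses.
    	 */
    	zeromem(dev_buf, size);
    }

On AArch32 both names end up in the same routine; the distinction only changes code generation on AArch64.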
diff --git a/lib/aarch32/misc_helpers.S b/lib/aarch32/misc_helpers.S
index bf4084a828..dc8479951b 100644
--- a/lib/aarch32/misc_helpers.S
+++ b/lib/aarch32/misc_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -34,6 +34,7 @@
	.globl	smc
	.globl	zeromem
+	.globl	zero_normalmem
	.globl	memcpy4
	.globl	disable_mmu_icache_secure
	.globl	disable_mmu_secure
@@ -50,30 +51,108 @@ func smc
 endfunc smc

 /* -----------------------------------------------------------------------
- * void zeromem(void *mem, unsigned int length);
+ * void zeromem(void *mem, unsigned int length)
+ *
+ * Initialise a region in normal memory to 0. This function complies with the
+ * AAPCS and can be called from C code.
 *
- * Initialise a memory region to 0.
- * The memory address and length must be 4-byte aligned.
 * -----------------------------------------------------------------------
 */
 func zeromem
-#if ASM_ASSERTION
-	tst	r0, #0x3
-	ASM_ASSERT(eq)
-	tst	r1, #0x3
-	ASM_ASSERT(eq)
-#endif
-	add	r2, r0, r1
-	mov	r1, #0
-z_loop:
-	cmp	r2, r0
-	beq	z_end
-	str	r1, [r0], #4
-	b	z_loop
-z_end:
+	/*
+	 * Readable names for registers
+	 */
+	cursor       .req r0  /* Start address and then current address */
+	length       .req r1  /* Length in bytes of the region to zero out */
+	/*
+	 * Reusing the r1 register: length is only used at the beginning of
+	 * the function.
+	 */
+	stop_address .req r1  /* Address past the last zeroed byte */
+	zeroreg1     .req r2  /* Source register filled with 0 */
+	zeroreg2     .req r3  /* Source register filled with 0 */
+	tmp          .req r12 /* Temporary scratch register */
+
+	mov	zeroreg1, #0
+
+	/* stop_address is the address past the last to zero */
+	add	stop_address, cursor, length
+
+	/*
+	 * Length cannot be used anymore as it shares the same register with
+	 * stop_address.
+	 */
+	.unreq	length
+
+	/*
+	 * If the start address is already aligned to 8 bytes, skip this loop.
+	 */
+	tst	cursor, #(8-1)
+	beq	.Lzeromem_8bytes_aligned
+
+	/* Calculate the next address aligned to 8 bytes */
+	orr	tmp, cursor, #(8-1)
+	adds	tmp, tmp, #1
+	/* If it overflows, fall back to byte per byte zeroing */
+	beq	.Lzeromem_1byte_aligned
+	/* If the next aligned address is after the stop address, fall back */
+	cmp	tmp, stop_address
+	bhs	.Lzeromem_1byte_aligned
+
+	/* zero byte per byte */
+1:
+	strb	zeroreg1, [cursor], #1
+	cmp	cursor, tmp
+	bne	1b
+
+	/* zero 8 bytes at a time */
+.Lzeromem_8bytes_aligned:
+
+	/* Calculate the last 8-byte-aligned address */
+	bic	tmp, stop_address, #(8-1)
+
+	cmp	cursor, tmp
+	bhs	2f
+
+	mov	zeroreg2, #0
+1:
+	stmia	cursor!, {zeroreg1, zeroreg2}
+	cmp	cursor, tmp
+	blo	1b
+2:
+
+	/* zero byte per byte */
+.Lzeromem_1byte_aligned:
+	cmp	cursor, stop_address
+	beq	2f
+1:
+	strb	zeroreg1, [cursor], #1
+	cmp	cursor, stop_address
+	bne	1b
+2:
	bx	lr
+
+	.unreq	cursor
+	/*
+	 * length is already unreq'ed to reuse the register for another
+	 * variable.
+	 */
+	.unreq	stop_address
+	.unreq	zeroreg1
+	.unreq	zeroreg2
+	.unreq	tmp
 endfunc zeromem

+/*
+ * AArch32 does not have special ways of zeroing normal memory as AArch64 does
+ * using the DC ZVA instruction, so we just alias zero_normalmem to zeromem.
+ */
+.equ	zero_normalmem, zeromem
+
 /* --------------------------------------------------------------------------
  * void memcpy4(void *dest, const void *src, unsigned int length)
  *
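An aside on the commit message's note about zeroing secrets (illustration only, not part of the patch): a memset of a buffer that is provably never read again may be removed by the compiler's dead-store elimination, whereas a call to the out-of-line zeromem cannot be, because the compiler has to assume the callee observes the buffer. A hypothetical sketch:

    #include <string.h>
    #include <utils.h>

    void handle_request(void)
    {
    	unsigned char key[32];	/* hypothetical secret */

    	/* ... derive and use the key ... */

    	/*
    	 * 'key' is dead after this point, so the compiler may legally drop
    	 * this store (dead-store elimination) and leave the secret in memory.
    	 */
    	memset(key, 0, sizeof(key));

    	/*
    	 * An out-of-line assembly routine is opaque to the optimizer, so
    	 * this call cannot be elided.
    	 */
    	zeromem(key, sizeof(key));
    }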
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index 574146f6db..84265e0b2a 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -37,6 +37,8 @@
	.globl	eret
	.globl	smc

+	.globl	zero_normalmem
+	.globl	zeromem
	.globl	zeromem16
	.globl	memcpy16

@@ -80,31 +82,358 @@ endfunc smc
 *
 * Initialise a memory region to 0.
 * The memory address must be 16-byte aligned.
+ * NOTE: This function is deprecated and zeromem should be used instead.
 * -----------------------------------------------------------------------
 */
-func zeromem16
-#if ASM_ASSERTION
-	tst	x0, #0xf
-	ASM_ASSERT(eq)
-#endif
-	add	x2, x0, x1
-/* zero 16 bytes at a time */
-z_loop16:
-	sub	x3, x2, x0
-	cmp	x3, #16
-	b.lt	z_loop1
-	stp	xzr, xzr, [x0], #16
-	b	z_loop16
-/* zero byte per byte */
-z_loop1:
-	cmp	x0, x2
-	b.eq	z_end
-	strb	wzr, [x0], #1
-	b	z_loop1
-z_end:
-	ret
-endfunc zeromem16
+.equ	zeromem16, zeromem

+/* -----------------------------------------------------------------------
+ * void zero_normalmem(void *mem, unsigned int length);
+ *
+ * Initialise a region in normal memory to 0. This function complies with the
+ * AAPCS and can be called from C code.
+ *
+ * NOTE: The MMU must be enabled when using this function, as it can only
+ *       operate on normal memory. It is intended to be mainly used from C
+ *       code, where the MMU is usually enabled.
+ * -----------------------------------------------------------------------
+ */
+.equ	zero_normalmem, zeromem_dczva
+
+/* -----------------------------------------------------------------------
+ * void zeromem(void *mem, unsigned int length);
+ *
+ * Initialise a region of device memory to 0. This function complies with the
+ * AAPCS and can be called from C code.
+ *
+ * NOTE: When the data caches and MMU are enabled, zero_normalmem can usually
+ *       be used instead for faster zeroing.
+ *
+ * -----------------------------------------------------------------------
+ */
+func zeromem
+	/* x2 is the address past the last zeroed address */
+	add	x2, x0, x1
+	/*
+	 * Uses the fallback path that does not use the DC ZVA instruction and
+	 * therefore does not need the MMU to be enabled.
+	 */
+	b	.Lzeromem_dczva_fallback_entry
+endfunc zeromem
+
+/* -----------------------------------------------------------------------
+ * void zeromem_dczva(void *mem, unsigned int length);
+ *
+ * Fill a region of normal memory of size "length" in bytes with null bytes.
+ * The MMU must be enabled and the memory must be of normal type. This is
+ * because this function internally uses the DC ZVA instruction, which
+ * generates an Alignment fault if used on any type of Device memory (see
+ * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
+ * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
+ * requirement on the MMU being enabled.
+ * NOTE: The code assumes that the block size as defined in the DCZID_EL0
+ *       register is at least 16 bytes.
+ *
+ * -----------------------------------------------------------------------
+ */
+func zeromem_dczva
+
+	/*
+	 * The function consists of a series of loops that zero memory one
+	 * byte at a time, 16 bytes at a time, or a whole aligned block at a
+	 * time using the DC ZVA instruction, whose block size is assumed to
+	 * be at least 16 bytes. In the case where the DC ZVA instruction
+	 * cannot be used, or if the first 16-byte loop would overflow, there
+	 * is a fallback path that does not use DC ZVA.
+	 * Note: The fallback path is also used by the zeromem function that
+	 *       branches to it directly.
+	 *
+	 *            +---------+   zeromem_dczva
+	 *            |  entry  |
+	 *            +----+----+
+	 *                 |
+	 *                 v
+	 *            +---------+
+	 *            | checks  |>o-------+ (If any check fails, fallback)
+	 *            +----+----+         |
+	 *                 |              |---------------+
+	 *                 v              | Fallback path |
+	 *          +------+------+       |---------------+
+	 *          | 1 byte loop |       |
+	 *          +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
+	 *                 |              |
+	 *                 v              |
+	 *        +-------+-------+       |
+	 *        | 16 bytes loop |       |
+	 *        +-------+-------+       |
+	 *                 |              |
+	 *                 v              |
+	 *          +------+------+ .Lzeromem_dczva_blocksize_aligned
+	 *          | DC ZVA loop |       |
+	 *          +------+------+       |
+	 *     +--------+  |              |
+	 *     |        |  |              |
+	 *     |        v  v              |
+	 *     |  +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
+	 *     |  | 16 bytes loop |       |
+	 *     |  +-------+-------+       |
+	 *     |        |                 |
+	 *     |        v                 |
+	 *     |  +------+------+ .Lzeromem_dczva_final_1byte_aligned
+	 *     |  | 1 byte loop |         |
+	 *     |  +-------------+         |
+	 *     |        |                 |
+	 *     |        v                 |
+	 *     |     +---+--+             |
+	 *     |     | exit |             |
+	 *     |     +------+             |
+	 *     |                          |
+	 *     |  +--------------+        +------------------+ zeromem
+	 *     |  |  +---------------------| zeromem function |
+	 *     |  |  |                     +------------------+
+	 *     |  v  v
+	 *     |  +-------------+ .Lzeromem_dczva_fallback_entry
+	 *     |  | 1 byte loop |
+	 *     |  +------+------+
+	 *     |         |
+	 *     +---------+
+	 */
+
+	/*
+	 * Readable names for registers
+	 *
+	 * Registers x0, x1 and x2 are also set by zeromem which
+	 * branches into the fallback path directly, so cursor, length and
+	 * stop_address should not be retargeted to other registers.
+	 */
+	cursor       .req x0 /* Start address and then current address */
+	length       .req x1 /* Length in bytes of the region to zero out */
+	/* Reusing x1: length is never used after block_mask is set */
+	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
+	stop_address .req x2 /* Address past the last zeroed byte */
+	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
+	tmp1         .req x4
+	tmp2         .req x5
+
+#if ASM_ASSERTION
+	/*
+	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
+	 * register value and panic if the MMU is disabled.
+	 */
+#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
+	mrs	tmp1, sctlr_el3
+#else
+	mrs	tmp1, sctlr_el1
+#endif
+
+	tst	tmp1, #SCTLR_M_BIT
+	ASM_ASSERT(ne)
+#endif /* ASM_ASSERTION */
+
+	/* stop_address is the address past the last to zero */
+	add	stop_address, cursor, length
+
+	/*
+	 * Get block_size = (log2(<block size>) >> 2) (see encoding of
+	 * dczid_el0 reg)
+	 */
+	mrs	block_size, dczid_el0
+
+	/*
+	 * Select the 4 lowest bits and convert the extracted log2(<block
+	 * size>) to <block size>
+	 */
+	ubfx	block_size, block_size, #0, #4
+	mov	tmp2, #(1 << 2)
+	lsl	block_size, tmp2, block_size
+
+#if ASM_ASSERTION
+	/*
+	 * Assumes block size is at least 16 bytes to avoid manual realignment
+	 * of the cursor at the end of the DCZVA loop.
+	 */
+	cmp	block_size, #16
+	ASM_ASSERT(hs)
+#endif
+	/*
+	 * Not worth doing all the setup for a region less than a block and
+	 * protects against zeroing a whole block when the area to zero is
+	 * smaller than that. Also, as it is assumed that the block size is at
+	 * least 16 bytes, this also protects the initial aligning loops from
+	 * trying to zero 16 bytes when length is less than 16.
+	 */
+	cmp	length, block_size
+	b.lo	.Lzeromem_dczva_fallback_entry
+
+	/*
+	 * Calculate the bitmask of the block alignment. It will never
+	 * underflow as the block size is between 4 bytes and 2kB.
+	 * block_mask = block_size - 1
+	 */
+	sub	block_mask, block_size, #1
+
+	/*
+	 * The length alias should not be used after this point unless it is
+	 * defined as a register other than block_mask's.
+	 */
+	.unreq	length
+
+	/*
+	 * If the start address is already aligned to the zero block size, go
+	 * straight to the cache zeroing loop. This is safe because at this
+	 * point, the length cannot be smaller than a block size.
+	 */
+	tst	cursor, block_mask
+	b.eq	.Lzeromem_dczva_blocksize_aligned
+
+	/*
+	 * Calculate the first block-size-aligned address. It is assumed that
+	 * the zero block size is at least 16 bytes. This address is the last
+	 * address of this initial loop.
+	 */
+	orr	tmp1, cursor, block_mask
+	add	tmp1, tmp1, #1
+
+	/*
+	 * If the addition overflows, skip the cache zeroing loops. This is
+	 * quite unlikely however.
+	 */
+	cbz	tmp1, .Lzeromem_dczva_fallback_entry
+
+	/*
+	 * If the first block-size-aligned address is past the last address,
+	 * fall back to the simpler code.
+	 */
+	cmp	tmp1, stop_address
+	b.hi	.Lzeromem_dczva_fallback_entry
+
+	/*
+	 * If the start address is already aligned to 16 bytes, skip this loop.
+	 * It is safe to do this because tmp1 (the stop address of the initial
+	 * 16 bytes loop) will never be greater than the final stop address.
+	 */
+	tst	cursor, #0xf
+	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end
+
+	/* Calculate the next address aligned to 16 bytes */
+	orr	tmp2, cursor, #0xf
+	add	tmp2, tmp2, #1
+	/* If it overflows, fall back to the simple path (unlikely) */
+	cbz	tmp2, .Lzeromem_dczva_fallback_entry
+	/*
+	 * Next aligned address cannot be after the stop address because the
+	 * length cannot be smaller than 16 at this point.
+	 */
+
+	/* First loop: zero byte per byte */
+1:
+	strb	wzr, [cursor], #1
+	cmp	cursor, tmp2
+	b.ne	1b
+.Lzeromem_dczva_initial_1byte_aligned_end:
+
+	/*
+	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
+	 * before being able to use the code that deals with
+	 * block-size-aligned addresses.
+	 */
+	cmp	cursor, tmp1
+	b.hs	2f
+1:
+	stp	xzr, xzr, [cursor], #16
+	cmp	cursor, tmp1
+	b.lo	1b
+2:
+
+	/*
+	 * Third loop: zero a block at a time using the DC ZVA cache block
+	 * zeroing instruction.
+	 */
+.Lzeromem_dczva_blocksize_aligned:
+	/*
+	 * Calculate the last block-size-aligned address. If the result equals
+	 * the start address, the loop will exit immediately.
+	 */
+	bic	tmp1, stop_address, block_mask
+
+	cmp	cursor, tmp1
+	b.hs	2f
+1:
+	/* Zero the block containing the cursor */
+	dc	zva, cursor
+	/* Increment the cursor by the size of a block */
+	add	cursor, cursor, block_size
+	cmp	cursor, tmp1
+	b.lo	1b
+2:
+
+	/*
+	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
+	 * remaining area
+	 */
+.Lzeromem_dczva_final_16bytes_aligned:
+	/*
+	 * Calculate the last 16-byte-aligned address. It is assumed that the
+	 * block size will never be smaller than 16 bytes so that the current
+	 * cursor is aligned to at least a 16-byte boundary.
+	 */
+	bic	tmp1, stop_address, #15
+
+	cmp	cursor, tmp1
+	b.hs	2f
+1:
+	stp	xzr, xzr, [cursor], #16
+	cmp	cursor, tmp1
+	b.lo	1b
+2:
+
+	/* Fifth and final loop: zero byte per byte */
+.Lzeromem_dczva_final_1byte_aligned:
+	cmp	cursor, stop_address
+	b.eq	2f
+1:
+	strb	wzr, [cursor], #1
+	cmp	cursor, stop_address
+	b.ne	1b
+2:
+	ret
+
+	/* Fallback for unaligned start addresses */
+.Lzeromem_dczva_fallback_entry:
+	/*
+	 * If the start address is already aligned to 16 bytes, skip this loop.
+	 */
+	tst	cursor, #0xf
+	b.eq	.Lzeromem_dczva_final_16bytes_aligned
+
+	/* Calculate the next address aligned to 16 bytes */
+	orr	tmp1, cursor, #15
+	add	tmp1, tmp1, #1
+	/* If it overflows, fall back to byte per byte zeroing */
+	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
+	/* If the next aligned address is after the stop address, fall back */
+	cmp	tmp1, stop_address
+	b.hs	.Lzeromem_dczva_final_1byte_aligned
+
+	/* Fallback entry loop: zero byte per byte */
+1:
+	strb	wzr, [cursor], #1
+	cmp	cursor, tmp1
+	b.ne	1b
+
+	b	.Lzeromem_dczva_final_16bytes_aligned
+
+	.unreq	cursor
+	/*
+	 * length is already unreq'ed to reuse the register for another
+	 * variable.
+	 */
+	.unreq	stop_address
+	.unreq	block_size
+	.unreq	block_mask
+	.unreq	tmp1
+	.unreq	tmp2
+endfunc zeromem_dczva

 /* --------------------------------------------------------------------------
  * void memcpy16(void *dest, const void *src, unsigned int length)
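For reference (not part of the patch), the DCZID_EL0 decoding above — ubfx extracts the 4-bit BS field, then the shift turns it into a byte count — is equivalent to this C sketch. BS holds log2 of the block size in words, so the size in bytes is 4 << BS:

    #include <stdint.h>

    /* Read the DC ZVA block size in bytes, mirroring the assembly above. */
    static inline uint64_t dc_zva_block_size(void)
    {
    	uint64_t dczid;

    	__asm__ volatile("mrs %0, dczid_el0" : "=r" (dczid));

    	/* BS = dczid[3:0]; bytes = 4 << BS, e.g. BS = 4 gives 64 bytes. */
    	return (uint64_t)4 << (dczid & 0xf);
    }

The architecture caps BS at 9, i.e. a 2kB block, which is where the "between 4 bytes and 2kB" range in the block_mask comment comes from.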
diff --git a/plat/arm/css/common/css_bl2_setup.c b/plat/arm/css/common/css_bl2_setup.c
index 11ca34230f..5361d897ec 100644
--- a/plat/arm/css/common/css_bl2_setup.c
+++ b/plat/arm/css/common/css_bl2_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -98,7 +98,7 @@ void bl2_platform_setup(void)
	 * - restoring the SCP boot configuration.
	 */
	VERBOSE("BL2: Restoring SCP reset data in Trusted SRAM\n");
-	memset((void *) ARM_TRUSTED_SRAM_BASE, 0, 128);
+	zero_normalmem((void *)ARM_TRUSTED_SRAM_BASE, 128);
	mmio_write_32(SCP_BOOT_CFG_ADDR, scp_boot_config);
 }
 #endif /* EL3_PAYLOAD_BASE */

diff --git a/plat/mediatek/mt6795/bl31.ld.S b/plat/mediatek/mt6795/bl31.ld.S
index 44510a75c0..472cd2e0da 100644
--- a/plat/mediatek/mt6795/bl31.ld.S
+++ b/plat/mediatek/mt6795/bl31.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -113,7 +113,8 @@ SECTIONS
     /*
      * The .bss section gets initialised to 0 at runtime.
-     * Its base address must be 16-byte aligned.
+     * Its base address should be 16-byte aligned for better performance of the
+     * zero-initialization code.
      */
     .bss (NOLOAD) : ALIGN(16) {
         __BSS_START__ = .;

diff --git a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
index 40d1bab067..689f2d7cd9 100644
--- a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
+++ b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -37,8 +37,6 @@
 #include
 #include

-extern void zeromem16(void *mem, unsigned int length);
-
 #define TEGRA_GPU_RESET_REG_OFFSET	0x28c
 #define GPU_RESET_BIT			(1 << 24)

@@ -114,13 +112,13 @@ static void tegra_clear_videomem(uintptr_t non_overlap_area_start,
	 * Perform cache maintenance to ensure that the non-overlapping area is
	 * zeroed out. The first invalidation of this range ensures that
	 * possible evictions of dirty cache lines do not interfere with the
-	 * 'zeromem16' operation. Other CPUs could speculatively prefetch the
+	 * 'zeromem' operation. Other CPUs could speculatively prefetch the
	 * main memory contents of this area between the first invalidation and
-	 * the 'zeromem16' operation. The second invalidation ensures that any
+	 * the 'zeromem' operation. The second invalidation ensures that any
	 * such cache lines are removed as well.
	 */
	inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
-	zeromem16((void *)non_overlap_area_start, non_overlap_area_size);
+	zeromem((void *)non_overlap_area_start, non_overlap_area_size);
	inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
 }
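The second patch below applies one mechanical transformation across drivers, platform code and services. As an illustration (not part of either patch; the structure here is hypothetical):

    #include <string.h>
    #include <utils.h>

    struct widget {		/* hypothetical moderately-sized structure */
    	int a;
    	char buf[32];
    };

    void reset_widget_before(struct widget *w)
    {
    	/* Compiled with -ffreestanding, this lowers to a call to the
    	 * generic, byte-oriented memset implementation. */
    	memset(w, 0, sizeof(*w));
    }

    void reset_widget_after(struct widget *w)
    {
    	/* Same effect for the zero-fill case, served by the optimized
    	 * zeromem implementation. */
    	zeromem(w, sizeof(*w));
    }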
From 32f0d3c6c3fb1fb9353ec0b82ddb099281b9328c Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Thu, 26 Jan 2017 15:54:44 +0000
Subject: [PATCH 2/2] Replace some memset calls with zeromem

Replace all uses of memset with zeromem when zeroing moderately-sized
structures, by applying the following transformation:
memset(x, 0, sizeof(x)) => zeromem(x, sizeof(x))

As the Trusted Firmware is compiled with -ffreestanding, the compiler
is forbidden from using __builtin_memset and forced to generate calls
to the slow memset implementation. Zeromem is a near drop-in
replacement for this use case, with a more efficient implementation on
both AArch32 and AArch64.

Change-Id: Ia7f3a90e888b96d056881be09f0b4d65b41aa79e
Signed-off-by: Douglas Raillard
---
 bl32/sp_min/sp_min_main.c                     |  5 ++-
 drivers/auth/mbedtls/mbedtls_x509_parser.c    |  5 ++-
 drivers/emmc/emmc.c                           | 39 ++++++++++---------
 drivers/io/io_block.c                         |  7 ++--
 drivers/io/io_fip.c                           |  5 ++-
 drivers/io/io_memmap.c                        |  5 ++-
 drivers/partition/gpt.c                       |  5 ++-
 lib/el3_runtime/aarch32/context_mgmt.c        |  5 ++-
 lib/el3_runtime/aarch64/context_mgmt.c        |  5 ++-
 lib/psci/psci_common.c                        |  9 +++--
 plat/arm/common/arm_bl2_setup.c               |  5 ++-
 plat/arm/css/drivers/scpi/css_scpi.c          |  6 ++-
 .../tegra/common/drivers/memctrl/memctrl.c    |  1 +
 plat/qemu/qemu_bl2_setup.c                    |  6 +--
 .../rk3399/drivers/dram/dram_spec_timing.c    | 11 +++---
 plat/xilinx/zynqmp/pm_service/pm_client.c     |  5 ++-
 services/spd/opteed/opteed_common.c           |  5 ++-
 services/spd/tspd/tspd_common.c               |  5 ++-
 18 files changed, 76 insertions(+), 58 deletions(-)

diff --git a/bl32/sp_min/sp_min_main.c b/bl32/sp_min/sp_min_main.c
index 02663a29ea..f34716ed87 100644
--- a/bl32/sp_min/sp_min_main.c
+++ b/bl32/sp_min/sp_min_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -45,6 +45,7 @@
 #include
 #include
 #include
+#include <utils.h>
 #include "sp_min_private.h"

 /* Pointers to per-core cpu contexts */
@@ -203,7 +204,7 @@ void sp_min_warm_boot(void)
	smc_set_next_ctx(NON_SECURE);

	next_smc_ctx = smc_get_next_ctx();
-	memset(next_smc_ctx, 0, sizeof(smc_ctx_t));
+	zeromem(next_smc_ctx, sizeof(smc_ctx_t));

	copy_cpu_ctx_to_smc_stx(get_regs_ctx(cm_get_context(NON_SECURE)),
			next_smc_ctx);

diff --git a/drivers/auth/mbedtls/mbedtls_x509_parser.c b/drivers/auth/mbedtls/mbedtls_x509_parser.c
index f9485de3d2..36c279f6a2 100644
--- a/drivers/auth/mbedtls/mbedtls_x509_parser.c
+++ b/drivers/auth/mbedtls/mbedtls_x509_parser.c
@@ -43,6 +43,7 @@
 #include
 #include
 #include
+#include <utils.h>

 /* mbed TLS headers */
 #include

@@ -71,7 +72,7 @@ static void clear_temp_vars(void)
 {
 #define ZERO_AND_CLEAN(x)					\
	do {							\
-		memset(&x, 0, sizeof(x));			\
+		zeromem(&x, sizeof(x));				\
		clean_dcache_range((uintptr_t)&x, sizeof(x));	\
	} while (0);

@@ -111,7 +112,7 @@ static int get_ext(const char *oid, void **ext, unsigned int *ext_len)
			MBEDTLS_ASN1_SEQUENCE);

	while (p < end) {
-		memset(&extn_oid, 0x0, sizeof(extn_oid));
+		zeromem(&extn_oid, sizeof(extn_oid));
		is_critical = 0; /* DEFAULT FALSE */

		mbedtls_asn1_get_tag(&p, end, &len, MBEDTLS_ASN1_CONSTRUCTED |

diff --git a/drivers/emmc/emmc.c b/drivers/emmc/emmc.c
index 3fae2a15b8..1c1ea82ac9 100644
--- a/drivers/emmc/emmc.c
+++ b/drivers/emmc/emmc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +36,7 @@ #include #include #include +#include static const emmc_ops_t *ops; static unsigned int emmc_ocr_value; @@ -53,7 +54,7 @@ static int emmc_device_state(void) int ret; do { - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD13; cmd.cmd_arg = EMMC_FIX_RCA << RCA_SHIFT_OFFSET; cmd.resp_type = EMMC_RESPONSE_R1; @@ -71,7 +72,7 @@ static void emmc_set_ext_csd(unsigned int ext_cmd, unsigned int value) emmc_cmd_t cmd; int ret, state; - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD6; cmd.cmd_arg = EXTCSD_WRITE_BYTES | EXTCSD_CMD(ext_cmd) | EXTCSD_VALUE(value) | 1; @@ -107,14 +108,14 @@ static int emmc_enumerate(int clk, int bus_width) ops->init(); /* CMD0: reset to IDLE */ - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD0; ret = ops->send_cmd(&cmd); assert(ret == 0); while (1) { /* CMD1: get OCR register */ - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD1; cmd.cmd_arg = OCR_SECTOR_MODE | OCR_VDD_MIN_2V7 | OCR_VDD_MIN_1V7; @@ -127,14 +128,14 @@ static int emmc_enumerate(int clk, int bus_width) } /* CMD2: Card Identification */ - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD2; cmd.resp_type = EMMC_RESPONSE_R2; ret = ops->send_cmd(&cmd); assert(ret == 0); /* CMD3: Set Relative Address */ - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD3; cmd.cmd_arg = EMMC_FIX_RCA << RCA_SHIFT_OFFSET; cmd.resp_type = EMMC_RESPONSE_R1; @@ -142,7 +143,7 @@ static int emmc_enumerate(int clk, int bus_width) assert(ret == 0); /* CMD9: CSD Register */ - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD9; cmd.cmd_arg = EMMC_FIX_RCA << RCA_SHIFT_OFFSET; cmd.resp_type = EMMC_RESPONSE_R2; @@ -151,7 +152,7 @@ static int emmc_enumerate(int clk, int bus_width) memcpy(&emmc_csd, &cmd.resp_data, sizeof(cmd.resp_data)); /* CMD7: Select Card */ - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD7; cmd.cmd_arg = EMMC_FIX_RCA << RCA_SHIFT_OFFSET; cmd.resp_type = EMMC_RESPONSE_R1; @@ -181,7 +182,7 @@ size_t emmc_read_blocks(int lba, uintptr_t buf, size_t size) assert(ret == 0); if (is_cmd23_enabled()) { - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); /* set block count */ cmd.cmd_idx = EMMC_CMD23; cmd.cmd_arg = size / EMMC_BLOCK_SIZE; @@ -189,7 +190,7 @@ size_t emmc_read_blocks(int lba, uintptr_t buf, size_t size) ret = ops->send_cmd(&cmd); assert(ret == 0); - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD18; } else { if (size > EMMC_BLOCK_SIZE) @@ -213,7 +214,7 @@ size_t emmc_read_blocks(int lba, uintptr_t buf, size_t size) if (is_cmd23_enabled() == 0) { if (size > EMMC_BLOCK_SIZE) { - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD12; ret = ops->send_cmd(&cmd); assert(ret == 0); @@ -240,17 +241,17 @@ size_t emmc_write_blocks(int lba, const uintptr_t buf, size_t size) if (is_cmd23_enabled()) { /* set block count */ - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD23; cmd.cmd_arg = size / EMMC_BLOCK_SIZE; 
cmd.resp_type = EMMC_RESPONSE_R1; ret = ops->send_cmd(&cmd); assert(ret == 0); - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD25; } else { - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); if (size > EMMC_BLOCK_SIZE) cmd.cmd_idx = EMMC_CMD25; else @@ -272,7 +273,7 @@ size_t emmc_write_blocks(int lba, const uintptr_t buf, size_t size) if (is_cmd23_enabled() == 0) { if (size > EMMC_BLOCK_SIZE) { - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD12; ret = ops->send_cmd(&cmd); assert(ret == 0); @@ -291,21 +292,21 @@ size_t emmc_erase_blocks(int lba, size_t size) assert(ops != 0); assert((size != 0) && ((size % EMMC_BLOCK_SIZE) == 0)); - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD35; cmd.cmd_arg = lba; cmd.resp_type = EMMC_RESPONSE_R1; ret = ops->send_cmd(&cmd); assert(ret == 0); - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD36; cmd.cmd_arg = lba + (size / EMMC_BLOCK_SIZE) - 1; cmd.resp_type = EMMC_RESPONSE_R1; ret = ops->send_cmd(&cmd); assert(ret == 0); - memset(&cmd, 0, sizeof(emmc_cmd_t)); + zeromem(&cmd, sizeof(emmc_cmd_t)); cmd.cmd_idx = EMMC_CMD38; cmd.resp_type = EMMC_RESPONSE_R1B; ret = ops->send_cmd(&cmd); diff --git a/drivers/io/io_block.c b/drivers/io/io_block.c index 4ec59bc732..a855581b3d 100644 --- a/drivers/io/io_block.c +++ b/drivers/io/io_block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +36,7 @@ #include #include #include +#include typedef struct { io_block_dev_spec_t *dev_spec; @@ -135,8 +136,8 @@ static int free_dev_info(io_dev_info_t *dev_info) result = find_first_block_state(state->dev_spec, &index); if (result == 0) { /* free if device info is valid */ - memset(state, 0, sizeof(block_dev_state_t)); - memset(dev_info, 0, sizeof(io_dev_info_t)); + zeromem(state, sizeof(block_dev_state_t)); + zeromem(dev_info, sizeof(io_dev_info_t)); --block_dev_count; } diff --git a/drivers/io/io_fip.c b/drivers/io/io_fip.c index 99cf15b970..6724fc3bc1 100644 --- a/drivers/io/io_fip.c +++ b/drivers/io/io_fip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,7 @@ #include #include #include +#include #include /* Useful for printing UUIDs when debugging.*/ @@ -351,7 +352,7 @@ static int fip_file_close(io_entity_t *entity) * If we had malloc() we would free() here. */ if (current_file.entry.offset_address != 0) { - memset(¤t_file, 0, sizeof(current_file)); + zeromem(¤t_file, sizeof(current_file)); } /* Clear the Entity info. */ diff --git a/drivers/io/io_memmap.c b/drivers/io/io_memmap.c index fe39652bd2..53af4f68fc 100644 --- a/drivers/io/io_memmap.c +++ b/drivers/io/io_memmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include #include #include +#include /* As we need to be able to keep state for seek, only one file can be open * at a time. Make this a structure and point to the entity->info. When we @@ -231,7 +232,7 @@ static int memmap_block_close(io_entity_t *entity) entity->info = 0; /* This would be a mem free() if we had malloc.*/ - memset((void *)¤t_file, 0, sizeof(current_file)); + zeromem((void *)¤t_file, sizeof(current_file)); return 0; } diff --git a/drivers/partition/gpt.c b/drivers/partition/gpt.c index 9240d5a7bf..05f13f341a 100644 --- a/drivers/partition/gpt.c +++ b/drivers/partition/gpt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include #include #include +#include static int unicode_to_ascii(unsigned short *str_in, unsigned char *str_out) { @@ -65,7 +66,7 @@ int parse_gpt_entry(gpt_entry_t *gpt_entry, partition_entry_t *entry) return -EINVAL; } - memset(entry, 0, sizeof(partition_entry_t)); + zeromem(entry, sizeof(partition_entry_t)); result = unicode_to_ascii(gpt_entry->name, (uint8_t *)entry->name); if (result != 0) { return result; diff --git a/lib/el3_runtime/aarch32/context_mgmt.c b/lib/el3_runtime/aarch32/context_mgmt.c index 51b77595ac..df22eaf51e 100644 --- a/lib/el3_runtime/aarch32/context_mgmt.c +++ b/lib/el3_runtime/aarch32/context_mgmt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +38,7 @@ #include #include #include +#include /******************************************************************************* * Context management library initialisation routine. This library is used by @@ -84,7 +85,7 @@ static void cm_init_context_common(cpu_context_t *ctx, const entry_point_info_t security_state = GET_SECURITY_STATE(ep->h.attr); /* Clear any residual register values from the context */ - memset(ctx, 0, sizeof(*ctx)); + zeromem(ctx, sizeof(*ctx)); reg_ctx = get_regs_ctx(ctx); diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c index e26950dff2..5cce8793df 100644 --- a/lib/el3_runtime/aarch64/context_mgmt.c +++ b/lib/el3_runtime/aarch64/context_mgmt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,6 +39,7 @@ #include #include #include +#include /******************************************************************************* @@ -91,7 +92,7 @@ static void cm_init_context_common(cpu_context_t *ctx, const entry_point_info_t security_state = GET_SECURITY_STATE(ep->h.attr); /* Clear any residual register values from the context */ - memset(ctx, 0, sizeof(*ctx)); + zeromem(ctx, sizeof(*ctx)); /* * Base the context SCR on the current value, adjust for entry point diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c index 68cdd6eb12..b6e162bece 100644 --- a/lib/psci/psci_common.c +++ b/lib/psci/psci_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,6 +37,7 @@ #include #include #include +#include #include "psci_private.h" /* @@ -622,7 +623,7 @@ static int psci_get_ns_ep_info(entry_point_info_t *ep, SET_PARAM_HEAD(ep, PARAM_EP, VERSION_1, ep_attr); ep->pc = entrypoint; - memset(&ep->args, 0, sizeof(ep->args)); + zeromem(&ep->args, sizeof(ep->args)); ep->args.arg0 = context_id; mode = scr & SCR_HCE_BIT ? MODE32_hyp : MODE32_svc; @@ -659,7 +660,7 @@ static int psci_get_ns_ep_info(entry_point_info_t *ep, SET_PARAM_HEAD(ep, PARAM_EP, VERSION_1, ep_attr); ep->pc = entrypoint; - memset(&ep->args, 0, sizeof(ep->args)); + zeromem(&ep->args, sizeof(ep->args)); ep->args.arg0 = context_id; /* @@ -957,7 +958,7 @@ unsigned int psci_get_max_phys_off_afflvl(void) { psci_power_state_t state_info; - memset(&state_info, 0, sizeof(state_info)); + zeromem(&state_info, sizeof(state_info)); psci_get_target_local_pwr_states(PLAT_MAX_PWR_LVL, &state_info); return psci_find_target_suspend_lvl(&state_info); diff --git a/plat/arm/common/arm_bl2_setup.c b/plat/arm/common/arm_bl2_setup.c index 5f30708093..007108d120 100644 --- a/plat/arm/common/arm_bl2_setup.c +++ b/plat/arm/common/arm_bl2_setup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +38,7 @@ #include #include #include +#include /* Data structure which holds the extents of the trusted SRAM for BL2 */ static meminfo_t bl2_tzram_layout __aligned(CACHE_WRITEBACK_GRANULE); @@ -123,7 +124,7 @@ bl31_params_t *bl2_plat_get_bl31_params(void) * Initialise the memory for all the arguments that needs to * be passed to BL31 */ - memset(&bl31_params_mem, 0, sizeof(bl2_to_bl31_params_mem_t)); + zeromem(&bl31_params_mem, sizeof(bl2_to_bl31_params_mem_t)); /* Assign memory for TF related information */ bl2_to_bl31_params = &bl31_params_mem.bl31_params; diff --git a/plat/arm/css/drivers/scpi/css_scpi.c b/plat/arm/css/drivers/scpi/css_scpi.c index f419abd03a..65ae978f86 100644 --- a/plat/arm/css/drivers/scpi/css_scpi.c +++ b/plat/arm/css/drivers/scpi/css_scpi.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include #include #include +#include #include "css_mhu.h" #include "css_scpi.h" @@ -204,7 +205,8 @@ int scpi_get_css_power_state(unsigned int mpidr, unsigned int *cpu_state_p, scpi_secure_message_start(); /* Populate request headers */ - cmd = memset(SCPI_CMD_HEADER_AP_TO_SCP, 0, sizeof(*cmd)); + zeromem(SCPI_CMD_HEADER_AP_TO_SCP, sizeof(*cmd)); + cmd = SCPI_CMD_HEADER_AP_TO_SCP; cmd->id = SCPI_CMD_GET_CSS_POWER_STATE; /* diff --git a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c index 689f2d7cd9..4f7c71e4af 100644 --- a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c +++ b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #define TEGRA_GPU_RESET_REG_OFFSET 0x28c diff --git a/plat/qemu/qemu_bl2_setup.c b/plat/qemu/qemu_bl2_setup.c index dba3beeb58..738d671ad8 100644 --- a/plat/qemu/qemu_bl2_setup.c +++ b/plat/qemu/qemu_bl2_setup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ #include #include "qemu_private.h" #include - +#include /* * The next 2 constants identify the extents of the code & RO data region. @@ -91,7 +91,7 @@ bl31_params_t *bl2_plat_get_bl31_params(void) * Initialise the memory for all the arguments that needs to * be passed to BL3-1 */ - memset(&bl31_params_mem, 0, sizeof(bl2_to_bl31_params_mem_t)); + zeromem(&bl31_params_mem, sizeof(bl2_to_bl31_params_mem_t)); /* Assign memory for TF related information */ bl2_to_bl31_params = &bl31_params_mem.bl31_params; diff --git a/plat/rockchip/rk3399/drivers/dram/dram_spec_timing.c b/plat/rockchip/rk3399/drivers/dram/dram_spec_timing.c index fbf1d39786..3f6ab2f2c1 100644 --- a/plat/rockchip/rk3399/drivers/dram/dram_spec_timing.c +++ b/plat/rockchip/rk3399/drivers/dram/dram_spec_timing.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,7 @@ #include #include #include +#include #include "dram_spec_timing.h" static const uint8_t ddr3_cl_cwl[][7] = { @@ -228,7 +229,7 @@ static void ddr3_get_parameter(struct timing_related_config *timing_config, uint32_t ddr_capability_per_die = get_max_die_capability(timing_config); uint32_t tmp; - memset((void *)pdram_timing, 0, sizeof(struct dram_timing_t)); + zeromem((void *)pdram_timing, sizeof(struct dram_timing_t)); pdram_timing->mhz = nmhz; pdram_timing->al = 0; pdram_timing->bl = timing_config->bl; @@ -441,7 +442,7 @@ static void lpddr2_get_parameter(struct timing_related_config *timing_config, uint32_t ddr_capability_per_die = get_max_die_capability(timing_config); uint32_t tmp, trp_tmp, trppb_tmp, tras_tmp, twr_tmp, bl_tmp; - memset((void *)pdram_timing, 0, sizeof(struct dram_timing_t)); + zeromem((void *)pdram_timing, sizeof(struct dram_timing_t)); pdram_timing->mhz = nmhz; pdram_timing->al = 0; pdram_timing->bl = timing_config->bl; @@ -678,7 +679,7 @@ static void lpddr3_get_parameter(struct timing_related_config *timing_config, uint32_t ddr_capability_per_die = get_max_die_capability(timing_config); uint32_t tmp, trp_tmp, trppb_tmp, tras_tmp, twr_tmp, bl_tmp; - memset((void *)pdram_timing, 0, sizeof(struct dram_timing_t)); + zeromem((void *)pdram_timing, sizeof(struct dram_timing_t)); pdram_timing->mhz = nmhz; pdram_timing->al = 0; pdram_timing->bl = timing_config->bl; @@ -968,7 +969,7 @@ static void lpddr4_get_parameter(struct timing_related_config *timing_config, uint32_t ddr_capability_per_die = get_max_die_capability(timing_config); uint32_t tmp, trp_tmp, trppb_tmp, tras_tmp; - memset((void *)pdram_timing, 0, sizeof(struct dram_timing_t)); + zeromem((void *)pdram_timing, sizeof(struct dram_timing_t)); pdram_timing->mhz = nmhz; pdram_timing->al = 0; pdram_timing->bl = timing_config->bl; diff --git a/plat/xilinx/zynqmp/pm_service/pm_client.c b/plat/xilinx/zynqmp/pm_service/pm_client.c index e102b4f2cc..0fe17b5e68 100644 --- a/plat/xilinx/zynqmp/pm_service/pm_client.c +++ b/plat/xilinx/zynqmp/pm_service/pm_client.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,7 @@ #include #include #include +#include #include "pm_api_sys.h" #include "pm_client.h" #include "pm_ipi.h" @@ -188,7 +189,7 @@ static void pm_client_set_wakeup_sources(void) uint8_t pm_wakeup_nodes_set[NODE_MAX]; uintptr_t isenabler1 = BASE_GICD_BASE + GICD_ISENABLER + 4; - memset(&pm_wakeup_nodes_set, 0, sizeof(pm_wakeup_nodes_set)); + zeromem(&pm_wakeup_nodes_set, sizeof(pm_wakeup_nodes_set)); for (reg_num = 0; reg_num < NUM_GICD_ISENABLER; reg_num++) { uint32_t base_irq = reg_num << ISENABLER_SHIFT; diff --git a/services/spd/opteed/opteed_common.c b/services/spd/opteed/opteed_common.c index 2f20b7cae4..910f900bdd 100644 --- a/services/spd/opteed/opteed_common.c +++ b/services/spd/opteed/opteed_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include #include #include +#include #include "opteed_private.h" /******************************************************************************* @@ -73,7 +74,7 @@ void opteed_init_optee_ep_state(struct entry_point_info *optee_entry_point, DAIF_FIQ_BIT | DAIF_IRQ_BIT | DAIF_ABT_BIT); - memset(&optee_entry_point->args, 0, sizeof(optee_entry_point->args)); + zeromem(&optee_entry_point->args, sizeof(optee_entry_point->args)); } /******************************************************************************* diff --git a/services/spd/tspd/tspd_common.c b/services/spd/tspd/tspd_common.c index 3dcefea95b..70959d7567 100644 --- a/services/spd/tspd/tspd_common.c +++ b/services/spd/tspd/tspd_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,6 +35,7 @@ #include #include #include +#include #include "tspd_private.h" /******************************************************************************* @@ -78,7 +79,7 @@ void tspd_init_tsp_ep_state(struct entry_point_info *tsp_entry_point, tsp_entry_point->spsr = SPSR_64(MODE_EL1, MODE_SP_ELX, DISABLE_ALL_EXCEPTIONS); - memset(&tsp_entry_point->args, 0, sizeof(tsp_entry_point->args)); + zeromem(&tsp_entry_point->args, sizeof(tsp_entry_point->args)); } /*******************************************************************************