mirror of
https://github.com/sheumann/hush.git
synced 2025-01-10 16:29:44 +00:00
sha3: make size/speed optimization decision configurable
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
60cb48ca50
commit
30a8652fbf
@ -28,6 +28,16 @@ config MD5_SMALL
|
|||||||
2 3.0 5088
|
2 3.0 5088
|
||||||
3 (smallest) 5.1 4912
|
3 (smallest) 5.1 4912
|
||||||
|
|
||||||
|
config SHA3_SMALL
|
||||||
|
int "SHA3: Trade bytes for speed (0:fast, 1:slow)"
|
||||||
|
default 1
|
||||||
|
range 0 1
|
||||||
|
help
|
||||||
|
Trade binary size versus speed for the sha3sum algorithm.
|
||||||
|
SHA3_SMALL=0 compared to SHA3_SMALL=1 (approximate):
|
||||||
|
64-bit x86: +270 bytes of code, 45% faster
|
||||||
|
32-bit x86: +450 bytes of code, 75% faster
|
||||||
|
|
||||||
config FEATURE_FAST_TOP
|
config FEATURE_FAST_TOP
|
||||||
bool "Faster /proc scanning code (+100 bytes)"
|
bool "Faster /proc scanning code (+100 bytes)"
|
||||||
default y
|
default y
|
||||||
|
@ -918,6 +918,16 @@ void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
|
|||||||
* Busybox modifications (C) Lauri Kasanen, under the GPLv2.
|
* Busybox modifications (C) Lauri Kasanen, under the GPLv2.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if CONFIG_SHA3_SMALL < 0
|
||||||
|
# define SHA3_SMALL 0
|
||||||
|
#elif CONFIG_SHA3_SMALL > 1
|
||||||
|
# define SHA3_SMALL 1
|
||||||
|
#else
|
||||||
|
# define SHA3_SMALL CONFIG_SHA3_SMALL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define ARCH_IS_64BIT (sizeof(long) >= sizeof(uint64_t))
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
cKeccakR_SizeInBytes = 576 / 8,
|
cKeccakR_SizeInBytes = 576 / 8,
|
||||||
cKeccakNumberOfRounds = 24,
|
cKeccakNumberOfRounds = 24,
|
||||||
@ -967,8 +977,6 @@ static const uint8_t KeccakF_Mod5[10] = {
|
|||||||
static void KeccakF(uint64_t *state)
|
static void KeccakF(uint64_t *state)
|
||||||
{
|
{
|
||||||
uint8_t x, y;
|
uint8_t x, y;
|
||||||
uint64_t temp;
|
|
||||||
uint64_t BC[5];
|
|
||||||
int round;
|
int round;
|
||||||
|
|
||||||
if (BB_BIG_ENDIAN) {
|
if (BB_BIG_ENDIAN) {
|
||||||
@ -979,30 +987,61 @@ static void KeccakF(uint64_t *state)
|
|||||||
|
|
||||||
for (round = 0; round < cKeccakNumberOfRounds; ++round) {
|
for (round = 0; round < cKeccakNumberOfRounds; ++round) {
|
||||||
/* Theta */
|
/* Theta */
|
||||||
for (x = 0; x < 5; ++x) {
|
{
|
||||||
BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^
|
uint64_t BC[5];
|
||||||
state[15 + x] ^ state[20 + x];
|
for (x = 0; x < 5; ++x) {
|
||||||
}
|
BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^
|
||||||
for (x = 0; x < 5; ++x) {
|
state[15 + x] ^ state[20 + x];
|
||||||
temp = BC[KeccakF_Mod5[x + 4]] ^
|
}
|
||||||
rotl64(BC[KeccakF_Mod5[x + 1]], 1);
|
for (x = 0; x < 5; ++x) {
|
||||||
|
uint64_t temp = BC[KeccakF_Mod5[x + 4]] ^
|
||||||
for (y = 0; y <= 20; y += 5) {
|
rotl64(BC[KeccakF_Mod5[x + 1]], 1);
|
||||||
state[y + x] ^= temp;
|
if (SHA3_SMALL && !ARCH_IS_64BIT) {
|
||||||
|
for (y = 0; y <= 20; y += 5)
|
||||||
|
state[y + x] ^= temp;
|
||||||
|
} else {
|
||||||
|
/* on 64-bit arch, this is actually smaller too */
|
||||||
|
state[0 + x] ^= temp;
|
||||||
|
state[5 + x] ^= temp;
|
||||||
|
state[10 + x] ^= temp;
|
||||||
|
state[15 + x] ^= temp;
|
||||||
|
state[20 + x] ^= temp;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Rho Pi */
|
/* Rho Pi */
|
||||||
temp = state[1];
|
if (SHA3_SMALL) {
|
||||||
for (x = 0; x < 24; ++x) {
|
uint64_t t1 = state[1];
|
||||||
BC[0] = state[KeccakF_PiLane[x]];
|
for (x = 0; x < 24; ++x) {
|
||||||
state[KeccakF_PiLane[x]] =
|
uint64_t t0 = state[KeccakF_PiLane[x]];
|
||||||
rotl64(temp, KeccakF_RotationConstants[x]);
|
state[KeccakF_PiLane[x]] = rotl64(t1, KeccakF_RotationConstants[x]);
|
||||||
temp = BC[0];
|
t1 = t0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* Especially large benefit for 32-bit arch:
|
||||||
|
* 64-bit rotations by non-constant usually are SLOW on those.
|
||||||
|
* We resort to unrolling here.
|
||||||
|
* This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[],
|
||||||
|
* but generates 300-500 more bytes of code.
|
||||||
|
*/
|
||||||
|
uint64_t t0;
|
||||||
|
uint64_t t1 = state[1];
|
||||||
|
#define RhoPi_twice(x) \
|
||||||
|
t0 = state[KeccakF_PiLane[x ]]; state[KeccakF_PiLane[x ]] = rotl64(t1, KeccakF_RotationConstants[x ]); \
|
||||||
|
t1 = state[KeccakF_PiLane[x+1]]; state[KeccakF_PiLane[x+1]] = rotl64(t0, KeccakF_RotationConstants[x+1]);
|
||||||
|
RhoPi_twice(0); RhoPi_twice(2);
|
||||||
|
RhoPi_twice(4); RhoPi_twice(6);
|
||||||
|
RhoPi_twice(8); RhoPi_twice(10);
|
||||||
|
RhoPi_twice(12); RhoPi_twice(14);
|
||||||
|
RhoPi_twice(16); RhoPi_twice(18);
|
||||||
|
RhoPi_twice(20); RhoPi_twice(22);
|
||||||
|
#undef RhoPi_twice
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Chi */
|
/* Chi */
|
||||||
for (y = 0; y < 25; y += 5) {
|
for (y = 0; y <= 20; y += 5) {
|
||||||
|
uint64_t BC[5];
|
||||||
BC[0] = state[y + 0];
|
BC[0] = state[y + 0];
|
||||||
BC[1] = state[y + 1];
|
BC[1] = state[y + 1];
|
||||||
BC[2] = state[y + 2];
|
BC[2] = state[y + 2];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user