From c48a5c607d8bdb422224a9767925a30d486a1109 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko
Date: Mon, 18 Oct 2010 14:48:30 +0200
Subject: [PATCH] hash_md5_sha: use common finalization routine for MD5 and sha1/256. -15 bytes

Signed-off-by: Denys Vlasenko
---
 include/libbb.h      |  46 ++++++------
 libbb/hash_md5_sha.c | 167 ++++++++++++++++++++++---------------------
 2 files changed, 110 insertions(+), 103 deletions(-)

diff --git a/include/libbb.h b/include/libbb.h
index c161ed7e2..3c8764b5c 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1515,35 +1515,14 @@ enum {
 };
 void FAST_FUNC read_base64(FILE *src_stream, FILE *dst_stream, int flags);
 
-typedef struct sha1_ctx_t {
-	uint32_t hash[8];    /* 5, +3 elements for sha256 */
-	uint64_t total64;    /* must be directly after hash[] */
-	uint8_t wbuffer[64]; /* NB: always correctly aligned for uint64_t */
-	void (*process_block)(struct sha1_ctx_t*) FAST_FUNC;
-} sha1_ctx_t;
-void sha1_begin(sha1_ctx_t *ctx) FAST_FUNC;
-void sha1_hash(sha1_ctx_t *ctx, const void *data, size_t length) FAST_FUNC;
-void sha1_end(sha1_ctx_t *ctx, void *resbuf) FAST_FUNC;
-typedef struct sha1_ctx_t sha256_ctx_t;
-void sha256_begin(sha256_ctx_t *ctx) FAST_FUNC;
-#define sha256_hash sha1_hash
-#define sha256_end sha1_end
-typedef struct sha512_ctx_t {
-	uint64_t hash[8];
-	uint64_t total64[2]; /* must be directly after hash[] */
-	uint8_t wbuffer[128]; /* NB: always correctly aligned for uint64_t */
-} sha512_ctx_t;
-void sha512_begin(sha512_ctx_t *ctx) FAST_FUNC;
-void sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len) FAST_FUNC;
-void sha512_end(sha512_ctx_t *ctx, void *resbuf) FAST_FUNC;
 #if 1
 typedef struct md5_ctx_t {
+	char wbuffer[64]; /* NB: always correctly aligned for uint64_t */
+	uint64_t total64;
 	uint32_t A;
 	uint32_t B;
 	uint32_t C;
 	uint32_t D;
-	uint64_t total64;
-	char wbuffer[64]; /* NB: always correctly aligned for uint64_t */
 } md5_ctx_t;
 #else
 /* libbb/md5prime.c uses a bit different one: */
@@ -1556,6 +1535,27 @@ typedef struct md5_ctx_t {
 void md5_begin(md5_ctx_t *ctx) FAST_FUNC;
 void md5_hash(md5_ctx_t *ctx, const void *data, size_t length) FAST_FUNC;
 void md5_end(md5_ctx_t *ctx, void *resbuf) FAST_FUNC;
+typedef struct sha1_ctx_t {
+	uint8_t wbuffer[64]; /* NB: always correctly aligned for uint64_t */
+	uint64_t total64;    /* must be directly before hash[] */
+	uint32_t hash[8];    /* 5, +3 elements for sha256 */
+	void (*process_block)(struct sha1_ctx_t*) FAST_FUNC;
+} sha1_ctx_t;
+void sha1_begin(sha1_ctx_t *ctx) FAST_FUNC;
+void sha1_hash(sha1_ctx_t *ctx, const void *data, size_t length) FAST_FUNC;
+void sha1_end(sha1_ctx_t *ctx, void *resbuf) FAST_FUNC;
+typedef struct sha1_ctx_t sha256_ctx_t;
+void sha256_begin(sha256_ctx_t *ctx) FAST_FUNC;
+#define sha256_hash sha1_hash
+#define sha256_end sha1_end
+typedef struct sha512_ctx_t {
+	uint64_t total64[2]; /* must be directly before hash[] */
+	uint64_t hash[8];
+	uint8_t wbuffer[128]; /* NB: always correctly aligned for uint64_t */
+} sha512_ctx_t;
+void sha512_begin(sha512_ctx_t *ctx) FAST_FUNC;
+void sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len) FAST_FUNC;
+void sha512_end(sha512_ctx_t *ctx, void *resbuf) FAST_FUNC;
 
 uint32_t *crc32_filltable(uint32_t *tbl256, int endian) FAST_FUNC;
 
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 3e708ef7e..b07ba55f7 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -1,4 +1,72 @@
 /* vi: set sw=4 ts=4: */
+/*
+ * Utility routines.
+ *
+ * Copyright (C) 2010 Denys Vlasenko
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
+ */
+
+#include "libbb.h"
+
+/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
+ * (for rotX32, there is no difference). Why? My guess is that
+ * macro requires clever common subexpression elimination heuristics
+ * in gcc, while inline basically forces it to happen.
+ */
+//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
+static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
+{
+	return (x << n) | (x >> (32 - n));
+}
+//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
+static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
+{
+	return (x >> n) | (x << (32 - n));
+}
+/* rotr64 is needed for sha512 only: */
+//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
+static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
+{
+	return (x >> n) | (x << (64 - n));
+}
+
+
+typedef struct common64_ctx_t {
+	char wbuffer[64]; /* NB: always correctly aligned for uint64_t */
+	uint64_t total64;
+} common64_ctx_t;
+
+typedef void FAST_FUNC process_block64_func(void*);
+
+static void FAST_FUNC common64_end(void *vctx, process_block64_func process_block64, int swap_needed)
+{
+	common64_ctx_t *ctx = vctx;
+	unsigned bufpos = ctx->total64 & 63;
+	/* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
+	ctx->wbuffer[bufpos++] = 0x80;
+
+	/* This loop iterates either once or twice, no more, no less */
+	while (1) {
+		unsigned remaining = 64 - bufpos;
+		memset(ctx->wbuffer + bufpos, 0, remaining);
+		/* Do we have enough space for the length count? */
+		if (remaining >= 8) {
+			/* Store the 64-bit counter of bits in the buffer */
+			uint64_t t = ctx->total64 << 3;
+			if (swap_needed)
+				t = bb_bswap_64(t);
+			/* wbuffer is suitably aligned for this */
+			*(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
+		}
+		process_block64(ctx);
+		if (remaining >= 8)
+			break;
+		bufpos = 0;
+	}
+}
+
+
 /*
  * Based on shasum from http://www.netsw.org/crypto/hash/
  * Majorly hacked up to use Dr Brian Gladman's sha1 code
@@ -28,31 +96,6 @@
  * then rebuild and compare "shaNNNsum bigfile" results.
  */
 
-#include "libbb.h"
-
-/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
- * (for rotX32, there is no difference). Why? My guess is that
- * macro requires clever common subexpression elimination heuristics
- * in gcc, while inline basically forces it to happen.
- */
-//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
-static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
-{
-	return (x << n) | (x >> (32 - n));
-}
-//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
-static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
-{
-	return (x >> n) | (x << (32 - n));
-}
-/* rotr64 in needed for sha512 only: */
-//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
-static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
-{
-	return (x >> n) | (x << (64 - n));
-}
-
-
 static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 {
 	unsigned t;
@@ -308,6 +351,8 @@ void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
 }
 
 static const uint32_t init256[] = {
+	0,
+	0,
 	0x6a09e667,
 	0xbb67ae85,
 	0x3c6ef372,
@@ -316,10 +361,10 @@ static const uint32_t init256[] = {
 	0x9b05688c,
 	0x1f83d9ab,
 	0x5be0cd19,
-	0,
-	0,
 };
 static const uint32_t init512_lo[] = {
+	0,
+	0,
 	0xf3bcc908,
 	0x84caa73b,
 	0xfe94f82b,
@@ -328,16 +373,14 @@ static const uint32_t init512_lo[] = {
 	0x2b3e6c1f,
 	0xfb41bd6b,
 	0x137e2179,
-	0,
-	0,
 };
 
 /* Initialize structure containing state of computation.
    (FIPS 180-2:5.3.2)  */
 void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
 {
-	memcpy(ctx->hash, init256, sizeof(init256));
-	/*ctx->total64 = 0; - done by extending init256 with two 32-bit zeros */
+	memcpy(&ctx->total64, init256, sizeof(init256));
+	/*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
 	ctx->process_block = sha256_process_block64;
 }
 
@@ -346,9 +389,10 @@ void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
 void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
 {
 	int i;
-	/* Two extra iterations zero out ctx->total64[] */
-	for (i = 0; i < 8+2; i++)
-		ctx->hash[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
+	/* Two extra iterations zero out ctx->total64[2] */
+	uint64_t *tp = ctx->total64;
+	for (i = 0; i < 2+8; i++)
+		tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
 	/*ctx->total64[0] = ctx->total64[1] = 0; - already done */
 }
 
@@ -448,37 +492,19 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 /* Used also for sha256 */
 void FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
 {
-	unsigned bufpos = ctx->total64 & 63;
+	unsigned hash_size;
 
-	/* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
-	ctx->wbuffer[bufpos++] = 0x80;
+	/* SHA stores total in BE, need to swap on LE arches: */
+	common64_end(ctx, (process_block64_func*) ctx->process_block, /*swap_needed:*/ BB_LITTLE_ENDIAN);
 
-	/* This loop iterates either once or twice, no more, no less */
-	while (1) {
-		unsigned remaining = 64 - bufpos;
-		memset(ctx->wbuffer + bufpos, 0, remaining);
-		/* Do we have enough space for the length count? */
-		if (remaining >= 8) {
-			/* Store the 64-bit counter of bits in the buffer in BE format */
-			uint64_t t = ctx->total64 << 3;
-			t = SWAP_BE64(t);
-			/* wbuffer is suitably aligned for this */
-			*(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
-		}
-		ctx->process_block(ctx);
-		if (remaining >= 8)
-			break;
-		bufpos = 0;
-	}
-
-	bufpos = (ctx->process_block == sha1_process_block64) ? 5 : 8;
+	hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
 	/* This way we do not impose alignment constraints on resbuf: */
 	if (BB_LITTLE_ENDIAN) {
 		unsigned i;
-		for (i = 0; i < bufpos; ++i)
+		for (i = 0; i < hash_size; ++i)
 			ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
 	}
-	memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * bufpos);
+	memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * hash_size);
 }
 
 void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
@@ -566,7 +592,7 @@ void FAST_FUNC md5_begin(md5_ctx_t *ctx)
 #define FI(b, c, d) (c ^ (b | ~d))
 
 /* Hash a single block, 64 bytes long and 4-byte aligned */
-static void md5_process_block64(md5_ctx_t *ctx)
+static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
 {
 #if MD5_SIZE_VS_SPEED > 0
 	/* Before we start, one word to the strange constants.
@@ -927,27 +953,8 @@ void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
  */
 void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
 {
-	unsigned bufpos = ctx->total64 & 63;
-	/* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
-	ctx->wbuffer[bufpos++] = 0x80;
-
-	/* This loop iterates either once or twice, no more, no less */
-	while (1) {
-		unsigned remaining = 64 - bufpos;
-		memset(ctx->wbuffer + bufpos, 0, remaining);
-		/* Do we have enough space for the length count? */
-		if (remaining >= 8) {
-			/* Store the 64-bit counter of bits in the buffer in LE format */
-			uint64_t t = ctx->total64 << 3;
-			t = SWAP_LE64(t);
-			/* wbuffer is suitably aligned for this */
-			*(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
-		}
-		md5_process_block64(ctx);
-		if (remaining >= 8)
-			break;
-		bufpos = 0;
-	}
+	/* MD5 stores total in LE, need to swap on BE arches: */
+	common64_end(ctx, (process_block64_func*) md5_process_block64, /*swap_needed:*/ BB_BIG_ENDIAN);
 
 	/* The MD5 result is in little endian byte order.
 	 * We (ab)use the fact that A-D are consecutive in memory.
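
The change works because, after this patch, md5_ctx_t and sha1_ctx_t both
start with the same two members, wbuffer[64] followed by total64, so
common64_end() can treat a pointer to either context as a pointer to that
shared prefix. Below is a minimal, self-contained C sketch of the same layout
trick for readers who want to poke at it outside the busybox tree. All demo_*
names are invented for illustration; the byte-swap loop and the memcpy store
stand in for busybox's bb_bswap_64() and its aligned direct store:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Common prefix shared by the "real" contexts: the 64-byte block buffer
 * first, with the running byte counter directly after it. */
typedef struct demo_common64_ctx_t {
	char wbuffer[64];
	uint64_t total64;
} demo_common64_ctx_t;

/* An MD5-like context: same prefix, hash state after it */
typedef struct demo_md5_ctx_t {
	char wbuffer[64];
	uint64_t total64;
	uint32_t A, B, C, D;
} demo_md5_ctx_t;

typedef void demo_process_block64_func(void *);

/* Shared finalization: pad with 0x80,0,0,... and put the bit count into
 * the last 8 bytes of the final block. swap_needed flips the count's byte
 * order (MD5 wants it little endian, SHA wants it big endian). */
static void demo_common64_end(void *vctx, demo_process_block64_func process,
		int swap_needed)
{
	demo_common64_ctx_t *ctx = vctx;
	unsigned bufpos = ctx->total64 & 63;

	ctx->wbuffer[bufpos++] = (char)0x80;
	/* One pass if the count fits, two if a whole padding block is needed */
	while (1) {
		unsigned remaining = 64 - bufpos;
		memset(ctx->wbuffer + bufpos, 0, remaining);
		if (remaining >= 8) {
			uint64_t t = ctx->total64 << 3; /* bytes -> bits */
			if (swap_needed) {
				/* portable byte reverse; stand-in for bb_bswap_64() */
				uint64_t r = 0;
				unsigned i;
				for (i = 0; i < 8; i++, t >>= 8)
					r = (r << 8) | (t & 0xff);
				t = r;
			}
			/* memcpy sidesteps the alignment assumption the real code
			 * documents for wbuffer; stores host order, with swap_needed
			 * above picking the wire order */
			memcpy(&ctx->wbuffer[64 - 8], &t, 8);
		}
		process(ctx);
		if (remaining >= 8)
			break;
		bufpos = 0; /* second pass: an all-padding block */
	}
}

/* Stand-in for md5_process_block64(): just dump the block's length field */
static void demo_process(void *vctx)
{
	demo_common64_ctx_t *ctx = vctx;
	unsigned i;
	printf("length field:");
	for (i = 56; i < 64; i++)
		printf(" %02x", (unsigned char)ctx->wbuffer[i]);
	printf("\n");
}

int main(void)
{
	demo_md5_ctx_t ctx;

	memset(&ctx, 0, sizeof(ctx));
	memcpy(ctx.wbuffer, "abc", 3); /* pretend 3 bytes were hashed */
	ctx.total64 = 3;
	/* 0 = keep host order, like MD5 on a little-endian machine */
	demo_common64_end(&ctx, demo_process, /*swap_needed:*/ 0);
	return 0;
}

On a little-endian machine this prints "length field: 18 00 00 00 00 00 00 00",
i.e. the 24-bit length ("abc" is 3 bytes) landing in the final 8 bytes of the
padded block, exactly where md5_end() and sha1_end() arrange for it to go.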