tls: format and send CLIENT_KEY_EXCHANGE

$ ./busybox tls kernel.org
insize:0 tail:0
got block len:74
got HANDSHAKE
got SERVER_HELLO
insize:79 tail:4265
got block len:4392
got HANDSHAKE
got CERTIFICATE
entered der @0x8b217a7:0x30 len:1452 inner_byte @0x8b217ab:0x30
entered der @0x8b217ab:0x30 len:1172 inner_byte @0x8b217af:0xa0
skipped der 0xa0, next byte 0x02
skipped der 0x02, next byte 0x30
skipped der 0x30, next byte 0x30
skipped der 0x30, next byte 0x30
skipped der 0x30, next byte 0x30
skipped der 0x30, next byte 0x30
entered der @0x8b218b4:0x30 len:418 inner_byte @0x8b218b8:0x30
skipped der 0x30, next byte 0x03
entered der @0x8b218c7:0x03 len:399 inner_byte @0x8b218cb:0x00
key bytes:399, first:0x00
entered der @0x8b218cc:0x30 len:394 inner_byte @0x8b218d0:0x02
binary bytes:385, first:0x00
skipped der 0x02, next byte 0x02
binary bytes:3, first:0x01
server_rsa_pub_key.size:384
insize:4397 tail:9
got block len:4
got SERVER_HELLO_DONE
insize:9 tail:0
^C

Next step: send CHANGE_CIPHER_SPEC... and actually implement it.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2017-01-15 00:12:42 +01:00
parent 2a17d1fc9b
commit 11d0096516
9 changed files with 5280 additions and 36 deletions

View File

@ -1,7 +1,7 @@
/*
* Licensed under GPLv2, see file LICENSE in this source tree.
*
* Copyright (C) 2017 Denys Vlasenko
*
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
//config:config TLS
//config: bool "tls (debugging)"
@ -10,6 +10,11 @@
//applet:IF_TLS(APPLET(tls, BB_DIR_USR_BIN, BB_SUID_DROP))
//kbuild:lib-$(CONFIG_TLS) += tls.o
//kbuild:lib-$(CONFIG_TLS) += tls_pstm.o
//kbuild:lib-$(CONFIG_TLS) += tls_pstm_montgomery_reduce.o
//kbuild:lib-$(CONFIG_TLS) += tls_pstm_mul_comba.o
//kbuild:lib-$(CONFIG_TLS) += tls_pstm_sqr_comba.o
//kbuild:lib-$(CONFIG_TLS) += tls_rsa.o
////kbuild:lib-$(CONFIG_TLS) += tls_ciphers.o
////kbuild:lib-$(CONFIG_TLS) += tls_aes.o
////kbuild:lib-$(CONFIG_TLS) += tls_aes_gcm.o
@ -18,9 +23,7 @@
//usage: "HOST[:PORT]"
//usage:#define tls_full_usage "\n\n"
#include "libbb.h"
//#include "tls_cryptoapi.h"
//#include "tls_ciphers.h"
#include "tls.h"
#if 1
# define dbg(...) fprintf(stderr, __VA_ARGS__)
@ -28,23 +31,26 @@
# define dbg(...) ((void)0)
#endif
#define RECORD_TYPE_CHANGE_CIPHER_SPEC 20
#define RECORD_TYPE_ALERT 21
#define RECORD_TYPE_HANDSHAKE 22
#define RECORD_TYPE_APPLICATION_DATA 23
#define RECORD_TYPE_CHANGE_CIPHER_SPEC 20
#define RECORD_TYPE_ALERT 21
#define RECORD_TYPE_HANDSHAKE 22
#define RECORD_TYPE_APPLICATION_DATA 23
#define HANDSHAKE_HELLO_REQUEST 0
#define HANDSHAKE_CLIENT_HELLO 1
#define HANDSHAKE_SERVER_HELLO 2
#define HANDSHAKE_HELLO_VERIFY_REQUEST 3
#define HANDSHAKE_NEW_SESSION_TICKET 4
#define HANDSHAKE_CERTIFICATE 11
#define HANDSHAKE_SERVER_KEY_EXCHANGE 12
#define HANDSHAKE_CERTIFICATE_REQUEST 13
#define HANDSHAKE_SERVER_HELLO_DONE 14
#define HANDSHAKE_CERTIFICATE_VERIFY 15
#define HANDSHAKE_CLIENT_KEY_EXCHANGE 16
#define HANDSHAKE_FINISHED 20
#define HANDSHAKE_HELLO_REQUEST 0
#define HANDSHAKE_CLIENT_HELLO 1
#define HANDSHAKE_SERVER_HELLO 2
#define HANDSHAKE_HELLO_VERIFY_REQUEST 3
#define HANDSHAKE_NEW_SESSION_TICKET 4
#define HANDSHAKE_CERTIFICATE 11
#define HANDSHAKE_SERVER_KEY_EXCHANGE 12
#define HANDSHAKE_CERTIFICATE_REQUEST 13
#define HANDSHAKE_SERVER_HELLO_DONE 14
#define HANDSHAKE_CERTIFICATE_VERIFY 15
#define HANDSHAKE_CLIENT_KEY_EXCHANGE 16
#define HANDSHAKE_FINISHED 20
#define SSL_HS_RANDOM_SIZE 32
#define SSL_HS_RSA_PREMASTER_SIZE 48
#define SSL_NULL_WITH_NULL_NULL 0x0000
#define SSL_RSA_WITH_NULL_MD5 0x0001
@ -112,6 +118,7 @@
//TLS 1.2
#define TLS_MAJ 3
#define TLS_MIN 3
//#define CIPHER_ID TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA // ok, recvs SERVER_KEY_EXCHANGE *** matrixssl uses this on my box
//#define CIPHER_ID TLS_RSA_WITH_AES_256_CBC_SHA256 // ok, no SERVER_KEY_EXCHANGE
// All GCMs:
//#define CIPHER_ID TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 // SSL_ALERT_HANDSHAKE_FAILURE
@ -123,9 +130,9 @@
//#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384
//#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256 // SSL_ALERT_HANDSHAKE_FAILURE
//#define CIPHER_ID TLS_RSA_WITH_AES_256_GCM_SHA384 // ok, no SERVER_KEY_EXCHANGE
#define CIPHER_ID TLS_RSA_WITH_AES_128_GCM_SHA256 // ok, no SERVER_KEY_EXCHANGE
#define CIPHER_ID TLS_RSA_WITH_AES_128_GCM_SHA256 // ok, no SERVER_KEY_EXCHANGE *** select this?
//#define CIPHER_ID TLS_DH_anon_WITH_AES_256_CBC_SHA // SSL_ALERT_HANDSHAKE_FAILURE
// (tested b/c this one doesn't req server certs... no luck)
//^^^^^^^^^^^^^^^^^^^^^^^ (tested b/c this one doesn't req server certs... no luck)
//test TLS_RSA_WITH_AES_128_CBC_SHA, in tls 1.2 it's mandated to be always supported
struct record_hdr {
@ -137,8 +144,7 @@ struct record_hdr {
typedef struct tls_state {
int fd;
uint8_t *pubkey;
int pubkey_len;
psRsaKey_t server_rsa_pub_key;
// RFC 5246
// |6.2.1. Fragmentation
@ -170,6 +176,12 @@ typedef struct tls_state {
uint8_t inbuf[18*1024];
} tls_state_t;
/* Fill buf[0..len-1] with bytes read from /dev/urandom.
 * Anything other than a complete read of len bytes is fatal:
 * xfunc_die() is called.
 */
void tls_get_random(void *buf, unsigned len)
{
	ssize_t nread = open_read_close("/dev/urandom", buf, len);

	if (nread == len)
		return;
	xfunc_die();
}
static
tls_state_t *new_tls_state(void)
{
@ -286,7 +298,7 @@ static void send_client_hello(tls_state_t *tls)
hello.len24_lo = (sizeof(hello) - sizeof(hello.xhdr) - 4);
hello.proto_maj = TLS_MAJ;
hello.proto_min = TLS_MIN;
open_read_close("/dev/urandom", hello.rand32, sizeof(hello.rand32));
tls_get_random(hello.rand32, sizeof(hello.rand32));
//hello.session_id_len = 0;
//hello.cipherid_len16_hi = 0;
hello.cipherid_len16_lo = 2 * 1;
@ -407,7 +419,18 @@ static uint8_t *skip_der_item(uint8_t *der, uint8_t *end)
return new_der;
}
static void *find_key_in_der_cert(int *key_len, uint8_t *der, int len)
/* Load the payload of the DER item starting at 'der' into *pstm_n,
 * interpreting it as an unsigned big-endian number.
 *
 * pstm_n: destination bignum, (re)initialized here
 * der:    first byte (tag) of the DER item; the tag value is not checked here
 * end:    limit of the enclosing DER data, passed through to get_der_len()
 *
 * Dies if the item is empty or if a pstm call reports an error.
 */
static void der_binary_to_pstm(pstm_int *pstm_n, uint8_t *der, uint8_t *end)
{
	uint8_t *bin_ptr;
	unsigned len = get_der_len(&bin_ptr, der, end);

	/* An empty item has no payload byte: bin_ptr[0] below would read
	 * outside the item (possibly past 'end'), and an empty number is
	 * useless as an RSA modulus or exponent anyway.
	 */
	if (len == 0)
		xfunc_die();
	dbg("binary bytes:%u, first:0x%02x\n", len, bin_ptr[0]);

	/* Both calls return an int32 status; the PS_xxx/PSTM_xxx error codes
	 * are all negative. Do not continue with a half-built number.
	 */
	if (pstm_init_for_read_unsigned_bin(/*pool:*/ NULL, pstm_n, len) < 0
	 || pstm_read_unsigned_bin(pstm_n, bin_ptr, len) < 0
	) {
		xfunc_die();
	}
}
static void find_key_in_der_cert(tls_state_t *tls, uint8_t *der, int len)
{
/* Certificate is a DER-encoded data structure. Each DER element has a length,
* which makes it easy to skip over large compound elements of any complexity
@ -504,19 +527,43 @@ static void *find_key_in_der_cert(int *key_len, uint8_t *der, int len)
der = skip_der_item(der, end); /* validity */
der = skip_der_item(der, end); /* subject */
/* enter "subjectPublicKeyInfo" */
/* enter subjectPublicKeyInfo */
der = enter_der_item(der, &end);
/* skip "subjectPublicKeyInfo.algorithm" */
{ /* check subjectPublicKeyInfo.algorithm */
static const uint8_t expected[] = {
0x30,0x0d, // SEQ 13 bytes
0x06,0x09, 0x2a,0x86,0x48,0x86,0xf7,0x0d,0x01,0x01,0x01, // OID RSA_KEY_ALG 42.134.72.134.247.13.1.1.1
//0x05,0x00, // NULL
};
if (memcmp(der, expected, sizeof(expected)) != 0)
bb_error_msg_and_die("not RSA key");
}
/* skip subjectPublicKeyInfo.algorithm */
der = skip_der_item(der, end);
/* enter "subjectPublicKeyInfo.publicKey" */
/* enter subjectPublicKeyInfo.publicKey */
// die_if_not_this_der_type(der, end, 0x03); /* must be BITSTRING */
der = enter_der_item(der, &end);
/* return a copy */
*key_len = end - der;
dbg("copying key bytes:%u, first:0x%02x\n", *key_len, der[0]);
return xmemdup(der, *key_len);
/* parse RSA key: */
//based on getAsnRsaPubKey(), pkcs1ParsePrivBin() is also of note
dbg("key bytes:%u, first:0x%02x\n", (int)(end - der), der[0]);
if (end - der < 14) xfunc_die();
/* example format:
* ignore bits: 00
* SEQ 0x018a/394 bytes: 3082018a
* INTEGER 0x0181/385 bytes (modulus): 02820181 XX...XXX
* INTEGER 3 bytes (exponent): 0203 010001
*/
if (*der != 0) /* "ignore bits", should be 0 */
xfunc_die();
der++;
der = enter_der_item(der, &end); /* enter SEQ */
//memset(tls->server_rsa_pub_key, 0, sizeof(tls->server_rsa_pub_key));
der_binary_to_pstm(&tls->server_rsa_pub_key.N, der, end); /* modulus */
der = skip_der_item(der, end);
der_binary_to_pstm(&tls->server_rsa_pub_key.e, der, end); /* exponent */
tls->server_rsa_pub_key.size = pstm_unsigned_bin_size(&tls->server_rsa_pub_key.N);
dbg("server_rsa_pub_key.size:%d\n", tls->server_rsa_pub_key.size);
}
static void get_server_cert_or_die(tls_state_t *tls)
@ -553,7 +600,107 @@ static void get_server_cert_or_die(tls_state_t *tls)
len = len1;
if (len)
tls->pubkey = find_key_in_der_cert(&tls->pubkey_len, certbuf + 10, len);
find_key_in_der_cert(tls, certbuf + 10, len);
}
/* Build and send the CLIENT_KEY_EXCHANGE handshake message for RSA key exchange.
 *
 * A 48-byte premaster secret (2 bytes of protocol version followed by
 * 46 random bytes) is encrypted with the server's RSA public key
 * (tls->server_rsa_pub_key) and written to tls->fd as one handshake record.
 *
 * All length fields are derived from the size of the server's RSA modulus,
 * since the RSA ciphertext is exactly that long. Dies if the key does not
 * fit into the record buffer or if encryption reports an error.
 */
static void send_client_key_exchange(tls_state_t *tls)
{
#if 0 //matrixssl code snippets:
int32 csRsaEncryptPub(psPool_t *pool, psPubKey_t *key,
unsigned char *in, uint32 inlen, unsigned char *out, uint32 outlen,
void *data)
{
psAssert(key->type == PS_RSA);
return psRsaEncryptPub(pool, (psRsaKey_t*)key->key, in, inlen, out, outlen,
data);
}
...
/* pkaAfter.user is buffer len */
if ((rc = csRsaEncryptPub(pka->pool, &ssl->sec.cert->publicKey,
ssl->sec.premaster, ssl->sec.premasterSize, pka->outbuf,
pka->user, pka->data)) < 0) {
if (rc == PS_PENDING) {
/* For these ClientKeyExchange paths, we do want to come
back through nowDoCkePka for a double pass so each
case can manage its own pkaAfter and to make sure
psX509FreeCert and sslCreateKeys() are hit below. */
return rc;
}
psTraceIntInfo("csRsaEncryptPub in CKE failed %d\n", rc);
return MATRIXSSL_ERROR;
}
/* RSA closed the pool on second pass */
pka->pool = NULL;
clearPkaAfter(ssl);
...
#ifdef USE_RSA_CIPHER_SUITE
/*
Standard RSA suite
*/
ssl->sec.premasterSize = SSL_HS_RSA_PREMASTER_SIZE;
ssl->sec.premaster = psMalloc(ssl->hsPool,
SSL_HS_RSA_PREMASTER_SIZE);
if (ssl->sec.premaster == NULL) {
return SSL_MEM_ERROR;
}
ssl->sec.premaster[0] = ssl->reqMajVer;
ssl->sec.premaster[1] = ssl->reqMinVer;
if (matrixCryptoGetPrngData(ssl->sec.premaster + 2,
SSL_HS_RSA_PREMASTER_SIZE - 2, ssl->userPtr) < 0) {
return MATRIXSSL_ERROR;
}
/* Shedule RSA encryption. Put tmp pool under control of After */
pkaAfter->type = PKA_AFTER_RSA_ENCRYPT;
pkaAfter->outbuf = c;
pkaAfter->data = pkiData;
pkaAfter->pool = pkiPool;
pkaAfter->user = (uint32)(end - c); /* Available space */
c += keyLen;
#endif
#endif // 0
	struct client_key_exchange {
		struct record_hdr xhdr;
		uint8_t type;
		uint8_t len24_hi, len24_mid, len24_lo;
		uint8_t keylen16_hi, keylen16_lo; /* exist for RSA, but not for some other key types */
//We had a bug when the keylen field was missing. We sent:
//write(3, "\x16\x03\x03\x01\x84\x10\x00\x01\x80\xXX\xXX\xXX\xXX\xXX\xXX...", 393) = 393
//openssl sends:
//write to 0xe9a090 [0xf9ac20] (395 bytes => 395 (0x18B))
//0000 - 16 03 03 01 86 10 00 01 -82 01 80 xx xx xx xx xx
		uint8_t key[4096 / 8]; /* room for an RSA modulus of up to 4096 bits */
	};
	struct client_key_exchange record;
	uint8_t premaster[SSL_HS_RSA_PREMASTER_SIZE];
	unsigned keylen;
	unsigned len;

	/* The ciphertext is as long as the server's modulus: only that many
	 * bytes of record.key are meaningful and may be sent.
	 */
	keylen = tls->server_rsa_pub_key.size;
	if (keylen == 0 || keylen > sizeof(record.key))
		bb_error_msg_and_die("unsupported RSA key size");

	memset(&record, 0, sizeof(record));

	tls_get_random(premaster, sizeof(premaster));
	/* The first two bytes of the premaster secret carry the protocol version */
	premaster[0] = TLS_MAJ;
	premaster[1] = TLS_MIN;
	/* The matrixssl snippet above treats a negative result as failure */
	if (psRsaEncryptPub(/*pool:*/ NULL,
		/* psRsaKey_t* */ &tls->server_rsa_pub_key,
		premaster, /*inlen:*/ sizeof(premaster),
		record.key, sizeof(record.key),
		data_param_ignored
	) < 0) {
		bb_error_msg_and_die("RSA encryption failed");
	}

	/* Fill in the headers from the innermost length outwards */
	record.keylen16_hi = keylen >> 8;
	record.keylen16_lo = keylen & 0xff;
	len = keylen + 2; /* + keylen16 field */

	record.type = HANDSHAKE_CLIENT_KEY_EXCHANGE;
	//record.len24_hi = 0;
	record.len24_mid = len >> 8;
	record.len24_lo = len & 0xff;
	len += 4; /* + handshake type and len24 */

	record.xhdr.type = RECORD_TYPE_HANDSHAKE;
	record.xhdr.proto_maj = TLS_MAJ;
	record.xhdr.proto_min = TLS_MIN;
	record.xhdr.len16_hi = len >> 8;
	record.xhdr.len16_lo = len & 0xff;
	len += sizeof(record.xhdr);

	/* Send only the used part of the record, not the whole key[] buffer */
	xwrite(tls->fd, &record, len);
}
static void tls_handshake(tls_state_t *tls)
@ -614,6 +761,8 @@ static void tls_handshake(tls_state_t *tls)
// 459 bytes:
// 0c 00|01|c7 03|00|17|41|04|87|94|2e|2f|68|d0|c9|f4|97|a8|2d|ef|ed|67|ea|c6|f3|b3|56|47|5d|27|b6|bd|ee|70|25|30|5e|b0|8e|f6|21|5a...
//SvKey len=455^
// with TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA: 461 bytes:
// 0c 00|01|c9 03|00|17|41|04|cd|9b|b4|29|1f|f6|b0|c2|84|82|7f|29|6a|47|4e|ec|87|0b|c1|9c|69|e1|f8|c6|d0|53|e9|27|90|a5|c8|02|15|75...
dbg("got SERVER_KEY_EXCHANGE\n");
len = xread_tls_block(tls);
break;
@ -624,6 +773,8 @@ static void tls_handshake(tls_state_t *tls)
case HANDSHAKE_SERVER_HELLO_DONE:
// 0e 000000 (len:0)
dbg("got SERVER_HELLO_DONE\n");
send_client_key_exchange(tls);
len = xread_tls_block(tls);
break;
default:
tls_error_die(tls);

73
networking/tls.h Normal file
View File

@ -0,0 +1,73 @@
/*
* Copyright (C) 2017 Denys Vlasenko
*
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
#include "libbb.h"
/* config tweaks */
#define HAVE_NATIVE_INT64 1
#undef DISABLE_PSTM
#undef USE_1024_KEY_SPEED_OPTIMIZATIONS
#undef USE_2048_KEY_SPEED_OPTIMIZATIONS
//TODO: enable to use asm:
//#if defined(__GNUC__) && defined(__i386__) -> #define PSTM_32BIT and PSTM_X86
//#if defined(__GNUC__) && defined(__x86_64__) -> #define PSTM_64BIT and PSTM_X86_64
//ARM and MIPS also have these
#define PS_SUCCESS 0
#define PS_FAILURE -1
#define PS_ARG_FAIL -6 /* Failure due to bad function param */
#define PS_PLATFORM_FAIL -7 /* Failure as a result of system call error */
#define PS_MEM_FAIL -8 /* Failure to allocate requested memory */
#define PS_LIMIT_FAIL -9 /* Failure on sanity/limit tests */
#define PS_TRUE 1
#define PS_FALSE 0
#if BB_BIG_ENDIAN
# define ENDIAN_BIG 1
# undef ENDIAN_LITTLE
//#???? ENDIAN_32BITWORD
// controls only STORE32L, which we don't use
#else
# define ENDIAN_LITTLE 1
# undef ENDIAN_BIG
#endif
typedef uint64_t uint64;
typedef int64_t int64;
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint16_t uint16;
typedef int16_t int16;
//FIXME
typedef char psPool_t;
//#ifdef PS_PUBKEY_OPTIMIZE_FOR_SMALLER_RAM
#define PS_EXPTMOD_WINSIZE 3
//#ifdef PS_PUBKEY_OPTIMIZE_FOR_FASTER_SPEED
//#define PS_EXPTMOD_WINSIZE 5
#define PUBKEY_TYPE 0x01
#define PRIVKEY_TYPE 0x02
void tls_get_random(void *buf, unsigned len);
#define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS)
#define psFree(p, pool) free(p)
#define psTraceCrypto(msg) bb_error_msg_and_die(msg)
/* Secure zerofill */
#define memset_s(A,B,C,D) memset((A),(C),(D))
/* Constant time memory comparison */
#define memcmpct(s1, s2, len) memcmp((s1), (s2), (len))
#undef min
#define min(x, y) ((x) < (y) ? (x) : (y))
#include "tls_pstm.h"
#include "tls_rsa.h"

2254
networking/tls_pstm.c Normal file

File diff suppressed because it is too large Load Diff

238
networking/tls_pstm.h Normal file
View File

@ -0,0 +1,238 @@
/**
* @file pstm.h
* @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
*
* multiple-precision integer library.
*/
/*
* Copyright (c) 2013-2015 INSIDE Secure Corporation
* Copyright (c) PeerSec Networks, 2002-2011
* All Rights Reserved
*
* The latest version of this code is available at http://www.matrixssl.org
*
* This software is open source; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This General Public License does NOT permit incorporating this software
* into proprietary programs. If you are unable to comply with the GPL, a
* commercial license for this software may be purchased from INSIDE at
* http://www.insidesecure.com/eng/Company/Locations
*
* This program is distributed in WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* http://www.gnu.org/copyleft/gpl.html
*/
/******************************************************************************/
#ifndef _h_PSTMATH
#define _h_PSTMATH
#ifndef DISABLE_PSTM
/* Define this here to avoid including circular limits.h on some platforms */
#ifndef CHAR_BIT
#define CHAR_BIT 8
#endif
/******************************************************************************/
/*
If native 64 bit integers are not supported, we do not support 32x32->64
in hardware, so we must set the 16 bit flag to produce 16x16->32 products.
*/
#ifndef HAVE_NATIVE_INT64
#define PSTM_16BIT
#endif /* ! HAVE_NATIVE_INT64 */
/******************************************************************************/
/*
Some default configurations.
pstm_word should be the largest value the processor can hold as the product
of a multiplication. Most platforms support a 32x32->64 MAC instruction,
so 64bits is the default pstm_word size.
pstm_digit should be half the size of pstm_word
*/
#ifdef PSTM_8BIT
/* 8-bit digits, 16-bit word products */
typedef unsigned char pstm_digit;
typedef unsigned short pstm_word;
#define DIGIT_BIT 8
#elif defined(PSTM_16BIT)
/* 16-bit digits, 32-bit word products */
typedef unsigned short pstm_digit;
typedef unsigned long pstm_word;
#define DIGIT_BIT 16
#elif defined(PSTM_64BIT)
/* 64-bit digits, 128-bit word products */
#ifndef __GNUC__
#error "64bit digits requires GCC"
#endif
typedef unsigned long pstm_digit;
typedef unsigned long pstm_word __attribute__ ((mode(TI)));
#define DIGIT_BIT 64
#else
/* This is the default case, 32-bit digits, 64-bit word products */
typedef uint32 pstm_digit;
typedef uint64 pstm_word;
#define DIGIT_BIT 32
#define PSTM_32BIT
#endif /* digit and word size */
#define PSTM_MASK (pstm_digit)(-1)
#define PSTM_DIGIT_MAX PSTM_MASK
/******************************************************************************/
/*
equalities
*/
#define PSTM_LT -1 /* less than */
#define PSTM_EQ 0 /* equal to */
#define PSTM_GT 1 /* greater than */
#define PSTM_ZPOS 0 /* positive integer */
#define PSTM_NEG 1 /* negative */
#define PSTM_OKAY PS_SUCCESS
#define PSTM_MEM PS_MEM_FAIL
/******************************************************************************/
/*
Various build options
*/
#define PSTM_DEFAULT_INIT 64 /* default (64) digits of allocation */
#define PSTM_MAX_SIZE 4096
/*
 * Multiple-precision integer.
 *
 * used  - number of digits of dp[] currently in use; 0 means the value
 *         is zero (see pstm_iszero)
 * alloc - presumably the number of digits allocated for dp[]; it is used
 *         as the capacity limit in sanity checks - TODO confirm in tls_pstm.c
 * sign  - PSTM_ZPOS or PSTM_NEG
 * dp    - the digits; dp[0] is the least significant one (pstm_iseven and
 *         pstm_isodd test its low bit)
 * pool  - memory pool handle inherited from matrixssl
 *         NOTE(review): appears unused in this port - confirm
 */
typedef struct {
int16 used, alloc, sign;
pstm_digit *dp;
psPool_t *pool;
} pstm_int;
/******************************************************************************/
/*
Operations on large integers
*/
#define pstm_iszero(a) (((a)->used == 0) ? PS_TRUE : PS_FALSE)
#define pstm_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? PS_TRUE : PS_FALSE)
#define pstm_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? PS_TRUE : PS_FALSE)
#define pstm_abs(a, b) { pstm_copy(a, b); (b)->sign = 0; }
extern void pstm_set(pstm_int *a, pstm_digit b);
extern void pstm_zero(pstm_int * a);
extern int32 pstm_init(psPool_t *pool, pstm_int * a);
extern int32 pstm_init_size(psPool_t *pool, pstm_int * a, uint32 size);
extern int32 pstm_init_copy(psPool_t *pool, pstm_int * a, pstm_int * b,
int16 toSqr);
extern int16 pstm_count_bits (pstm_int * a);
extern int32 pstm_init_for_read_unsigned_bin(psPool_t *pool, pstm_int *a,
uint32 len);
extern int32 pstm_read_unsigned_bin(pstm_int *a, unsigned char *b, int32 c);
extern int32 pstm_unsigned_bin_size(pstm_int *a);
extern int32 pstm_copy(pstm_int * a, pstm_int * b);
extern void pstm_exch(pstm_int * a, pstm_int * b);
extern void pstm_clear(pstm_int * a);
extern void pstm_clear_multi(pstm_int *mp0, pstm_int *mp1, pstm_int *mp2,
pstm_int *mp3, pstm_int *mp4, pstm_int *mp5, pstm_int *mp6,
pstm_int *mp7);
extern int32 pstm_grow(pstm_int * a, int16 size);
extern void pstm_clamp(pstm_int * a);
extern int32 pstm_cmp(pstm_int * a, pstm_int * b);
extern int32 pstm_cmp_mag(pstm_int * a, pstm_int * b);
extern void pstm_rshd(pstm_int *a, int16 x);
extern int32 pstm_lshd(pstm_int * a, int16 b);
extern int32 pstm_div(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
pstm_int *d);
extern int32 pstm_div_2d(psPool_t *pool, pstm_int *a, int16 b, pstm_int *c,
pstm_int *d);
extern int32 pstm_div_2(pstm_int * a, pstm_int * b);
extern int32 s_pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c);
extern int32 pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c);
extern int32 pstm_sub_d(psPool_t *pool, pstm_int *a, pstm_digit b, pstm_int *c);
extern int32 pstm_mul_2(pstm_int * a, pstm_int * b);
extern int32 pstm_mod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c);
extern int32 pstm_mulmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
pstm_int *d);
extern int32 pstm_exptmod(psPool_t *pool, pstm_int *G, pstm_int *X, pstm_int *P,
pstm_int *Y);
extern int32 pstm_2expt(pstm_int *a, int16 b);
extern int32 pstm_add(pstm_int *a, pstm_int *b, pstm_int *c);
extern int32 pstm_to_unsigned_bin(psPool_t *pool, pstm_int *a,
unsigned char *b);
extern int32 pstm_to_unsigned_bin_nr(psPool_t *pool, pstm_int *a,
unsigned char *b);
extern int32 pstm_montgomery_setup(pstm_int *a, pstm_digit *rho);
///bbox: pool unused
#define pstm_montgomery_reduce(pool, a, m, mp, paD, paDlen) \
pstm_montgomery_reduce( a, m, mp, paD, paDlen)
extern int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
pstm_digit mp, pstm_digit *paD, uint32 paDlen);
#define pstm_mul_comba(pool, A, B, C, paD, paDlen) \
pstm_mul_comba( A, B, C, paD, paDlen)
extern int32 pstm_mul_comba(psPool_t *pool, pstm_int *A, pstm_int *B,
pstm_int *C, pstm_digit *paD, uint32 paDlen);
///bbox: pool unused
#define pstm_sqr_comba(pool, A, B, paD, paDlen) \
pstm_sqr_comba( A, B, paD, paDlen)
extern int32 pstm_sqr_comba(psPool_t *pool, pstm_int *A, pstm_int *B,
pstm_digit *paD, uint32 paDlen);
extern int32 pstm_cmp_d(pstm_int *a, pstm_digit b);
extern int32 pstm_montgomery_calc_normalization(pstm_int *a, pstm_int *b);
extern int32 pstm_mul_d(pstm_int *a, pstm_digit b, pstm_int *c);
extern int32 pstm_invmod(psPool_t *pool, pstm_int * a, pstm_int * b,
pstm_int * c);
#else /* DISABLE_PSTM */
typedef int32 pstm_int;
#endif /* !DISABLE_PSTM */
#endif /* _h_PSTMATH */

View File

@ -0,0 +1,423 @@
/*
* Copyright (C) 2017 Denys Vlasenko
*
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
#include "tls.h"
/**
* @file pstm_montgomery_reduce.c
* @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
*
* Multiprecision Montgomery Reduction.
*/
/*
* Copyright (c) 2013-2015 INSIDE Secure Corporation
* Copyright (c) PeerSec Networks, 2002-2011
* All Rights Reserved
*
* The latest version of this code is available at http://www.matrixssl.org
*
* This software is open source; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This General Public License does NOT permit incorporating this software
* into proprietary programs. If you are unable to comply with the GPL, a
* commercial license for this software may be purchased from INSIDE at
* http://www.insidesecure.com/eng/Company/Locations
*
* This program is distributed in WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* http://www.gnu.org/copyleft/gpl.html
*/
/******************************************************************************/
///bbox
//#include "../cryptoApi.h"
#ifndef DISABLE_PSTM
/******************************************************************************/
#if defined(PSTM_X86)
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
#endif
//#pragma message ("Using 32 bit x86 Assembly Optimizations")
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
"movl %5,%%eax \n\t" \
"mull %4 \n\t" \
"addl %1,%%eax \n\t" \
"adcl $0,%%edx \n\t" \
"addl %%eax,%0 \n\t" \
"adcl $0,%%edx \n\t" \
"movl %%edx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
: "%eax", "%edx", "%cc")
#define PROPCARRY \
asm( \
"addl %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "%cc")
/******************************************************************************/
#elif defined(PSTM_X86_64)
/* x86-64 optimized */
#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
#endif
//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
"movq %5,%%rax \n\t" \
"mulq %4 \n\t" \
"addq %1,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"addq %%rax,%0 \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rdx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%rax", "%rdx", "cc")
#define INNERMUL8 \
asm( \
"movq 0(%5),%%rax \n\t" \
"movq 0(%2),%%r10 \n\t" \
"movq 0x8(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x8(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x10(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x10(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x8(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x18(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x18(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x10(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x20(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x20(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x18(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x28(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x28(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x20(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x30(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x30(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x28(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x38(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x38(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x30(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x38(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
:"=r"(_c), "=r"(cy) \
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
: "%rax", "%rdx", "%r10", "%r11", "cc")
#define PROPCARRY \
asm( \
"addq %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbq %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%rax", "cc")
/******************************************************************************/
#elif defined(PSTM_ARM)
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#ifdef __thumb2__
//#pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations")
#define INNERMUL \
asm( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" ITE CS \n\t" \
" MOVCS %0,#1 \n\t" \
" MOVCC %0,#0 \n\t" \
" UMLAL r0,%0,%3,%4 \n\t" \
" STR r0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
:"r0","%cc");
#define PROPCARRY \
asm( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" STR r0,%1 \n\t" \
" ITE CS \n\t" \
" MOVCS %0,#1 \n\t" \
" MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"0"(cy),"m"(_c[0])\
:"r0","%cc");
#else /* Non-Thumb2 code */
//#pragma message ("Using 32 bit ARM Assembly Optimizations")
#define INNERMUL \
asm( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" MOVCS %0,#1 \n\t" \
" MOVCC %0,#0 \n\t" \
" UMLAL r0,%0,%3,%4 \n\t" \
" STR r0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
:"r0","%cc");
#define PROPCARRY \
asm( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" STR r0,%1 \n\t" \
" MOVCS %0,#1 \n\t" \
" MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"0"(cy),"m"(_c[0])\
:"r0","%cc");
#endif /* __thumb2__ */
/******************************************************************************/
#elif defined(PSTM_MIPS)
/* MIPS32 */
//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
" multu %3,%4 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
" addu $12,$12,%0 \n\t" \
" sltu $10,$12,%0 \n\t" \
" addu $13,$13,$10 \n\t" \
" lw $10,%1 \n\t" \
" addu $12,$12,$10 \n\t" \
" sltu $10,$12,$10 \n\t" \
" addu %0,$13,$10 \n\t" \
" sw $12,%1 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"r"(cy),"r"(mu),"r"(tmpm[0]),"r"(_c[0])\
:"$10","$12","$13")\
; ++tmpm;
#define PROPCARRY \
asm( \
" lw $10,%1 \n\t" \
" addu $10,$10,%0 \n\t" \
" sw $10,%1 \n\t" \
" sltu %0,$10,%0 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"r"(cy),"r"(_c[0])\
:"$10");
/******************************************************************************/
#else
/* ISO C code */
/*
 * Portable fallback for the building blocks of pstm_montgomery_reduce().
 * The macros operate directly on that function's locals:
 * c, x, mp, mu, _c, tmpm and cy.
 */
/* No setup, teardown or per-round epilogue is needed in the C variant */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Per-round multiplier: mu = c[x] * mp, truncated to one digit */
#define LOOP_START \
mu = c[x] * mp
/*
 * One multiply-accumulate step: computes _c[0] + cy + mu * (*tmpm) in the
 * double-width pstm_word type, stores the low digit back into _c[0] and
 * the high digit into cy, and advances tmpm to the next digit.
 */
#define INNERMUL \
do { pstm_word t; \
t = ((pstm_word)_c[0] + (pstm_word)cy) + \
(((pstm_word)mu) * ((pstm_word)*tmpm++)); \
_c[0] = (pstm_digit)t; \
cy = (pstm_digit)(t >> DIGIT_BIT); \
} while (0)
/*
 * Carry propagation step: adds cy into _c[0]; cy becomes 1 if the
 * addition wrapped around (the sum is smaller than the old cy), else 0.
 */
#define PROPCARRY \
do { pstm_digit t = _c[0] += cy; cy = (t < cy); } while (0)
#endif
/******************************************************************************/
#define LO 0
/*
 * Montgomery reduction: computes x/R == x (mod N), in place.
 *
 * a      - in: the number to reduce; out: the reduced result
 * m      - the modulus
 * mp     - per-modulus Montgomery constant, used to derive the per-round
 *          multiplier (mu = c[x] * mp)
 * paD    - optional caller-provided scratch buffer, may be NULL
 * paDlen - size of paD in bytes
 *
 * The working copy needs 2*m->used+1 digits. paD is used when it is large
 * enough, otherwise a temporary buffer is allocated and freed here.
 *
 * Returns PSTM_OKAY on success, PS_LIMIT_FAIL if the operands fail the
 * size sanity checks, PS_MEM_FAIL if the final subtraction fails.
 */
int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
		pstm_digit mp, pstm_digit *paD, uint32 paDlen)
{
	pstm_digit *c, *_c, *tmpm, mu;
	int32 oldused, x, y;
	int16 pa;
	uint32 c_bytes;

	pa = m->used;
	if (pa > a->alloc) {
		/* Sanity test for bad numbers. This will confirm no buffer overruns */
		/* NOTE(review): the copy-out below stores pa+1 digits into a->dp;
		 * confirm that callers always provide a->alloc > pa.
		 */
		return PS_LIMIT_FAIL;
	}
	oldused = a->used;
	if (oldused > 2*pa+1) {
		/* The input would not fit into the 2*pa+1 digit working copy */
		return PS_LIMIT_FAIL;
	}

	/* c[] holds 2*pa+1 digits, and both paDlen and xzalloc() count bytes */
	c_bytes = (uint32)(2*pa+1) * sizeof(pstm_digit);
	if (paD && paDlen >= c_bytes) {
		c = paD;
		memset(c, 0x0, paDlen);
	} else {
		c = xzalloc(c_bytes);
	}
	/* copy the input */
	for (x = 0; x < oldused; x++) {
		c[x] = a->dp[x];
	}

	MONT_START;
	for (x = 0; x < pa; x++) {
		pstm_digit cy = 0;
		/* get Mu for this round */
		LOOP_START;
		_c = c + x;
		tmpm = m->dp;
		y = 0;
#ifdef PSTM_X86_64
		for (; y < (pa & ~7); y += 8) {
			INNERMUL8;
			_c += 8;
			tmpm += 8;
		}
#endif /* PSTM_X86_64 */
		for (; y < pa; y++) {
			INNERMUL;
			++_c;
		}
		LOOP_END;
		while (cy) {
			PROPCARRY;
			++_c;
		}
	}

	/* now copy out */
	_c = c + pa;
	tmpm = a->dp;
	for (x = 0; x < pa+1; x++) {
		*tmpm++ = *_c++;
	}
	/* clear the digits of the old value which are not overwritten */
	for (; x < oldused; x++) {
		*tmpm++ = 0;
	}
	MONT_FINI;

	a->used = pa+1;
	pstm_clamp(a);

	/* reuse x as return code */
	x = PSTM_OKAY;
	/* if A >= m then A = A - m */
	if (pstm_cmp_mag (a, m) != PSTM_LT) {
		if (s_pstm_sub (a, m, a) != PSTM_OKAY) {
			x = PS_MEM_FAIL;
		}
	}
	/* Free exactly when the buffer was allocated above */
	if (c != paD) {
		psFree(c, pool);
	}
	return x;
}
#endif /* !DISABLE_PSTM */
/******************************************************************************/

View File

@ -0,0 +1,777 @@
/*
* Copyright (C) 2017 Denys Vlasenko
*
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
#include "tls.h"
/**
* @file pstm_mul_comba.c
* @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
*
* Multiprecision multiplication with Comba technique.
*/
/*
* Copyright (c) 2013-2015 INSIDE Secure Corporation
* Copyright (c) PeerSec Networks, 2002-2011
* All Rights Reserved
*
* The latest version of this code is available at http://www.matrixssl.org
*
* This software is open source; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This General Public License does NOT permit incorporating this software
* into proprietary programs. If you are unable to comply with the GPL, a
* commercial license for this software may be purchased from INSIDE at
* http://www.insidesecure.com/eng/Company/Locations
*
* This program is distributed WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* http://www.gnu.org/copyleft/gpl.html
*/
/******************************************************************************/
///bbox
//#include "../cryptoApi.h"
#ifndef DISABLE_PSTM
/******************************************************************************/
#if defined(PSTM_X86)
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
#endif
//#pragma message ("Using 32 bit x86 Assembly Optimizations")
/*
 * Comba column macros, x86-32 inline-asm variant.
 * All of them operate on the caller's locals c0, c1, c2, which form a
 * three-digit accumulator (c0 lowest). Note that the expansions already
 * end in ';'.
 */
/* hook run once before the first column; empty here */
#define COMBA_START
/* zero the three-digit accumulator */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* move to the next column: shift the accumulator down by one digit */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the finished digit of the current column (c0) into x */
#define COMBA_STORE(x) \
x = c0;
/* store the pending carry digit (c1) into x */
#define COMBA_STORE2(x) \
x = c1;
/* hook run once after the last column; empty here */
#define COMBA_FINI
/*
 * c2:c1:c0 += i * j.
 * i is loaded into eax and multiplied by j with mull; the low half of the
 * product (eax) is added to c0 and the high half (edx) to c1 with carry,
 * and the final carry is added to c2. i and j are memory operands.
 */
#define MULADD(i, j) \
asm( \
"movl %6,%%eax \n\t" \
"mull %7 \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
/******************************************************************************/
#elif defined(PSTM_X86_64)
/* x86-64 optimized */
#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
#endif
//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
/*
 * Comba column macros, x86-64 inline-asm variant.
 * All of them operate on the caller's locals c0, c1, c2, which form a
 * three-digit accumulator (c0 lowest). Note that the expansions already
 * end in ';'.
 */
/* hook run once before the first column; empty here */
#define COMBA_START
/* zero the three-digit accumulator */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* move to the next column: shift the accumulator down by one digit */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the finished digit of the current column (c0) into x */
#define COMBA_STORE(x) \
x = c0;
/* store the pending carry digit (c1) into x */
#define COMBA_STORE2(x) \
x = c1;
/* hook run once after the last column; empty here */
#define COMBA_FINI
/*
 * c2:c1:c0 += i * j.
 * i is loaded into rax and multiplied by j with mulq; the low half of the
 * product (rax) is added to c0 and the high half (rdx) to c1 with carry,
 * and the final carry is added to c2.
 */
#define MULADD(i, j) \
asm ( \
"movq %6,%%rax \n\t" \
"mulq %7 \n\t" \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
/******************************************************************************/
#elif defined(PSTM_ARM)
/*
 * ARM code.
 * Comba column macros operating on the caller's locals c0, c1, c2, which
 * form a three-digit accumulator (c0 lowest). Note that the expansions
 * already end in ';'.
 */
//#pragma message ("Using 32 bit ARM Assembly Optimizations")
/* hook run once before the first column; empty here */
#define COMBA_START
/* zero the three-digit accumulator */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* move to the next column: shift the accumulator down by one digit */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the finished digit of the current column (c0) into x */
#define COMBA_STORE(x) \
x = c0;
/* store the pending carry digit (c1) into x */
#define COMBA_STORE2(x) \
x = c1;
/* hook run once after the last column; empty here */
#define COMBA_FINI
/*
 * c2:c1:c0 += i * j.
 * UMULL places the product of i and j in r0 (low word) and r1 (high word);
 * ADDS/ADCS add them to c0 and c1 while setting the carry flag, and ADC
 * adds the last carry to c2. r0 and r1 are declared as clobbered.
 */
#define MULADD(i, j) \
asm( \
" UMULL r0,r1,%6,%7 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
/******************************************************************************/
#elif defined(PSTM_MIPS)
/*
 * MIPS32 code.
 * Comba column macros operating on the caller's locals c0, c1, c2, which
 * form a three-digit accumulator (c0 lowest). Note that the expansions
 * already end in ';'.
 */
//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
/* hook run once before the first column; empty here */
#define COMBA_START
/* zero the three-digit accumulator */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* move to the next column: shift the accumulator down by one digit */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the finished digit of the current column (c0) into x */
#define COMBA_STORE(x) \
x = c0;
/* store the pending carry digit (c1) into x */
#define COMBA_STORE2(x) \
x = c1;
/* hook run once after the last column; empty here */
#define COMBA_FINI
/*
 * c2:c1:c0 += i * j.
 * multu produces the product, which is fetched into $12 (mflo, low word)
 * and $13 (mfhi, high word). There is no carry flag, so each carry is
 * recovered with sltu (sum < addend) after the corresponding addu and then
 * added into the next digit. $12 and $13 are declared as clobbered.
 * NOTE(review): multu also overwrites the HI/LO registers, which are not
 * listed as clobbers - confirm whether the compiler needs to be told.
 */
#define MULADD(i, j) \
asm( \
" multu %6,%7 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
" addu %0,%0,$12 \n\t" \
" sltu $12,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $13,%1,$13 \n\t" \
" addu %1,%1,$12 \n\t" \
" sltu $12,%1,$12 \n\t" \
" addu %2,%2,$13 \n\t" \
" addu %2,%2,$12 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
/******************************************************************************/
#else
/*
 * Comba column macros, plain C variant (used when none of the
 * architecture-specific branches above is selected).
 * All of them operate on the caller's locals c0, c1, c2, which form a
 * three-digit accumulator (c0 lowest). Note that the expansions already
 * end in ';'.
 */
/* hook run once before the first column; empty here */
#define COMBA_START
/* zero the three-digit accumulator */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* move to the next column: shift the accumulator down by one digit */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the finished digit of the current column (c0) into x */
#define COMBA_STORE(x) \
x = c0;
/* store the pending carry digit (c1) into x */
#define COMBA_STORE2(x) \
x = c1;
/* hook run once after the last column; empty here */
#define COMBA_FINI
/*
 * c2:c1:c0 += i * j, using the double-width type pstm_word.
 * First t = c0 + i*j: its low digit becomes c0. Then t = c1 + (high part
 * of the previous t): its low digit becomes c1 and its high part is added
 * to c2. The casts apply directly to the tokens i and j, which are not
 * parenthesized inside the macro.
 */
#define MULADD(i, j) \
do { pstm_word t; \
t = (pstm_word)c0 + ((pstm_word)i) * ((pstm_word)j); c0 = (pstm_digit)t; \
t = (pstm_word)c1 + (t >> DIGIT_BIT); \
c1 = (pstm_digit)t; c2 += (pstm_digit)(t >> DIGIT_BIT); \
} while (0);
#endif
/******************************************************************************/
/* generic PxQ multiplier */
///bbox: pool unused
#define pstm_mul_comba_gen(pool, A, B, C, paD, paDlen) \
pstm_mul_comba_gen( A, B, C, paD, paDlen)
static int32 pstm_mul_comba_gen(psPool_t *pool, pstm_int *A, pstm_int *B,
		pstm_int *C, pstm_digit *paD, uint32 paDlen)
{
	/*
	 * Column-wise (Comba) multiplication C = A * B for operands of any
	 * size. The product is built in a scratch buffer first and copied into
	 * C afterwards. paD/paDlen is an optional caller-supplied scratch
	 * buffer (paDlen in bytes); when it is absent or too small a private
	 * one is allocated and freed before returning.
	 * Returns PS_SUCCESS, or PS_MEM_FAIL if C could not be grown.
	 */
	pstm_digit c0, c1, c2;	/* accumulator used by the COMBA_xxx/MULADD macros */
	pstm_digit *prod;	/* scratch holding the product digits */
	pstm_digit *ap, *bp;
	int32 col, a_idx, b_idx, terms, prev_used;
	int16 ndigits, own_prod;

	COMBA_START;
	COMBA_CLEAR;

	/* the product occupies at most A->used + B->used digits */
	ndigits = A->used + B->used;
	if (C->alloc < ndigits && pstm_grow(C, ndigits) != PSTM_OKAY) {
		return PS_MEM_FAIL;
	}

	/* pick the scratch buffer: the caller's if it is big enough, else our own */
	if (paD != NULL && paDlen >= (sizeof(pstm_digit) * ndigits)) {
		own_prod = 0;
		prod = paD;
		memset(prod, 0x0, paDlen);
	} else {
		own_prod = 1;
		prod = xzalloc(sizeof(pstm_digit) * ndigits);
	}

	for (col = 0; col < ndigits; col++) {
		/* first pair of digits contributing to this column */
		b_idx = min(col, B->used-1);
		a_idx = col - b_idx;
		ap = A->dp + a_idx;
		bp = B->dp + b_idx;

		/* how many digit pairs fall into this column */
		terms = min(A->used-a_idx, b_idx+1);

		/* carry over from the previous column, then accumulate */
		COMBA_FORWARD;
		while (terms-- > 0) {
			MULADD(*ap, *bp);
			ap++;
			bp--;
		}

		COMBA_STORE(prod[col]);
	}
	COMBA_FINI;

	/* publish the result in C */
	prev_used = C->used;
	C->used = ndigits;
	C->sign = A->sign ^ B->sign;
	for (col = 0; col < ndigits; col++) {
		C->dp[col] = prod[col];
	}
	/* wipe digits left over from C's previous, longer value */
	for (; col < prev_used; col++) {
		C->dp[col] = 0;
	}
	pstm_clamp(C);

	if (own_prod) {
		psFree(prod, pool);
	}
	return PS_SUCCESS;
}
/******************************************************************************/
#ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
/*
 * Fully unrolled Comba multiplication C = A * B for 16-digit operands.
 *
 * Exactly 16 digits are read from A->dp and from B->dp regardless of their
 * 'used' counts (callers presumably guarantee both operands are 16 digits
 * long - TODO confirm at the call site). C is grown to 32 digits if needed.
 *
 * Each numbered section below computes one column of the product: it sums
 * all at[i] * at[16+j] with i + j equal to the column number.
 *
 * Returns PSTM_OKAY, or PS_MEM_FAIL if C could not be grown.
 */
static int32 pstm_mul_comba16(pstm_int *A, pstm_int *B, pstm_int *C)
{
/* c0..c2: accumulator used by the COMBA_xxx/MULADD macros; at[]: operand copies */
pstm_digit c0, c1, c2, at[32];
if (C->alloc < 32) {
if (pstm_grow(C, 32) != PSTM_OKAY) {
return PS_MEM_FAIL;
}
}
/* at[0..15] = digits of A, at[16..31] = digits of B; C->dp is written directly below */
memcpy(at, A->dp, 16 * sizeof(pstm_digit));
memcpy(at+16, B->dp, 16 * sizeof(pstm_digit));
COMBA_START;
COMBA_CLEAR;
/* 0 */
MULADD(at[0], at[16]);
COMBA_STORE(C->dp[0]);
/* 1 */
COMBA_FORWARD;
MULADD(at[0], at[17]); MULADD(at[1], at[16]);
COMBA_STORE(C->dp[1]);
/* 2 */
COMBA_FORWARD;
MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]);
COMBA_STORE(C->dp[2]);
/* 3 */
COMBA_FORWARD;
MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]);
COMBA_STORE(C->dp[3]);
/* 4 */
COMBA_FORWARD;
MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]);
COMBA_STORE(C->dp[4]);
/* 5 */
COMBA_FORWARD;
MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]);
COMBA_STORE(C->dp[5]);
/* 6 */
COMBA_FORWARD;
MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]);
COMBA_STORE(C->dp[6]);
/* 7 */
COMBA_FORWARD;
MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]);
COMBA_STORE(C->dp[7]);
/* 8 */
COMBA_FORWARD;
MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]);
COMBA_STORE(C->dp[8]);
/* 9 */
COMBA_FORWARD;
MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]);
COMBA_STORE(C->dp[9]);
/* 10 */
COMBA_FORWARD;
MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]);
COMBA_STORE(C->dp[10]);
/* 11 */
COMBA_FORWARD;
MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]);
COMBA_STORE(C->dp[11]);
/* 12 */
COMBA_FORWARD;
MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]);
COMBA_STORE(C->dp[12]);
/* 13 */
COMBA_FORWARD;
MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]);
COMBA_STORE(C->dp[13]);
/* 14 */
COMBA_FORWARD;
MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); MULADD(at[14], at[16]);
COMBA_STORE(C->dp[14]);
/* 15 */
COMBA_FORWARD;
MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); MULADD(at[15], at[16]);
COMBA_STORE(C->dp[15]);
/* 16 */
COMBA_FORWARD;
MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); MULADD(at[15], at[17]);
COMBA_STORE(C->dp[16]);
/* 17 */
COMBA_FORWARD;
MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); MULADD(at[15], at[18]);
COMBA_STORE(C->dp[17]);
/* 18 */
COMBA_FORWARD;
MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); MULADD(at[15], at[19]);
COMBA_STORE(C->dp[18]);
/* 19 */
COMBA_FORWARD;
MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]);
COMBA_STORE(C->dp[19]);
/* 20 */
COMBA_FORWARD;
MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]);
COMBA_STORE(C->dp[20]);
/* 21 */
COMBA_FORWARD;
MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]);
COMBA_STORE(C->dp[21]);
/* 22 */
COMBA_FORWARD;
MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]);
COMBA_STORE(C->dp[22]);
/* 23 */
COMBA_FORWARD;
MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]);
COMBA_STORE(C->dp[23]);
/* 24 */
COMBA_FORWARD;
MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]);
COMBA_STORE(C->dp[24]);
/* 25 */
COMBA_FORWARD;
MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]);
COMBA_STORE(C->dp[25]);
/* 26 */
COMBA_FORWARD;
MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]);
COMBA_STORE(C->dp[26]);
/* 27 */
COMBA_FORWARD;
MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]);
COMBA_STORE(C->dp[27]);
/* 28 */
COMBA_FORWARD;
MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]);
COMBA_STORE(C->dp[28]);
/* 29 */
COMBA_FORWARD;
MULADD(at[14], at[31]); MULADD(at[15], at[30]);
COMBA_STORE(C->dp[29]);
/* 30 */
COMBA_FORWARD;
MULADD(at[15], at[31]);
COMBA_STORE(C->dp[30]);
/* the carry out of the last column becomes the top digit */
COMBA_STORE2(C->dp[31]);
C->used = 32;
C->sign = A->sign ^ B->sign;
/* let pstm_clamp() adjust C->used after the fixed-size store above */
pstm_clamp(C);
COMBA_FINI;
return PSTM_OKAY;
}
#endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */
#ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
static int32 pstm_mul_comba32(pstm_int *A, pstm_int *B, pstm_int *C)
{
pstm_digit c0, c1, c2, at[64];
int32 out_size;
if (C->alloc < 64) {
if (pstm_grow(C, 64) != PSTM_OKAY) {
return PS_MEM_FAIL;
}
}
out_size = A->used + B->used;
memcpy(at, A->dp, 32 * sizeof(pstm_digit));
memcpy(at+32, B->dp, 32 * sizeof(pstm_digit));
COMBA_START;
COMBA_CLEAR;
/* 0 */
MULADD(at[0], at[32]);
COMBA_STORE(C->dp[0]);
/* 1 */
COMBA_FORWARD;
MULADD(at[0], at[33]); MULADD(at[1], at[32]);
COMBA_STORE(C->dp[1]);
/* 2 */
COMBA_FORWARD;
MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]);
COMBA_STORE(C->dp[2]);
/* 3 */
COMBA_FORWARD;
MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]);
COMBA_STORE(C->dp[3]);
/* 4 */
COMBA_FORWARD;
MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]);
COMBA_STORE(C->dp[4]);
/* 5 */
COMBA_FORWARD;
MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]);
COMBA_STORE(C->dp[5]);
/* 6 */
COMBA_FORWARD;
MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]);
COMBA_STORE(C->dp[6]);
/* 7 */
COMBA_FORWARD;
MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]);
COMBA_STORE(C->dp[7]);
/* 8 */
COMBA_FORWARD;
MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]);
COMBA_STORE(C->dp[8]);
/* 9 */
COMBA_FORWARD;
MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]);
COMBA_STORE(C->dp[9]);
/* 10 */
COMBA_FORWARD;
MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]);
COMBA_STORE(C->dp[10]);
/* 11 */
COMBA_FORWARD;
MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]);
COMBA_STORE(C->dp[11]);
/* 12 */
COMBA_FORWARD;
MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]);
COMBA_STORE(C->dp[12]);
/* 13 */
COMBA_FORWARD;
MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]);
COMBA_STORE(C->dp[13]);
/* 14 */
COMBA_FORWARD;
MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]);
COMBA_STORE(C->dp[14]);
/* 15 */
COMBA_FORWARD;
MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]);
COMBA_STORE(C->dp[15]);
/* 16 */
COMBA_FORWARD;
MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]);
COMBA_STORE(C->dp[16]);
/* 17 */
COMBA_FORWARD;
MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]);
COMBA_STORE(C->dp[17]);
/* 18 */
COMBA_FORWARD;
MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]);
COMBA_STORE(C->dp[18]);
/* 19 */
COMBA_FORWARD;
MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]);
COMBA_STORE(C->dp[19]);
/* 20 */
COMBA_FORWARD;
MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]);
COMBA_STORE(C->dp[20]);
/* 21 */
COMBA_FORWARD;
MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]);
COMBA_STORE(C->dp[21]);
/* 22 */
COMBA_FORWARD;
MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]);
COMBA_STORE(C->dp[22]);
/* 23 */
COMBA_FORWARD;
MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]);
COMBA_STORE(C->dp[23]);
/* 24 */
COMBA_FORWARD;
MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]);
COMBA_STORE(C->dp[24]);
/* 25 */
COMBA_FORWARD;
MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]);
COMBA_STORE(C->dp[25]);
/* 26 */
COMBA_FORWARD;
MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]);
COMBA_STORE(C->dp[26]);
/* 27 */
COMBA_FORWARD;
MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]);
COMBA_STORE(C->dp[27]);
/* 28 */
COMBA_FORWARD;
MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); MULADD(at[28], at[32]);
COMBA_STORE(C->dp[28]);
/* 29 */
COMBA_FORWARD;
MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); MULADD(at[28], at[33]); MULADD(at[29], at[32]);
COMBA_STORE(C->dp[29]);
/* 30 */
COMBA_FORWARD;
MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); MULADD(at[28], at[34]); MULADD(at[29], at[33]); MULADD(at[30], at[32]);
COMBA_STORE(C->dp[30]);
/* 31 */
COMBA_FORWARD;
MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); MULADD(at[4], at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); MULADD(at[28], at[35]); MULADD(at[29], at[34]); MULADD(at[30], at[33]); MULADD(at[31], at[32]);
COMBA_STORE(C->dp[31]);
/* 32 */
COMBA_FORWARD;
MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); MULADD(at[28], at[36]); MULADD(at[29], at[35]); MULADD(at[30], at[34]); MULADD(at[31], at[33]);
COMBA_STORE(C->dp[32]);
/* 33 */
COMBA_FORWARD;
MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); MULADD(at[28], at[37]); MULADD(at[29], at[36]); MULADD(at[30], at[35]); MULADD(at[31], at[34]);
COMBA_STORE(C->dp[33]);
/* 34 */
COMBA_FORWARD;
MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); MULADD(at[28], at[38]); MULADD(at[29], at[37]); MULADD(at[30], at[36]); MULADD(at[31], at[35]);
COMBA_STORE(C->dp[34]);
/* 35 */
COMBA_FORWARD;
MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]); MULADD(at[28], at[39]); MULADD(at[29], at[38]); MULADD(at[30], at[37]); MULADD(at[31], at[36]);
COMBA_STORE(C->dp[35]);
/* 36 */
COMBA_FORWARD;
MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); MULADD(at[28], at[40]); MULADD(at[29], at[39]); MULADD(at[30], at[38]); MULADD(at[31], at[37]);
COMBA_STORE(C->dp[36]);
/* 37 */
COMBA_FORWARD;
MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); MULADD(at[28], at[41]); MULADD(at[29], at[40]); MULADD(at[30], at[39]); MULADD(at[31], at[38]);
COMBA_STORE(C->dp[37]);
/* 38 */
COMBA_FORWARD;
MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); MULADD(at[28], at[42]); MULADD(at[29], at[41]); MULADD(at[30], at[40]); MULADD(at[31], at[39]);
COMBA_STORE(C->dp[38]);
/* early out at 40 digits, 40*32==1280, or two 640 bit operands */
if (out_size <= 40) { COMBA_STORE2(C->dp[39]); C->used = 40; C->sign = A->sign ^ B->sign; pstm_clamp(C); COMBA_FINI; return PSTM_OKAY; }
/* 39 */
COMBA_FORWARD;
MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); MULADD(at[28], at[43]); MULADD(at[29], at[42]); MULADD(at[30], at[41]); MULADD(at[31], at[40]);
COMBA_STORE(C->dp[39]);
/* 40 */
COMBA_FORWARD;
MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); MULADD(at[28], at[44]); MULADD(at[29], at[43]); MULADD(at[30], at[42]); MULADD(at[31], at[41]);
COMBA_STORE(C->dp[40]);
/* 41 */
COMBA_FORWARD;
MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); MULADD(at[28], at[45]); MULADD(at[29], at[44]); MULADD(at[30], at[43]); MULADD(at[31], at[42]);
COMBA_STORE(C->dp[41]);
/* 42 */
COMBA_FORWARD;
MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); MULADD(at[27], at[47]); MULADD(at[28], at[46]); MULADD(at[29], at[45]); MULADD(at[30], at[44]); MULADD(at[31], at[43]);
COMBA_STORE(C->dp[42]);
/* 43 */
COMBA_FORWARD;
MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); MULADD(at[28], at[47]); MULADD(at[29], at[46]); MULADD(at[30], at[45]); MULADD(at[31], at[44]);
COMBA_STORE(C->dp[43]);
/* 44 */
COMBA_FORWARD;
MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); MULADD(at[29], at[47]); MULADD(at[30], at[46]); MULADD(at[31], at[45]);
COMBA_STORE(C->dp[44]);
/* 45 */
COMBA_FORWARD;
MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); MULADD(at[30], at[47]); MULADD(at[31], at[46]);
COMBA_STORE(C->dp[45]);
/* 46 */
COMBA_FORWARD;
MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); MULADD(at[31], at[47]);
COMBA_STORE(C->dp[46]);
/* early out at 48 digits, 48*32==1536, or two 768 bit operands */
if (out_size <= 48) { COMBA_STORE2(C->dp[47]); C->used = 48; C->sign = A->sign ^ B->sign; pstm_clamp(C); COMBA_FINI; return PSTM_OKAY; }
/* 47 */
COMBA_FORWARD;
MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]); MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]);
COMBA_STORE(C->dp[47]);
/* 48 */
COMBA_FORWARD;
MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]);
COMBA_STORE(C->dp[48]);
/* 49 */
COMBA_FORWARD;
MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]);
COMBA_STORE(C->dp[49]);
/* 50 */
COMBA_FORWARD;
MULADD(at[19], at[63]); MULADD(at[20], at[62]); MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]);
COMBA_STORE(C->dp[50]);
/* 51 */
COMBA_FORWARD;
MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]);
COMBA_STORE(C->dp[51]);
/* 52 */
COMBA_FORWARD;
MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]);
COMBA_STORE(C->dp[52]);
/* 53 */
COMBA_FORWARD;
MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]);
COMBA_STORE(C->dp[53]);
/* 54 */
COMBA_FORWARD;
MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]);
COMBA_STORE(C->dp[54]);
/* early out at 56 digits, 56*32==1792, or two 896 bit operands */
if (out_size <= 56) { COMBA_STORE2(C->dp[55]); C->used = 56; C->sign = A->sign ^ B->sign; pstm_clamp(C); COMBA_FINI; return PSTM_OKAY; }
/* 55 */
COMBA_FORWARD;
MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]);
COMBA_STORE(C->dp[55]);
/* 56 */
COMBA_FORWARD;
MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]);
COMBA_STORE(C->dp[56]);
/* 57 */
COMBA_FORWARD;
MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]); MULADD(at[31], at[58]);
COMBA_STORE(C->dp[57]);
/* 58 */
COMBA_FORWARD;
MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]);
COMBA_STORE(C->dp[58]);
/* 59 */
COMBA_FORWARD;
MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]);
COMBA_STORE(C->dp[59]);
/* 60 */
COMBA_FORWARD;
MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]);
COMBA_STORE(C->dp[60]);
/* 61 */
COMBA_FORWARD;
MULADD(at[30], at[63]); MULADD(at[31], at[62]);
COMBA_STORE(C->dp[61]);
/* 62 */
COMBA_FORWARD;
MULADD(at[31], at[63]);
COMBA_STORE(C->dp[62]);
COMBA_STORE2(C->dp[63]);
C->used = 64;
C->sign = A->sign ^ B->sign;
pstm_clamp(C);
COMBA_FINI;
return PSTM_OKAY;
}
#endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
/******************************************************************************/
/*
 * Multiply A by B and store the product in C.
 *
 * Picks the unrolled fixed-size multiplier when it is compiled in and both
 * operands have exactly the matching number of used digits (16 or 32);
 * every other case goes to the generic routine. pool, paD and paDlen are
 * only forwarded to the generic routine.
 *
 * Returns whatever the selected multiplier returns.
 */
int32 pstm_mul_comba(psPool_t *pool, pstm_int *A, pstm_int *B, pstm_int *C,
		pstm_digit *paD, uint32 paDlen)
{
#ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
	if (A->used == 16 && B->used == 16)
		return pstm_mul_comba16(A, B, C);
#endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */
#ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
	if (A->used == 32 && B->used == 32)
		return pstm_mul_comba32(A, B, C);
#endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
	/* No specialised size matched (or none is compiled in) */
	return pstm_mul_comba_gen(pool, A, B, C, paD, paDlen);
}
#endif /* !DISABLE_PSTM */
/******************************************************************************/

File diff suppressed because it is too large Load Diff

203
networking/tls_rsa.c Normal file
View File

@ -0,0 +1,203 @@
/*
* Copyright (C) 2017 Denys Vlasenko
*
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
#include "tls.h"
/*
 * This macro strips the trailing userPtr argument from the definition below
 * and from every call site.
 * NOTE(review): the body still names userPtr when calling
 * matrixCryptoGetPrngData(); that only compiles if matrixCryptoGetPrngData
 * is itself a macro that discards its third argument - confirm in tls.h.
 */
#define pkcs1Pad(in, inlen, out, outlen, cryptType, userPtr) \
	pkcs1Pad(in, inlen, out, outlen, cryptType)
/*
 * Build a padded block of exactly outlen bytes in out:
 *
 *   0x00 | (unsigned char)cryptType | padding | 0x00 | in[0 .. inlen-1]
 *
 * The padding is outlen - 3 - inlen bytes long and must be at least 8 bytes.
 * For cryptType == PUBKEY_TYPE it is all 0xFF; for any other type it is
 * filled by matrixCryptoGetPrngData() and every zero byte is then replaced
 * by 0x01 so that the padding never contains 0x00.
 *
 * Returns outlen on success, PS_LIMIT_FAIL if the input does not fit with
 * the minimum padding, PS_PLATFORM_FAIL if the random source fails.
 */
static ///bbox
int32 pkcs1Pad(unsigned char *in, uint32 inlen, unsigned char *out,
		uint32 outlen, int32 cryptType, void *userPtr)
{
	unsigned char *c;
	int32 randomLen;

	/*
	 * outlen - 3 - inlen is computed in unsigned arithmetic. If inlen were
	 * larger than outlen the difference would wrap, and for a big enough
	 * inlen the wrapped value is a small positive number that passes the
	 * "< 8" test and lets the writes below run past the end of out.
	 * Treat that case as "does not fit" before subtracting.
	 */
	randomLen = (inlen <= outlen) ? (int32)(outlen - 3 - inlen) : -1;
	if (randomLen < 8) {
		psTraceCrypto("pkcs1Pad failure\n");
		return PS_LIMIT_FAIL;
	}

	c = out;
	*c++ = 0x00;
	*c++ = (unsigned char)cryptType;

	if (cryptType == PUBKEY_TYPE) {
		memset(c, 0xFF, randomLen);
		c += randomLen;
	} else {
		if (matrixCryptoGetPrngData(c, (uint32)randomLen, userPtr) < 0) {
			return PS_PLATFORM_FAIL;
		}
		/*
		 * SECURITY: Read through the random data and change all 0x0 to 0x01.
		 * This is per spec that no random bytes should be 0
		 */
		while (randomLen-- > 0) {
			if (*c == 0x0) {
				*c = 0x01;
			}
			c++;
		}
	}

	/* Separator between the padding and the payload */
	*c++ = 0x00;
	memcpy(c, in, inlen);
	return outlen;
}
/* This macro strips the trailing data argument from the definition below and from every call site. */
#define psRsaCrypt(pool, in, inlen, out, outlen, key, type, data) \
	psRsaCrypt(pool, in, inlen, out, outlen, key, type)
/*
 * Raw RSA operation: read in[0 .. inlen-1] as an unsigned big-endian
 * integer, exponentiate it with the key, and write the result to out.
 *
 *   type == PUBKEY_TYPE:   result = in ^ key->e mod key->N
 *   type == PRIVKEY_TYPE:  result = in ^ key->d mod key->N, or, when
 *                          key->optimized is set, the same value assembled
 *                          from the two half-size exponentiations mod p and
 *                          mod q (dP, dQ, qP).
 *
 * in is consumed completely before out is written, so both may point to the
 * same buffer (psRsaEncryptPub calls it that way).
 *
 * On entry *outlen is the capacity of out. On success *outlen is set to the
 * number of bytes produced: the larger of key->size and the byte length of
 * key->N, with the value right-aligned and zero-padded on the left.
 *
 * Returns PS_SUCCESS, PS_ARG_FAIL for a NULL argument, PS_LIMIT_FAIL if the
 * input is numerically larger than key->N, -1 if out is too small, and
 * PS_FAILURE for any other error (including an unknown type).
 */
static ///bbox
int32 psRsaCrypt(psPool_t *pool, const unsigned char *in, uint32 inlen,
		unsigned char *out, uint32 *outlen, psRsaKey_t *key, int32 type,
		void *data)
{
	pstm_int tmp, tmpa, tmpb;
	int32 res;
	uint32 x;

	if (in == NULL || out == NULL || outlen == NULL || key == NULL) {
		psTraceCrypto("NULL parameter error in psRsaCrypt\n");
		return PS_ARG_FAIL;
	}

	/* Mark all three as unallocated so the cleanup code can run on any path */
	tmp.dp = tmpa.dp = tmpb.dp = NULL;

	/* Init and copy into tmp */
	if (pstm_init_for_read_unsigned_bin(pool, &tmp, inlen + sizeof(pstm_digit))
			!= PS_SUCCESS) {
		return PS_FAILURE;
	}
	if (pstm_read_unsigned_bin(&tmp, (unsigned char *)in, inlen) != PS_SUCCESS){
		pstm_clear(&tmp);
		return PS_FAILURE;
	}
	/* Sanity check on the input */
	if (pstm_cmp(&key->N, &tmp) == PSTM_LT) {
		res = PS_LIMIT_FAIL;
		goto done;
	}
	if (type == PRIVKEY_TYPE) {
		if (key->optimized) {
			if (pstm_init_size(pool, &tmpa, key->p.alloc) != PS_SUCCESS) {
				res = PS_FAILURE;
				goto done;
			}
			if (pstm_init_size(pool, &tmpb, key->q.alloc) != PS_SUCCESS) {
				pstm_clear(&tmpa);
				res = PS_FAILURE;
				goto done;
			}
			/* tmpa = in^dP mod p, tmpb = in^dQ mod q */
			if (pstm_exptmod(pool, &tmp, &key->dP, &key->p, &tmpa) !=
					PS_SUCCESS) {
				psTraceCrypto("decrypt error: pstm_exptmod dP, p\n");
				goto error;
			}
			if (pstm_exptmod(pool, &tmp, &key->dQ, &key->q, &tmpb) !=
					PS_SUCCESS) {
				psTraceCrypto("decrypt error: pstm_exptmod dQ, q\n");
				goto error;
			}
			/* tmp = ((tmpa - tmpb) * qP mod p) * q + tmpb */
			if (pstm_sub(&tmpa, &tmpb, &tmp) != PS_SUCCESS) {
				psTraceCrypto("decrypt error: sub tmpb, tmp\n");
				goto error;
			}
			if (pstm_mulmod(pool, &tmp, &key->qP, &key->p, &tmp) != PS_SUCCESS) {
				psTraceCrypto("decrypt error: pstm_mulmod qP, p\n");
				goto error;
			}
			if (pstm_mul_comba(pool, &tmp, &key->q, &tmp, NULL, 0)
					!= PS_SUCCESS){
				psTraceCrypto("decrypt error: pstm_mul q \n");
				goto error;
			}
			if (pstm_add(&tmp, &tmpb, &tmp) != PS_SUCCESS) {
				psTraceCrypto("decrypt error: pstm_add tmp \n");
				goto error;
			}
		} else {
			if (pstm_exptmod(pool, &tmp, &key->d, &key->N, &tmp) !=
					PS_SUCCESS) {
				psTraceCrypto("psRsaCrypt error: pstm_exptmod\n");
				goto error;
			}
		}
	} else if (type == PUBKEY_TYPE) {
		if (pstm_exptmod(pool, &tmp, &key->e, &key->N, &tmp) != PS_SUCCESS) {
			psTraceCrypto("psRsaCrypt error: pstm_exptmod\n");
			goto error;
		}
	} else {
		psTraceCrypto("psRsaCrypt error: invalid type param\n");
		goto error;
	}
	/* Read it back */
	x = pstm_unsigned_bin_size(&key->N);
	/*
	 * We want the encrypted value to always be the key size, padded with
	 * 0x0 on the left. Settle the final length first and check it against
	 * the caller's buffer; advancing out for the padding and then clearing
	 * and storing the full length from the advanced pointer would write past
	 * the reported output whenever key->size exceeds the size of N.
	 */
	if ((uint32)x < (unsigned long)key->size) {
		x = key->size;
	}
	if ((uint32)x > *outlen) {
		res = -1;
		psTraceCrypto("psRsaCrypt error: pstm_unsigned_bin_size\n");
		goto done;
	}
	*outlen = x;
	/* Convert it: clear the whole field, then store right-aligned */
	memset(out, 0x0, x);
	if (pstm_to_unsigned_bin(pool, &tmp, out+(x-pstm_unsigned_bin_size(&tmp)))
			!= PS_SUCCESS) {
		psTraceCrypto("psRsaCrypt error: pstm_to_unsigned_bin\n");
		goto error;
	}
	/* Clean up and return */
	res = PS_SUCCESS;
	goto done;
error:
	res = PS_FAILURE;
done:
	if (type == PRIVKEY_TYPE && key->optimized) {
		pstm_clear_multi(&tmpa, &tmpb, NULL, NULL, NULL, NULL, NULL, NULL);
	}
	pstm_clear(&tmp);
	return res;
}
/*
 * Pad in[0 .. inlen-1] to key->size bytes with pkcs1Pad() (block type
 * PRIVKEY_TYPE, i.e. the random-padding branch) and run the PUBKEY_TYPE
 * operation of psRsaCrypt() over the padded block, in place in out.
 *
 * out must have room for at least key->size bytes (outlen is its capacity).
 *
 * Returns key->size on success; PS_ARG_FAIL if outlen is too small; the
 * error code of pkcs1Pad()/psRsaCrypt() if either of them fails; PS_FAILURE
 * if the produced length differs from key->size.
 */
int32 psRsaEncryptPub(psPool_t *pool, psRsaKey_t *key,
		unsigned char *in, uint32 inlen,
		unsigned char *out, uint32 outlen, void *data)
{
	const uint32 keylen = key->size;
	int32 rc;

	if (outlen < keylen) {
		psTraceCrypto("Error on bad outlen parameter to psRsaEncryptPub\n");
		return PS_ARG_FAIL;
	}

	rc = pkcs1Pad(in, inlen, out, keylen, PRIVKEY_TYPE, data);
	if (rc < PS_SUCCESS) {
		psTraceCrypto("Error padding psRsaEncryptPub. Likely data too long\n");
		return rc;
	}

	/* The padded block is both input and output; outlen gets the result size */
	rc = psRsaCrypt(pool, out, keylen, out, &outlen, key, PUBKEY_TYPE, data);
	if (rc < PS_SUCCESS) {
		psTraceCrypto("Error performing psRsaEncryptPub\n");
		return rc;
	}

	if (outlen != keylen) {
		psTraceCrypto("Encrypted size error in psRsaEncryptPub\n");
		return PS_FAILURE;
	}
	return keylen;
}

18
networking/tls_rsa.h Normal file
View File

@ -0,0 +1,18 @@
/*
* Copyright (C) 2017 Denys Vlasenko
*
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
/* RSA key as consumed by psRsaCrypt() and psRsaEncryptPub() */
typedef struct {
	pstm_int e;       /* exponent used for PUBKEY_TYPE operations (mod N) */
	pstm_int d;       /* exponent used for PRIVKEY_TYPE when !optimized (mod N) */
	pstm_int N;       /* modulus; also bounds the input and sizes the output */
	pstm_int qP;      /* multiplier applied mod p in the optimized path */
	pstm_int dP;      /* exponent used mod p in the optimized path */
	pstm_int dQ;      /* exponent used mod q in the optimized path */
	pstm_int p;       /* first modulus of the optimized path */
	pstm_int q;       /* second modulus of the optimized path */
	uint32 size;      /* Size of the key in bytes */
	int32 optimized;  /* 1 for optimized: PRIVKEY_TYPE uses p, q, dP, dQ, qP */
	psPool_t *pool;
} psRsaKey_t;
/*
 * The macro removes the trailing "data" argument at preprocessing time. It
 * applies to the prototype below, to the definition, and to every call, so
 * callers still write seven arguments but the compiled function takes six.
 */
#define psRsaEncryptPub(pool, key, in, inlen, out, outlen, data) \
psRsaEncryptPub(pool, key, in, inlen, out, outlen)
/*
 * Pad in[0 .. inlen-1] to key->size bytes and encrypt it into out using the
 * key's e and N.
 *
 * outlen is the capacity of out and must be at least key->size.
 * Returns key->size on success; PS_ARG_FAIL if outlen is too small; the
 * error code of the padding or RSA step if one of them fails; PS_FAILURE if
 * the encrypted length does not equal key->size.
 */
int32 psRsaEncryptPub(psPool_t *pool, psRsaKey_t *key,
unsigned char *in, uint32 inlen,
unsigned char *out, uint32 outlen, void *data);