diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c8e3b0597..52abdbb1e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -269,6 +269,18 @@ if(NOT WOLFSSL_SINGLE_THREADED) endif() endif() +# DTLS-SRTP +add_option("WOLFSSL_SRTP" + "Enables wolfSSL DTLS-SRTP (default: disabled)" + "no" "yes;no") + +if(WOLFSSL_SRTP) + list(APPEND WOLFSSL_DEFINITIONS + "-DWOLFSSL_SRTP") + set(WOLFSSL_DTLS "yes") + set(WOLFSSL_KEYING_MATERIAL "yes") +endif() + # DTLS add_option("WOLFSSL_DTLS" diff --git a/ChangeLog.md b/ChangeLog.md index 89959661d5..5e4591149e 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -23,7 +23,7 @@ NOTE: * --enable-heapmath is being deprecated and will be removed by 2024 * Added LMS/HSS and XMSS/XMSS^MT wolfcrypt hooks, both normal and verify-only options. * Added support for the AES EAX mode of operation * Port for use with Hitch (https://github.com/varnish/hitch) added -* Add XTS API's to handle multiple sectors in new port ot VeraCrypt +* Add XTS API's to handle multiple sectors in new port to VeraCrypt ## Enhancements and Optimizations diff --git a/IDE/CRYPTOCELL/main.c b/IDE/CRYPTOCELL/main.c index cc596fe8fb..7938d0dfae 100644 --- a/IDE/CRYPTOCELL/main.c +++ b/IDE/CRYPTOCELL/main.c @@ -27,7 +27,7 @@ /* wolfCrypt_Init/wolfCrypt_Cleanup to turn CryptoCell hardware on/off */ #include -/* SEGGER_RTT_Init, you can potential replace it with other serial terminal */ +/* SEGGER_RTT_Init, you can potentially replace it with other serial terminal */ #include "SEGGER_RTT.h" int main(void) diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/CMakeLists.txt b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/CMakeLists.txt index b5ee75c613..2f1e9e4111 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/CMakeLists.txt +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/CMakeLists.txt @@ -206,7 +206,7 @@ else() "\"${WOLFSSL_ROOT}/wolfcrypt/src\"" 
"\"${WOLFSSL_ROOT}/wolfcrypt/src/port/Espressif\"" "\"${WOLFSSL_ROOT}/wolfcrypt/src/port/atmel\"" - # TODO: Make this a univeral makefile that detects if bechmark / test needed + # TODO: Make this a universal makefile that detects if benchmark / test needed # Sometimes problematic with SM; consider gating detection. #"\"${WOLFSSL_ROOT}/wolfcrypt/benchmark\"" # the benchmark application #"\"${WOLFSSL_ROOT}/wolfcrypt/test\"" # the test application diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/CMakeLists.txt b/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/CMakeLists.txt index b5ee75c613..2f1e9e4111 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/CMakeLists.txt +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/CMakeLists.txt @@ -206,7 +206,7 @@ else() "\"${WOLFSSL_ROOT}/wolfcrypt/src\"" "\"${WOLFSSL_ROOT}/wolfcrypt/src/port/Espressif\"" "\"${WOLFSSL_ROOT}/wolfcrypt/src/port/atmel\"" - # TODO: Make this a univeral makefile that detects if bechmark / test needed + # TODO: Make this a universal makefile that detects if benchmark / test needed # Sometimes problematic with SM; consider gating detection. 
#"\"${WOLFSSL_ROOT}/wolfcrypt/benchmark\"" # the benchmark application #"\"${WOLFSSL_ROOT}/wolfcrypt/test\"" # the test application diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/main/main.c b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/main/main.c index 63aaaf27e4..bcf220d8e2 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/main/main.c +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/main/main.c @@ -241,6 +241,6 @@ void app_main(void) #else vTaskDelay(60000); #endif - } /* done whle */ + } /* done while */ #endif } diff --git a/IDE/Renesas/cs+/Projects/t4_demo/README_en.txt b/IDE/Renesas/cs+/Projects/t4_demo/README_en.txt index 492d2c7f37..6e2a3bcc44 100644 --- a/IDE/Renesas/cs+/Projects/t4_demo/README_en.txt +++ b/IDE/Renesas/cs+/Projects/t4_demo/README_en.txt @@ -12,7 +12,7 @@ Setup process: - Unzip wolfssl under the same directory 2. Set up wolfSSL - - open wolfssl\IDE\Renesas\cs+\Projec/wolfssl\lib.mtpj with CS+ and build + - open wolfssl\IDE\Renesas\cs+\Projects\wolfssl\lib.mtpj with CS+ and build - open t4_demo.mtpj and build. This create demo program library. 3. Set up AlphaProject diff --git a/IDE/Renesas/e2studio/RZN2L/README.md b/IDE/Renesas/e2studio/RZN2L/README.md index 8433702da4..55df4f2c15 100644 --- a/IDE/Renesas/e2studio/RZN2L/README.md +++ b/IDE/Renesas/e2studio/RZN2L/README.md @@ -152,7 +152,7 @@ $./examples/server/server -b -d -i -v 4 + For ECDSA sign and verify use, Enable the `USE_CERT_BUFFER_256` macro in `wolfssl_demo.h` -Disble the `USE_CERT_BUFFER_2048` macro in `wolfssl_demo.h` +Disable the `USE_CERT_BUFFER_2048` macro in `wolfssl_demo.h` + launch server with the following option. ``` @@ -214,7 +214,7 @@ $./examples/server/server -b -d -i -v 3 + For ECDSA sign and verify use, Enable the `USE_CERT_BUFFER_256` macro in `wolfssl_demo.h` -Disble the `USE_CERT_BUFFER_2048` macro in `wolfssl_demo.h` +Disable the `USE_CERT_BUFFER_2048` macro in `wolfssl_demo.h` + launch server with the following option. 
``` @@ -281,7 +281,7 @@ static const byte ucIPAddress[4] = { 192, 168, 11, 241 }; + For ECDSA sign and verify use, Enable the `USE_CERT_BUFFER_256` macro in `wolfssl_demo.h` -Disble the `USE_CERT_BUFFER_2048` macro in `wolfssl_demo.h` +Disable the `USE_CERT_BUFFER_2048` macro in `wolfssl_demo.h` + launch server from e2studio @@ -311,7 +311,7 @@ Cleaning up socket and wolfSSL objects. Waiting connection.... ``` -You will see the follwoing message on Linux terminal. +You will see the following message on Linux terminal. ``` $ ./examples/client/client -h 192.168.11.241 -p 11111 -v 4 SSL version is TLSv1.3 @@ -333,7 +333,7 @@ Received: hello wolfssl! Cleaning up socket and wolfSSL objects. Waiting connection.... ``` -You will see the follwoing message on Linux terminal. +You will see the following message on Linux terminal. ``` $ ./examples/client/client -h 192.168.11.241 -p 11111 -v 4 -A ./certs/ca-ecc-cert.pem -c ./certs/client-ecc-cert.pem -k ./cert s/ecc-client-key.pem @@ -359,7 +359,7 @@ static const byte ucIPAddress[4] = { 192, 168, 11, 241 }; + For ECDSA sign and verify use, Enable the `USE_CERT_BUFFER_256` macro in `wolfssl_demo.h` -Disble the `USE_CERT_BUFFER_2048` macro in `wolfssl_demo.h` +Disable the `USE_CERT_BUFFER_2048` macro in `wolfssl_demo.h` + launch server from e2studio @@ -389,7 +389,7 @@ Cleaning up socket and wolfSSL objects. Waiting connection.... ``` -You will see the follwoing message on Linux terminal. +You will see the following message on Linux terminal. ``` $ ./examples/client/client -h 192.168.11.241 -p 11111 -v 3 SSL version is TLSv1.2 @@ -411,7 +411,7 @@ Received: hello wolfssl! Cleaning up socket and wolfSSL objects. Waiting connection.... ``` -You will see the follwoing message on Linux terminal. +You will see the following message on Linux terminal. 
``` $ ./examples/client/client -h 192.168.11.241 -p 11111 -v 3 -A ./certs/ca-ecc-cert.pem -c ./certs/client-ecc-cert.pem -k ./certs/ecc-client-key.pem SSL version is TLSv1.2 diff --git a/IDE/Renesas/e2studio/RZN2L/test/src/rzn2l_tst_thread_entry.c b/IDE/Renesas/e2studio/RZN2L/test/src/rzn2l_tst_thread_entry.c index 14152aa825..0e4c459ed0 100644 --- a/IDE/Renesas/e2studio/RZN2L/test/src/rzn2l_tst_thread_entry.c +++ b/IDE/Renesas/e2studio/RZN2L/test/src/rzn2l_tst_thread_entry.c @@ -131,7 +131,7 @@ void RSIP_KeyGeneration(FSPSM_ST *g) } /* only pointer sets to NULL */ -/* onwer of keys should be freed */ +/* owner of keys should be freed */ void Clr_CallbackCtx(FSPSM_ST *g) { (void) g; diff --git a/README b/README index f645c3e8d4..ff86035e43 100644 --- a/README +++ b/README @@ -95,7 +95,7 @@ NOTE: * --enable-heapmath is being deprecated and will be removed by 2024 * Added LMS/HSS and XMSS/XMSS^MT wolfcrypt hooks, both normal and verify-only options. * Added support for the AES EAX mode of operation * Port for use with Hitch (https://github.com/varnish/hitch) added -* Add XTS API's to handle multiple sectors in new port ot VeraCrypt +* Add XTS API's to handle multiple sectors in new port to VeraCrypt ## Enhancements and Optimizations diff --git a/README.md b/README.md index 0472d6bd9a..c16642b9cc 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ NOTE: * --enable-heapmath is being deprecated and will be removed by 2024 * Added LMS/HSS and XMSS/XMSS^MT wolfcrypt hooks, both normal and verify-only options. 
* Added support for the AES EAX mode of operation * Port for use with Hitch (https://github.com/varnish/hitch) added -* Add XTS API's to handle multiple sectors in new port ot VeraCrypt +* Add XTS API's to handle multiple sectors in new port to VeraCrypt ## Enhancements and Optimizations diff --git a/cmake/functions.cmake b/cmake/functions.cmake index 47ab832653..329a386eda 100644 --- a/cmake/functions.cmake +++ b/cmake/functions.cmake @@ -53,7 +53,7 @@ function(generate_build_flags) if(WOLFSSL_SCTP OR WOLFSSL_USER_SETTINGS) set(BUILD_SCTP "yes" PARENT_SCOPE) endif() - if(WOLFSSL_DTLS_CID OR WOLFSSL_USER_SETTINGS) + if(WOLFSSL_DTLS_CID OR WOLFSSL_USER_SETTINGS OR WOLFSSL_DTLS) set(BUILD_DTLS_COMMON "yes" PARENT_SCOPE) endif() set(BUILD_MCAST ${WOLFSSL_MCAST} PARENT_SCOPE) diff --git a/doc/dox_comments/header_files/aes.h b/doc/dox_comments/header_files/aes.h index bbdee91e56..5f1610f76e 100644 --- a/doc/dox_comments/header_files/aes.h +++ b/doc/dox_comments/header_files/aes.h @@ -1533,7 +1533,7 @@ WOLFSSL_API int wc_AesEaxEncryptFinal(AesEax* eax, \ref wc_AesEaxInit. When done using the \c AesEax context structure, make sure to free it using \ref wc_AesEaxFree. - \return 0 if data is authenticated succesfully + \return 0 if data is authenticated successfully \return AES_EAX_AUTH_E if the authentication tag does not match the supplied authentication code vector \c authIn \return other error code on failure diff --git a/examples/server/server.h b/examples/server/server.h index dbf492b13d..e0c8ad7bab 100644 --- a/examples/server/server.h +++ b/examples/server/server.h @@ -27,7 +27,7 @@ THREAD_RETURN WOLFSSL_THREAD server_test(void* args); /* Echo bytes using buffer of blockSize until [echoData] bytes are complete. 
*/ -/* If [bechmarkThroughput] set the statistcs will be output at the end */ +/* If [benchmarkThroughput] set the statistics will be output at the end */ int ServerEchoData(WOLFSSL* ssl, int clientfd, int echoData, int blockSize, size_t benchmarkThroughput); diff --git a/src/internal.c b/src/internal.c index 4dae1b960c..cd478b9a03 100644 --- a/src/internal.c +++ b/src/internal.c @@ -20394,7 +20394,7 @@ int ProcessReplyEx(WOLFSSL* ssl, int allowSocketErr) case getRecordLayerHeader: /* DTLSv1.3 record numbers in the header are encrypted, and AAD - * uses the unecrypted form. Because of this we need to modify the + * uses the unencrypted form. Because of this we need to modify the * header, decrypting the numbers inside * DtlsParseUnifiedRecordLayer(). This violates the const attribute * of the buffer parameter of GetRecordHeader() used here. */ diff --git a/src/ssl.c b/src/ssl.c index 2d56dcf85d..49547acf5e 100644 --- a/src/ssl.c +++ b/src/ssl.c @@ -8432,7 +8432,7 @@ static int LoadSystemCaCertsWindows(WOLFSSL_CTX* ctx, byte* loaded) * directly into wolfSSL "the old way". * * As of MacOS 14.0 we are still able to use this method to access system - * certificates. Accessiblity of this API is indicated by the presence of the + * certificates. Accessibility of this API is indicated by the presence of the * Security/SecTrustSettings.h header. In the likely event that Apple removes * access to this API on Macs, this function should be removed and the * DoAppleNativeCertValidation() routine should be used for all devices. @@ -8579,7 +8579,7 @@ int wolfSSL_CTX_load_system_CA_certs(WOLFSSL_CTX* ctx) #if defined(HAVE_SECURITY_SECTRUSTSETTINGS_H) \ && !defined(WOLFSSL_APPLE_NATIVE_CERT_VALIDATION) /* As of MacOS 14.0 we are still able to access system certificates and - * load them manually into wolfSSL "the old way". Accessiblity of this API + * load them manually into wolfSSL "the old way". 
Accessibility of this API * is indicated by the presence of the Security/SecTrustSettings.h header */ ret = LoadSystemCaCertsMac(ctx, &loaded); #elif defined(WOLFSSL_APPLE_NATIVE_CERT_VALIDATION) diff --git a/src/ssl_crypto.c b/src/ssl_crypto.c index b7ff5a19e5..063d1eafc2 100644 --- a/src/ssl_crypto.c +++ b/src/ssl_crypto.c @@ -1616,8 +1616,8 @@ WOLFSSL_HMAC_CTX* wolfSSL_HMAC_CTX_new(void) * * Not an OpenSSL compatibility API. * - * @param [in, out] ctx HMAC contect object. - * @return 1 inficating success. + * @param [in, out] ctx HMAC context object. + * @return 1 indicating success. */ int wolfSSL_HMAC_CTX_Init(WOLFSSL_HMAC_CTX* ctx) { diff --git a/src/tls.c b/src/tls.c index 9a42a3912c..eaa06a18b0 100644 --- a/src/tls.c +++ b/src/tls.c @@ -8396,7 +8396,7 @@ static int TLSX_KeyShare_ProcessPqc(WOLFSSL* ssl, KeyShareEntry* keyShareEntry) ret = kyber_id2type(oqs_group, &type); if (ret != 0) { WOLFSSL_MSG("Invalid OQS algorithm specified."); - ret = BAD_FUNC_ARG; + return BAD_FUNC_ARG; } if (ret == 0) { ret = wc_KyberKey_Init(type, kem, ssl->heap, INVALID_DEVID); @@ -8887,7 +8887,7 @@ static int server_generate_pqc_ciphertext(WOLFSSL* ssl, ret = kyber_id2type(oqs_group, &type); if (ret != 0) { WOLFSSL_MSG("Invalid Kyber algorithm specified."); - ret = BAD_FUNC_ARG; + return BAD_FUNC_ARG; } if (ret == 0) { diff --git a/src/x509.c b/src/x509.c index a7b512bd7f..a1c4fc4dbe 100644 --- a/src/x509.c +++ b/src/x509.c @@ -5218,7 +5218,7 @@ static WOLFSSL_X509* loadX509orX509REQFromBuffer( const unsigned char* buf, int sz, int format, int type) { - int ret; + int ret = 0; WOLFSSL_X509* x509 = NULL; DerBuffer* der = NULL; @@ -5226,7 +5226,8 @@ static WOLFSSL_X509* loadX509orX509REQFromBuffer( if (format == WOLFSSL_FILETYPE_PEM) { #ifdef WOLFSSL_PEM_TO_DER - if (PemToDer(buf, sz, type, &der, NULL, NULL, NULL) != 0) { + ret = PemToDer(buf, sz, type, &der, NULL, NULL, NULL); + if (ret != 0) { FreeDer(&der); } #else @@ -5252,20 +5253,28 @@ static WOLFSSL_X509* 
loadX509orX509REQFromBuffer( #ifdef WOLFSSL_SMALL_STACK cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL, DYNAMIC_TYPE_DCERT); - if (cert != NULL) + if (cert == NULL) { + ret = MEMORY_ERROR; + } + else #endif { InitDecodedCert(cert, der->buffer, der->length, NULL); - if (ParseCertRelative(cert, type, 0, NULL) == 0) { + ret = ParseCertRelative(cert, type, 0, NULL); + if (ret == 0) { x509 = (WOLFSSL_X509*)XMALLOC(sizeof(WOLFSSL_X509), NULL, DYNAMIC_TYPE_X509); if (x509 != NULL) { InitX509(x509, 1, NULL); - if (CopyDecodedToX509(x509, cert) != 0) { + ret = CopyDecodedToX509(x509, cert); + if (ret != 0) { wolfSSL_X509_free(x509); x509 = NULL; } } + else { + ret = MEMORY_ERROR; + } } FreeDecodedCert(cert); @@ -5277,6 +5286,10 @@ static WOLFSSL_X509* loadX509orX509REQFromBuffer( FreeDer(&der); } + if (ret != 0) { + WOLFSSL_ERROR(ret); + } + return x509; } diff --git a/tests/api.c b/tests/api.c index 2b35b13bf9..5c384882fb 100644 --- a/tests/api.c +++ b/tests/api.c @@ -35690,7 +35690,7 @@ static int test_X509_STORE_untrusted(void) NULL }; - /* Only immediate issuer in untrusted chaing. Fails since can't build chain + /* Only immediate issuer in untrusted chain. Fails since can't build chain * to loaded CA. */ ExpectIntEQ(test_X509_STORE_untrusted_certs(untrusted1, 0, X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY, 1), TEST_SUCCESS); diff --git a/wolfcrypt/src/ext_lms.c b/wolfcrypt/src/ext_lms.c index ebd07e26c5..a5155076b2 100644 --- a/wolfcrypt/src/ext_lms.c +++ b/wolfcrypt/src/ext_lms.c @@ -231,7 +231,7 @@ const char * wc_LmsKey_RcToStr(enum wc_LmsRc lmsEc) /* Init an LMS key. * - * Call this before setting the parms of an LMS key. + * Call this before setting the params of an LMS key. * * Returns 0 on success. * */ @@ -404,7 +404,7 @@ int wc_LmsKey_SetParameters(LmsKey * key, int levels, int height, key->lm_ots_type[i] = ots; } - /* Move the state to parms set. + /* Move the state to params set. * Key is ready for MakeKey or Reload. 
*/ key->state = WC_LMS_STATE_PARMSET; @@ -656,7 +656,7 @@ int wc_LmsKey_MakeKey(LmsKey* key, WC_RNG * rng) return 0; } -/* Reload a key that has been prepared with the appropriate parms and +/* Reload a key that has been prepared with the appropriate params and * data. Use this if you wish to resume signing with an existing key. * * Write/read callbacks, and context data, must be set prior. diff --git a/wolfcrypt/src/ext_xmss.c b/wolfcrypt/src/ext_xmss.c index c19e95e916..b1e5e46dd4 100644 --- a/wolfcrypt/src/ext_xmss.c +++ b/wolfcrypt/src/ext_xmss.c @@ -97,7 +97,7 @@ static int sha256_cb(const unsigned char *in, unsigned long long inlen, /* Init an XMSS key. * - * Call this before setting the parms of an XMSS key. + * Call this before setting the params of an XMSS key. * * key [in] The XMSS key to init. * heap [in] Unused. @@ -201,7 +201,7 @@ static int wc_XmssKey_SetOid(XmssKey * key, uint32_t oid, int is_xmssmt) /* Set the XMSS key parameter string. * - * The input string must be one of the supported parm set names in + * The input string must be one of the supported param set names in * the "Name" section from the table in wolfssl/wolfcrypt/xmss.h, * e.g. "XMSS-SHA2_10_256" or "XMSSMT-SHA2_20/4_256". 
* diff --git a/wolfcrypt/src/port/Espressif/esp32_mp.c b/wolfcrypt/src/port/Espressif/esp32_mp.c index 066c61f685..1b699dfddf 100644 --- a/wolfcrypt/src/port/Espressif/esp32_mp.c +++ b/wolfcrypt/src/port/Espressif/esp32_mp.c @@ -1015,8 +1015,8 @@ int esp_mp_montgomery_init(MATH_INT_T* X, MATH_INT_T* Y, MATH_INT_T* M, return MP_HW_FALLBACK; } if ((X == NULL) || (Y == NULL) || (M == NULL) ) { - /* if a bad oprand passed, we cannot use HW */ - ESP_LOGE(TAG, "ERROR: Bad Montgomery operand, falling back to SW"); + /* if a bad operand passed, we cannot use HW */ + ESP_LOGE(TAG, "ERROR: Bad Montgomery operand, falling back to SW"); return MP_HW_FALLBACK; } XMEMSET(mph, 0, sizeof(struct esp_mp_helper)); diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index 989e65bd25..2efc3d35d8 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -1882,7 +1882,7 @@ static int Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, byte counter[AES_BLOCK_SIZE]; byte scratch[AES_BLOCK_SIZE]; /* Noticed different optimization levels treated head of array different. - * Some cases was stack pointer plus offset others was a regester containing + * Some cases was stack pointer plus offset others was a register containing * address. To make uniform for passing in to inline assembly code am using * pointers to the head of each local array. */ @@ -3528,7 +3528,7 @@ static int Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, byte counter[AES_BLOCK_SIZE]; byte scratch[AES_BLOCK_SIZE]; /* Noticed different optimization levels treated head of array different. - * Some cases was stack pointer plus offset others was a regester containing + * Some cases was stack pointer plus offset others was a register containing * address. To make uniform for passing in to inline assembly code am using * pointers to the head of each local array. 
*/ @@ -5291,7 +5291,7 @@ static int Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, byte counter[AES_BLOCK_SIZE]; byte scratch[AES_BLOCK_SIZE]; /* Noticed different optimization levels treated head of array different. - * Some cases was stack pointer plus offset others was a regester containing + * Some cases was stack pointer plus offset others was a register containing * address. To make uniform for passing in to inline assembly code am using * pointers to the head of each local array. */ diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm.S b/wolfcrypt/src/port/arm/thumb2-aes-asm.S index f483f87de4..0badf8f97f 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm.S @@ -670,13 +670,13 @@ L_AES_invert_key_mix_loop: EOR r8, r8, r9, ROR #24 STR r8, [r0], #4 SUBS r11, r11, #0x1 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_invert_key_mix_loop #else - BNE.N L_AES_invert_key_mix_loop + BNE.W L_AES_invert_key_mix_loop #endif POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 165 + /* Cycle Count = 165 */ .size AES_invert_key,.-AES_invert_key #endif /* HAVE_AES_DECRYPT */ .text @@ -699,20 +699,20 @@ L_AES_Thumb2_rcon: .globl AES_set_encrypt_key .type AES_set_encrypt_key, %function AES_set_encrypt_key: - PUSH {r4, r5, r6, r7, r8, lr} - LDR r8, L_AES_Thumb2_te + PUSH {r4, r5, r6, r7, r8, r9, r10, lr} + LDR r10, L_AES_Thumb2_te ADR lr, L_AES_Thumb2_rcon CMP r1, #0x80 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_set_encrypt_key_start_128 #else - BEQ.N L_AES_set_encrypt_key_start_128 + BEQ.W L_AES_set_encrypt_key_start_128 #endif CMP r1, #0xc0 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_set_encrypt_key_start_192 #else - BEQ.N L_AES_set_encrypt_key_start_192 + BEQ.W L_AES_set_encrypt_key_start_192 #endif LDRD r4, r5, [r0] LDRD r6, 
r7, [r0, #8] @@ -735,10 +735,10 @@ L_AES_set_encrypt_key_loop_256: UBFX r5, r7, #8, #8 UBFX r6, r7, #16, #8 LSR r7, r7, #24 - LDRB r4, [r8, r4, LSL #2] - LDRB r5, [r8, r5, LSL #2] - LDRB r6, [r8, r6, LSL #2] - LDRB r7, [r8, r7, LSL #2] + LDRB r4, [r10, r4, LSL #2] + LDRB r5, [r10, r5, LSL #2] + LDRB r6, [r10, r6, LSL #2] + LDRB r7, [r10, r7, LSL #2] EOR r3, r7, r4, LSL #8 EOR r3, r3, r5, LSL #16 EOR r3, r3, r6, LSL #24 @@ -757,10 +757,10 @@ L_AES_set_encrypt_key_loop_256: UBFX r5, r3, #16, #8 LSR r6, r3, #24 UBFX r3, r3, #0, #8 - LDRB r4, [r8, r4, LSL #2] - LDRB r6, [r8, r6, LSL #2] - LDRB r5, [r8, r5, LSL #2] - LDRB r3, [r8, r3, LSL #2] + LDRB r4, [r10, r4, LSL #2] + LDRB r6, [r10, r6, LSL #2] + LDRB r5, [r10, r5, LSL #2] + LDRB r3, [r10, r3, LSL #2] EOR r3, r3, r4, LSL #8 EOR r3, r3, r5, LSL #16 EOR r3, r3, r6, LSL #24 @@ -782,10 +782,10 @@ L_AES_set_encrypt_key_loop_256: UBFX r5, r7, #8, #8 UBFX r6, r7, #16, #8 LSR r7, r7, #24 - LDRB r4, [r8, r4, LSL #2] - LDRB r5, [r8, r5, LSL #2] - LDRB r6, [r8, r6, LSL #2] - LDRB r7, [r8, r7, LSL #2] + LDRB r4, [r10, r4, LSL #2] + LDRB r5, [r10, r5, LSL #2] + LDRB r6, [r10, r6, LSL #2] + LDRB r7, [r10, r7, LSL #2] EOR r3, r7, r4, LSL #8 EOR r3, r3, r5, LSL #16 EOR r3, r3, r6, LSL #24 @@ -799,69 +799,77 @@ L_AES_set_encrypt_key_loop_256: ADD r2, r2, #0x10 STM r2, {r4, r5, r6, r7} SUB r2, r2, #0x10 +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_set_encrypt_key_end +#else + B.N L_AES_set_encrypt_key_end +#endif L_AES_set_encrypt_key_start_192: LDRD r4, r5, [r0] LDRD r6, r7, [r0, #8] - LDRD r0, r1, [r0, #16] + LDRD r8, r9, [r0, #16] REV r4, r4 REV r5, r5 REV r6, r6 REV r7, r7 - REV r0, r0 - REV r1, r1 + REV r8, r8 + REV r9, r9 STM r2, {r4, r5, r6, r7} - STRD r0, r1, [r2, #16] - MOV r7, r1 + STRD r8, r9, [r2, #16] + MOV r7, r9 MOV r12, #0x7 L_AES_set_encrypt_key_loop_192: - UBFX r0, r7, #0, #8 - UBFX r1, r7, #8, #8 - UBFX r4, r7, #16, #8 - LSR r7, r7, #24 - LDRB r0, [r8, r0, LSL #2] - LDRB 
r1, [r8, r1, LSL #2] - LDRB r4, [r8, r4, LSL #2] - LDRB r7, [r8, r7, LSL #2] - EOR r3, r7, r0, LSL #8 - EOR r3, r3, r1, LSL #16 - EOR r3, r3, r4, LSL #24 - LDM r2!, {r0, r1, r4, r5, r6, r7} - EOR r0, r0, r3 + UBFX r4, r9, #0, #8 + UBFX r5, r9, #8, #8 + UBFX r6, r9, #16, #8 + LSR r9, r9, #24 + LDRB r4, [r10, r4, LSL #2] + LDRB r5, [r10, r5, LSL #2] + LDRB r6, [r10, r6, LSL #2] + LDRB r9, [r10, r9, LSL #2] + EOR r3, r9, r4, LSL #8 + EOR r3, r3, r5, LSL #16 + EOR r3, r3, r6, LSL #24 + LDM r2!, {r4, r5, r6, r7, r8, r9} + EOR r4, r4, r3 LDM lr!, {r3} - EOR r0, r0, r3 - EOR r1, r1, r0 - EOR r4, r4, r1 + EOR r4, r4, r3 EOR r5, r5, r4 EOR r6, r6, r5 EOR r7, r7, r6 - STM r2, {r0, r1, r4, r5, r6, r7} + EOR r8, r8, r7 + EOR r9, r9, r8 + STM r2, {r4, r5, r6, r7, r8, r9} SUBS r12, r12, #0x1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_set_encrypt_key_loop_192 #else BNE.N L_AES_set_encrypt_key_loop_192 #endif - UBFX r0, r7, #0, #8 - UBFX r1, r7, #8, #8 - UBFX r4, r7, #16, #8 - LSR r7, r7, #24 - LDRB r0, [r8, r0, LSL #2] - LDRB r1, [r8, r1, LSL #2] - LDRB r4, [r8, r4, LSL #2] - LDRB r7, [r8, r7, LSL #2] - EOR r3, r7, r0, LSL #8 - EOR r3, r3, r1, LSL #16 - EOR r3, r3, r4, LSL #24 - LDM r2!, {r0, r1, r4, r5, r6, r7} - EOR r0, r0, r3 + UBFX r4, r9, #0, #8 + UBFX r5, r9, #8, #8 + UBFX r6, r9, #16, #8 + LSR r9, r9, #24 + LDRB r4, [r10, r4, LSL #2] + LDRB r5, [r10, r5, LSL #2] + LDRB r6, [r10, r6, LSL #2] + LDRB r9, [r10, r9, LSL #2] + EOR r3, r9, r4, LSL #8 + EOR r3, r3, r5, LSL #16 + EOR r3, r3, r6, LSL #24 + LDM r2!, {r4, r5, r6, r7, r8, r9} + EOR r4, r4, r3 LDM lr!, {r3} - EOR r0, r0, r3 - EOR r1, r1, r0 - EOR r4, r4, r1 + EOR r4, r4, r3 EOR r5, r5, r4 - STM r2, {r0, r1, r4, r5} + EOR r6, r6, r5 + EOR r7, r7, r6 + STM r2, {r4, r5, r6, r7} +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_set_encrypt_key_end +#else + B.N L_AES_set_encrypt_key_end +#endif L_AES_set_encrypt_key_start_128: LDRD r4, r5, [r0] 
LDRD r6, r7, [r0, #8] @@ -876,10 +884,10 @@ L_AES_set_encrypt_key_loop_128: UBFX r5, r7, #8, #8 UBFX r6, r7, #16, #8 LSR r7, r7, #24 - LDRB r4, [r8, r4, LSL #2] - LDRB r5, [r8, r5, LSL #2] - LDRB r6, [r8, r6, LSL #2] - LDRB r7, [r8, r7, LSL #2] + LDRB r4, [r10, r4, LSL #2] + LDRB r5, [r10, r5, LSL #2] + LDRB r6, [r10, r6, LSL #2] + LDRB r7, [r10, r7, LSL #2] EOR r3, r7, r4, LSL #8 EOR r3, r3, r5, LSL #16 EOR r3, r3, r6, LSL #24 @@ -898,8 +906,8 @@ L_AES_set_encrypt_key_loop_128: BNE.N L_AES_set_encrypt_key_loop_128 #endif L_AES_set_encrypt_key_end: - POP {r4, r5, r6, r7, r8, pc} - # Cycle Count = 327 + POP {r4, r5, r6, r7, r8, r9, r10, pc} + /* Cycle Count = 331 */ .size AES_set_encrypt_key,.-AES_set_encrypt_key .text .align 4 @@ -953,7 +961,7 @@ L_AES_encrypt_block_nr: LDM r3!, {r4, r5, r6, r7} EOR r11, r11, lr, ROR #24 EOR r11, r11, r2, ROR #8 - # XOR in Key Schedule + /* XOR in Key Schedule */ EOR r8, r8, r4 EOR r9, r9, r5 EOR r10, r10, r6 @@ -1003,16 +1011,16 @@ L_AES_encrypt_block_nr: LDM r3!, {r8, r9, r10, r11} EOR r7, r7, lr, ROR #24 EOR r7, r7, r2, ROR #8 - # XOR in Key Schedule + /* XOR in Key Schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 SUBS r1, r1, #0x1 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_encrypt_block_nr #else - BNE.N L_AES_encrypt_block_nr + BNE.W L_AES_encrypt_block_nr #endif UBFX r8, r5, #16, #8 LSR r11, r4, #24 @@ -1059,7 +1067,7 @@ L_AES_encrypt_block_nr: LDM r3!, {r4, r5, r6, r7} EOR r11, r11, lr, ROR #24 EOR r11, r11, r2, ROR #8 - # XOR in Key Schedule + /* XOR in Key Schedule */ EOR r8, r8, r4 EOR r9, r9, r5 EOR r10, r10, r6 @@ -1109,13 +1117,13 @@ L_AES_encrypt_block_nr: LDM r3, {r8, r9, r10, r11} EOR r7, r7, lr, LSL #8 EOR r7, r7, r2, LSL #16 - # XOR in Key Schedule + /* XOR in Key Schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 POP {pc} - # Cycle Count = 285 + /* Cycle Count = 285 */ .size 
AES_encrypt_block,.-AES_encrypt_block #if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text @@ -1137,16 +1145,16 @@ AES_ECB_encrypt: LDR r12, [sp, #36] PUSH {r3} CMP r12, #0xa -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_ECB_encrypt_start_block_128 #else - BEQ.N L_AES_ECB_encrypt_start_block_128 + BEQ.W L_AES_ECB_encrypt_start_block_128 #endif CMP r12, #0xc -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_ECB_encrypt_start_block_192 #else - BEQ.N L_AES_ECB_encrypt_start_block_192 + BEQ.W L_AES_ECB_encrypt_start_block_192 #endif L_AES_ECB_encrypt_loop_block_256: LDR r4, [lr] @@ -1159,7 +1167,7 @@ L_AES_ECB_encrypt_loop_block_256: REV r7, r7 PUSH {r1, r2, lr} LDM r3!, {r8, r9, r10, r11} - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -1179,12 +1187,16 @@ L_AES_ECB_encrypt_loop_block_256: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_ECB_encrypt_loop_block_256 #else - BNE.N L_AES_ECB_encrypt_loop_block_256 + BNE.W L_AES_ECB_encrypt_loop_block_256 #endif +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_ECB_encrypt_end +#else + B.N L_AES_ECB_encrypt_end +#endif L_AES_ECB_encrypt_start_block_192: L_AES_ECB_encrypt_loop_block_192: LDR r4, [lr] @@ -1197,7 +1209,7 @@ L_AES_ECB_encrypt_loop_block_192: REV r7, r7 PUSH {r1, r2, lr} LDM r3!, {r8, r9, r10, r11} - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -1217,12 +1229,16 @@ L_AES_ECB_encrypt_loop_block_192: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || 
defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_ECB_encrypt_loop_block_192 #else - BNE.N L_AES_ECB_encrypt_loop_block_192 + BNE.W L_AES_ECB_encrypt_loop_block_192 #endif +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_ECB_encrypt_end +#else + B.N L_AES_ECB_encrypt_end +#endif L_AES_ECB_encrypt_start_block_128: L_AES_ECB_encrypt_loop_block_128: LDR r4, [lr] @@ -1235,7 +1251,7 @@ L_AES_ECB_encrypt_loop_block_128: REV r7, r7 PUSH {r1, r2, lr} LDM r3!, {r8, r9, r10, r11} - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -1255,15 +1271,15 @@ L_AES_ECB_encrypt_loop_block_128: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_ECB_encrypt_loop_block_128 #else - BNE.N L_AES_ECB_encrypt_loop_block_128 + BNE.W L_AES_ECB_encrypt_loop_block_128 #endif L_AES_ECB_encrypt_end: POP {r3} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 212 + /* Cycle Count = 212 */ .size AES_ECB_encrypt,.-AES_ECB_encrypt #endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_CBC @@ -1280,16 +1296,16 @@ AES_CBC_encrypt: LDM r9, {r4, r5, r6, r7} PUSH {r3, r9} CMP r8, #0xa -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_CBC_encrypt_start_block_128 #else - BEQ.N L_AES_CBC_encrypt_start_block_128 + BEQ.W L_AES_CBC_encrypt_start_block_128 #endif CMP r8, #0xc -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_CBC_encrypt_start_block_192 #else - BEQ.N L_AES_CBC_encrypt_start_block_192 + BEQ.W L_AES_CBC_encrypt_start_block_192 #endif L_AES_CBC_encrypt_loop_block_256: LDR r8, [lr] @@ -1306,7 +1322,7 @@ L_AES_CBC_encrypt_loop_block_256: REV r5, r5 REV r6, r6 REV r7, r7 - # Round: 0 - XOR in key schedule + /* Round: 0 - 
XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -1326,12 +1342,16 @@ L_AES_CBC_encrypt_loop_block_256: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_CBC_encrypt_loop_block_256 #else - BNE.N L_AES_CBC_encrypt_loop_block_256 + BNE.W L_AES_CBC_encrypt_loop_block_256 #endif +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_CBC_encrypt_end +#else + B.N L_AES_CBC_encrypt_end +#endif L_AES_CBC_encrypt_start_block_192: L_AES_CBC_encrypt_loop_block_192: LDR r8, [lr] @@ -1348,7 +1368,7 @@ L_AES_CBC_encrypt_loop_block_192: REV r5, r5 REV r6, r6 REV r7, r7 - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -1368,12 +1388,16 @@ L_AES_CBC_encrypt_loop_block_192: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_CBC_encrypt_loop_block_192 #else - BNE.N L_AES_CBC_encrypt_loop_block_192 + BNE.W L_AES_CBC_encrypt_loop_block_192 #endif +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_CBC_encrypt_end +#else + B.N L_AES_CBC_encrypt_end +#endif L_AES_CBC_encrypt_start_block_128: L_AES_CBC_encrypt_loop_block_128: LDR r8, [lr] @@ -1390,7 +1414,7 @@ L_AES_CBC_encrypt_loop_block_128: REV r5, r5 REV r6, r6 REV r7, r7 - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -1410,16 +1434,16 @@ L_AES_CBC_encrypt_loop_block_128: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_CBC_encrypt_loop_block_128 #else - BNE.N L_AES_CBC_encrypt_loop_block_128 + BNE.W L_AES_CBC_encrypt_loop_block_128 #endif L_AES_CBC_encrypt_end: POP {r3, r9} 
STM r9, {r4, r5, r6, r7} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 238 + /* Cycle Count = 238 */ .size AES_CBC_encrypt,.-AES_CBC_encrypt #endif /* HAVE_AES_CBC */ #ifdef WOLFSSL_AES_COUNTER @@ -1441,16 +1465,16 @@ AES_CTR_encrypt: STM r8, {r4, r5, r6, r7} PUSH {r3, r8} CMP r12, #0xa -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_CTR_encrypt_start_block_128 #else - BEQ.N L_AES_CTR_encrypt_start_block_128 + BEQ.W L_AES_CTR_encrypt_start_block_128 #endif CMP r12, #0xc -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_CTR_encrypt_start_block_192 #else - BEQ.N L_AES_CTR_encrypt_start_block_192 + BEQ.W L_AES_CTR_encrypt_start_block_192 #endif L_AES_CTR_encrypt_loop_block_256: PUSH {r1, r2, lr} @@ -1461,7 +1485,7 @@ L_AES_CTR_encrypt_loop_block_256: ADC r8, r4, #0x0 STM lr, {r8, r9, r10, r11} LDM r3!, {r8, r9, r10, r11} - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -1491,12 +1515,16 @@ L_AES_CTR_encrypt_loop_block_256: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_CTR_encrypt_loop_block_256 #else - BNE.N L_AES_CTR_encrypt_loop_block_256 + BNE.W L_AES_CTR_encrypt_loop_block_256 #endif +#ifdef __GNUC__ B L_AES_CTR_encrypt_end +#else + B.W L_AES_CTR_encrypt_end +#endif L_AES_CTR_encrypt_start_block_192: L_AES_CTR_encrypt_loop_block_192: PUSH {r1, r2, lr} @@ -1507,7 +1535,7 @@ L_AES_CTR_encrypt_loop_block_192: ADC r8, r4, #0x0 STM lr, {r8, r9, r10, r11} LDM r3!, {r8, r9, r10, r11} - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -1537,12 +1565,16 @@ L_AES_CTR_encrypt_loop_block_192: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || 
defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_CTR_encrypt_loop_block_192 #else - BNE.N L_AES_CTR_encrypt_loop_block_192 + BNE.W L_AES_CTR_encrypt_loop_block_192 #endif +#ifdef __GNUC__ B L_AES_CTR_encrypt_end +#else + B.W L_AES_CTR_encrypt_end +#endif L_AES_CTR_encrypt_start_block_128: L_AES_CTR_encrypt_loop_block_128: PUSH {r1, r2, lr} @@ -1553,7 +1585,7 @@ L_AES_CTR_encrypt_loop_block_128: ADC r8, r4, #0x0 STM lr, {r8, r9, r10, r11} LDM r3!, {r8, r9, r10, r11} - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -1583,10 +1615,10 @@ L_AES_CTR_encrypt_loop_block_128: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_CTR_encrypt_loop_block_128 #else - BNE.N L_AES_CTR_encrypt_loop_block_128 + BNE.W L_AES_CTR_encrypt_loop_block_128 #endif L_AES_CTR_encrypt_end: POP {r3, r8} @@ -1596,7 +1628,7 @@ L_AES_CTR_encrypt_end: REV r7, r7 STM r8, {r4, r5, r6, r7} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 293 + /* Cycle Count = 293 */ .size AES_CTR_encrypt,.-AES_CTR_encrypt #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT @@ -1653,7 +1685,7 @@ L_AES_decrypt_block_nr: LDM r3!, {r4, r5, r6, r7} EOR r11, r11, lr, ROR #8 EOR r11, r11, r12, ROR #24 - # XOR in Key Schedule + /* XOR in Key Schedule */ EOR r8, r8, r4 EOR r9, r9, r5 EOR r10, r10, r6 @@ -1703,16 +1735,16 @@ L_AES_decrypt_block_nr: LDM r3!, {r8, r9, r10, r11} EOR r7, r7, lr, ROR #8 EOR r7, r7, r12, ROR #24 - # XOR in Key Schedule + /* XOR in Key Schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 SUBS r1, r1, #0x1 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_decrypt_block_nr #else - BNE.N L_AES_decrypt_block_nr + BNE.W L_AES_decrypt_block_nr #endif UBFX r8, r7, #16, #8 LSR r11, r4, #24 @@ -1759,7 +1791,7 @@ 
L_AES_decrypt_block_nr: LDM r3!, {r4, r5, r6, r7} EOR r11, r11, lr, ROR #8 EOR r11, r11, r12, ROR #24 - # XOR in Key Schedule + /* XOR in Key Schedule */ EOR r8, r8, r4 EOR r9, r9, r5 EOR r10, r10, r6 @@ -1809,13 +1841,13 @@ L_AES_decrypt_block_nr: LDM r3, {r8, r9, r10, r11} EOR r7, r7, r12, LSL #8 EOR r7, r7, lr, LSL #16 - # XOR in Key Schedule + /* XOR in Key Schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 POP {pc} - # Cycle Count = 285 + /* Cycle Count = 285 */ .size AES_decrypt_block,.-AES_decrypt_block .text .type L_AES_Thumb2_td_ecb, %object @@ -2097,16 +2129,16 @@ AES_ECB_decrypt: MOV r12, r2 ADR r2, L_AES_Thumb2_td4 CMP r8, #0xa -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_ECB_decrypt_start_block_128 #else - BEQ.N L_AES_ECB_decrypt_start_block_128 + BEQ.W L_AES_ECB_decrypt_start_block_128 #endif CMP r8, #0xc -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_ECB_decrypt_start_block_192 #else - BEQ.N L_AES_ECB_decrypt_start_block_192 + BEQ.W L_AES_ECB_decrypt_start_block_192 #endif L_AES_ECB_decrypt_loop_block_256: LDR r4, [lr] @@ -2119,7 +2151,7 @@ L_AES_ECB_decrypt_loop_block_256: REV r7, r7 PUSH {r1, r3, r12, lr} LDM r3!, {r8, r9, r10, r11} - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -2138,12 +2170,16 @@ L_AES_ECB_decrypt_loop_block_256: SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_ECB_decrypt_loop_block_256 #else - BNE.N L_AES_ECB_decrypt_loop_block_256 + BNE.W L_AES_ECB_decrypt_loop_block_256 #endif +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_ECB_decrypt_end +#else + B.N L_AES_ECB_decrypt_end +#endif L_AES_ECB_decrypt_start_block_192: L_AES_ECB_decrypt_loop_block_192: LDR r4, [lr] 
@@ -2156,7 +2192,7 @@ L_AES_ECB_decrypt_loop_block_192: REV r7, r7 PUSH {r1, r3, r12, lr} LDM r3!, {r8, r9, r10, r11} - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -2175,12 +2211,16 @@ L_AES_ECB_decrypt_loop_block_192: SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_ECB_decrypt_loop_block_192 #else - BNE.N L_AES_ECB_decrypt_loop_block_192 + BNE.W L_AES_ECB_decrypt_loop_block_192 #endif +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_ECB_decrypt_end +#else + B.N L_AES_ECB_decrypt_end +#endif L_AES_ECB_decrypt_start_block_128: L_AES_ECB_decrypt_loop_block_128: LDR r4, [lr] @@ -2193,7 +2233,7 @@ L_AES_ECB_decrypt_loop_block_128: REV r7, r7 PUSH {r1, r3, r12, lr} LDM r3!, {r8, r9, r10, r11} - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -2212,14 +2252,14 @@ L_AES_ECB_decrypt_loop_block_128: SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_ECB_decrypt_loop_block_128 #else - BNE.N L_AES_ECB_decrypt_loop_block_128 + BNE.W L_AES_ECB_decrypt_loop_block_128 #endif L_AES_ECB_decrypt_end: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 210 + /* Cycle Count = 210 */ .size AES_ECB_decrypt,.-AES_ECB_decrypt #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_CBC @@ -2237,16 +2277,16 @@ AES_CBC_decrypt: ADR r2, L_AES_Thumb2_td4 PUSH {r3, r4} CMP r8, #0xa -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_CBC_decrypt_loop_block_128 #else - BEQ.N L_AES_CBC_decrypt_loop_block_128 + BEQ.W L_AES_CBC_decrypt_loop_block_128 #endif CMP r8, #0xc -#if defined(__GNUC__) || defined(__ICCARM__) || 
defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_CBC_decrypt_loop_block_192 #else - BEQ.N L_AES_CBC_decrypt_loop_block_192 + BEQ.W L_AES_CBC_decrypt_loop_block_192 #endif L_AES_CBC_decrypt_loop_block_256: PUSH {r1, r12, lr} @@ -2262,7 +2302,7 @@ L_AES_CBC_decrypt_loop_block_256: REV r5, r5 REV r6, r6 REV r7, r7 - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -2288,10 +2328,10 @@ L_AES_CBC_decrypt_loop_block_256: SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_CBC_decrypt_end_odd #else - BEQ.N L_AES_CBC_decrypt_end_odd + BEQ.W L_AES_CBC_decrypt_end_odd #endif PUSH {r1, r12, lr} LDR r4, [lr] @@ -2306,7 +2346,7 @@ L_AES_CBC_decrypt_loop_block_256: REV r5, r5 REV r6, r6 REV r7, r7 - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -2333,12 +2373,16 @@ L_AES_CBC_decrypt_loop_block_256: SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_CBC_decrypt_loop_block_256 #else - BNE.N L_AES_CBC_decrypt_loop_block_256 + BNE.W L_AES_CBC_decrypt_loop_block_256 #endif +#ifdef __GNUC__ B L_AES_CBC_decrypt_end +#else + B.W L_AES_CBC_decrypt_end +#endif L_AES_CBC_decrypt_loop_block_192: PUSH {r1, r12, lr} LDR r4, [lr] @@ -2353,7 +2397,7 @@ L_AES_CBC_decrypt_loop_block_192: REV r5, r5 REV r6, r6 REV r7, r7 - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -2379,10 +2423,10 @@ L_AES_CBC_decrypt_loop_block_192: SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_CBC_decrypt_end_odd #else - BEQ.N L_AES_CBC_decrypt_end_odd + BEQ.W 
L_AES_CBC_decrypt_end_odd #endif PUSH {r1, r12, lr} LDR r4, [lr] @@ -2397,7 +2441,7 @@ L_AES_CBC_decrypt_loop_block_192: REV r5, r5 REV r6, r6 REV r7, r7 - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -2424,12 +2468,16 @@ L_AES_CBC_decrypt_loop_block_192: SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_CBC_decrypt_loop_block_192 #else - BNE.N L_AES_CBC_decrypt_loop_block_192 + BNE.W L_AES_CBC_decrypt_loop_block_192 #endif +#ifdef __GNUC__ B L_AES_CBC_decrypt_end +#else + B.W L_AES_CBC_decrypt_end +#endif L_AES_CBC_decrypt_loop_block_128: PUSH {r1, r12, lr} LDR r4, [lr] @@ -2444,7 +2492,7 @@ L_AES_CBC_decrypt_loop_block_128: REV r5, r5 REV r6, r6 REV r7, r7 - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -2470,10 +2518,10 @@ L_AES_CBC_decrypt_loop_block_128: SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_CBC_decrypt_end_odd #else - BEQ.N L_AES_CBC_decrypt_end_odd + BEQ.W L_AES_CBC_decrypt_end_odd #endif PUSH {r1, r12, lr} LDR r4, [lr] @@ -2488,7 +2536,7 @@ L_AES_CBC_decrypt_loop_block_128: REV r5, r5 REV r6, r6 REV r7, r7 - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -2515,12 +2563,16 @@ L_AES_CBC_decrypt_loop_block_128: SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_CBC_decrypt_loop_block_128 #else - BNE.N L_AES_CBC_decrypt_loop_block_128 + BNE.W L_AES_CBC_decrypt_loop_block_128 #endif +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_CBC_decrypt_end +#else + B.N 
L_AES_CBC_decrypt_end +#endif L_AES_CBC_decrypt_end_odd: LDR r4, [sp, #4] LDRD r8, r9, [r4, #16] @@ -2530,7 +2582,7 @@ L_AES_CBC_decrypt_end_odd: L_AES_CBC_decrypt_end: POP {r3, r4} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 518 + /* Cycle Count = 518 */ .size AES_CBC_decrypt,.-AES_CBC_decrypt #endif /* HAVE_AES_CBC */ #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || HAVE_AES_CBC */ @@ -3109,13 +3161,13 @@ L_GCM_gmult_len_start_block: POP {r3} SUBS r3, r3, #0x10 ADD r2, r2, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_GCM_gmult_len_start_block #else - BNE.N L_GCM_gmult_len_start_block + BNE.W L_GCM_gmult_len_start_block #endif POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 742 + /* Cycle Count = 742 */ .size GCM_gmult_len,.-GCM_gmult_len .text .type L_AES_Thumb2_te_gcm, %object @@ -3141,16 +3193,16 @@ AES_GCM_encrypt: STM r8, {r4, r5, r6, r7} PUSH {r3, r8} CMP r12, #0xa -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_GCM_encrypt_start_block_128 #else - BEQ.N L_AES_GCM_encrypt_start_block_128 + BEQ.W L_AES_GCM_encrypt_start_block_128 #endif CMP r12, #0xc -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BEQ L_AES_GCM_encrypt_start_block_192 #else - BEQ.N L_AES_GCM_encrypt_start_block_192 + BEQ.W L_AES_GCM_encrypt_start_block_192 #endif L_AES_GCM_encrypt_loop_block_256: PUSH {r1, r2, lr} @@ -3158,7 +3210,7 @@ L_AES_GCM_encrypt_loop_block_256: ADD r7, r7, #0x1 LDM r3!, {r8, r9, r10, r11} STR r7, [lr, #12] - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -3188,12 +3240,16 @@ L_AES_GCM_encrypt_loop_block_256: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_GCM_encrypt_loop_block_256 #else 
- BNE.N L_AES_GCM_encrypt_loop_block_256 + BNE.W L_AES_GCM_encrypt_loop_block_256 #endif +#ifdef __GNUC__ B L_AES_GCM_encrypt_end +#else + B.W L_AES_GCM_encrypt_end +#endif L_AES_GCM_encrypt_start_block_192: L_AES_GCM_encrypt_loop_block_192: PUSH {r1, r2, lr} @@ -3201,7 +3257,7 @@ L_AES_GCM_encrypt_loop_block_192: ADD r7, r7, #0x1 LDM r3!, {r8, r9, r10, r11} STR r7, [lr, #12] - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -3231,12 +3287,16 @@ L_AES_GCM_encrypt_loop_block_192: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_GCM_encrypt_loop_block_192 #else - BNE.N L_AES_GCM_encrypt_loop_block_192 + BNE.W L_AES_GCM_encrypt_loop_block_192 #endif +#ifdef __GNUC__ B L_AES_GCM_encrypt_end +#else + B.W L_AES_GCM_encrypt_end +#endif L_AES_GCM_encrypt_start_block_128: L_AES_GCM_encrypt_loop_block_128: PUSH {r1, r2, lr} @@ -3244,7 +3304,7 @@ L_AES_GCM_encrypt_loop_block_128: ADD r7, r7, #0x1 LDM r3!, {r8, r9, r10, r11} STR r7, [lr, #12] - # Round: 0 - XOR in key schedule + /* Round: 0 - XOR in key schedule */ EOR r4, r4, r8 EOR r5, r5, r9 EOR r6, r6, r10 @@ -3274,10 +3334,10 @@ L_AES_GCM_encrypt_loop_block_128: SUBS r2, r2, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_AES_GCM_encrypt_loop_block_128 #else - BNE.N L_AES_GCM_encrypt_loop_block_128 + BNE.W L_AES_GCM_encrypt_loop_block_128 #endif L_AES_GCM_encrypt_end: POP {r3, r8} @@ -3287,7 +3347,7 @@ L_AES_GCM_encrypt_end: REV r7, r7 STM r8, {r4, r5, r6, r7} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 275 + /* Cycle Count = 275 */ .size AES_GCM_encrypt,.-AES_GCM_encrypt #endif /* HAVE_AESGCM */ #endif /* !NO_AES */ diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c index 
48b5edc16c..7d5357f1a2 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -39,7 +39,7 @@ #ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__thumb__) #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm @@ -208,9 +208,9 @@ void AES_invert_key(unsigned char* ks, word32 rounds) #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register unsigned char* ks __asm__ ("r0") = (unsigned char*)ks_p; register word32 rounds __asm__ ("r1") = (word32)rounds_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_AES_Thumb2_te_c __asm__ ("r2") = (uint32_t*)L_AES_Thumb2_te; register uint32_t* L_AES_Thumb2_td_c __asm__ ("r3") = (uint32_t*)L_AES_Thumb2_td; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, %[L_AES_Thumb2_te]\n\t" @@ -218,7 +218,7 @@ void AES_invert_key(unsigned char* ks, word32 rounds) "ADD r10, %[ks], %[rounds], LSL #4\n\t" "MOV r11, %[rounds]\n\t" "\n" - "L_AES_invert_key_loop_%=:\n\t" + "L_AES_invert_key_loop:\n\t" "LDM %[ks], {r2, r3, r4, r5}\n\t" "LDM r10, {r6, r7, r8, r9}\n\t" "STM r10, {r2, r3, r4, r5}\n\t" @@ -226,15 +226,15 @@ void AES_invert_key(unsigned char* ks, word32 rounds) "SUBS r11, r11, #0x2\n\t" "SUB r10, r10, #0x10\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_invert_key_loop_%=\n\t" + "BNE L_AES_invert_key_loop\n\t" #else - "BNE.N L_AES_invert_key_loop_%=\n\t" + "BNE.N L_AES_invert_key_loop\n\t" #endif "SUB %[ks], %[ks], %[rounds], LSL #3\n\t" "ADD %[ks], %[ks], #0x10\n\t" "SUB r11, %[rounds], #0x1\n\t" "\n" - "L_AES_invert_key_mix_loop_%=:\n\t" + "L_AES_invert_key_mix_loop:\n\t" "LDM %[ks], {r2, r3, r4, r5}\n\t" "UBFX r6, r2, #0, #8\n\t" "UBFX r7, r2, #8, #8\n\t" @@ -301,13 +301,19 @@ void AES_invert_key(unsigned char* ks, word32 rounds) "EOR r8, r8, r9, ROR #24\n\t" "STR r8, [%[ks]], #4\n\t" "SUBS r11, r11, #0x1\n\t" -#if defined(__GNUC__) || 
defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_invert_key_mix_loop_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_invert_key_mix_loop\n\t" #else - "BNE.N L_AES_invert_key_mix_loop_%=\n\t" + "BNE.W L_AES_invert_key_mix_loop\n\t" #endif - : [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_td] "+r" (L_AES_Thumb2_td_c) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [ks] "+r" (ks), [rounds] "+r" (rounds), + [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_td] "+r" (L_AES_Thumb2_td_c) : +#else + : [ks] "+r" (ks), [rounds] "+r" (rounds) + : [L_AES_Thumb2_te] "r" (L_AES_Thumb2_te), [L_AES_Thumb2_td] "r" (L_AES_Thumb2_td) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -316,7 +322,7 @@ void AES_invert_key(unsigned char* ks, word32 rounds) static const uint32_t L_AES_Thumb2_rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, + 0x1b000000, 0x36000000 }; void AES_set_encrypt_key(const unsigned char* key, word32 len, @@ -331,24 +337,24 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks register const unsigned char* key __asm__ ("r0") = (const unsigned char*)key_p; register word32 len __asm__ ("r1") = (word32)len_p; register unsigned char* ks __asm__ ("r2") = (unsigned char*)ks_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_AES_Thumb2_te_c __asm__ ("r3") = (uint32_t*)L_AES_Thumb2_te; register uint32_t* L_AES_Thumb2_rcon_c __asm__ ("r4") = (uint32_t*)&L_AES_Thumb2_rcon; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r8, %[L_AES_Thumb2_te]\n\t" + "MOV r10, %[L_AES_Thumb2_te]\n\t" "MOV lr, %[L_AES_Thumb2_rcon]\n\t" "CMP %[len], #0x80\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_set_encrypt_key_start_128_%=\n\t" +#ifdef __GNUC__ + "BEQ 
L_AES_set_encrypt_key_start_128\n\t" #else - "BEQ.N L_AES_set_encrypt_key_start_128_%=\n\t" + "BEQ.W L_AES_set_encrypt_key_start_128\n\t" #endif "CMP %[len], #0xc0\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_set_encrypt_key_start_192_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_set_encrypt_key_start_192\n\t" #else - "BEQ.N L_AES_set_encrypt_key_start_192_%=\n\t" + "BEQ.W L_AES_set_encrypt_key_start_192\n\t" #endif "LDRD r4, r5, [%[key]]\n\t" "LDRD r6, r7, [%[key], #8]\n\t" @@ -367,15 +373,15 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "SUB %[ks], %[ks], #0x10\n\t" "MOV r12, #0x6\n\t" "\n" - "L_AES_set_encrypt_key_loop_256_%=:\n\t" + "L_AES_set_encrypt_key_loop_256:\n\t" "UBFX r4, r7, #0, #8\n\t" "UBFX r5, r7, #8, #8\n\t" "UBFX r6, r7, #16, #8\n\t" "LSR r7, r7, #24\n\t" - "LDRB r4, [r8, r4, LSL #2]\n\t" - "LDRB r5, [r8, r5, LSL #2]\n\t" - "LDRB r6, [r8, r6, LSL #2]\n\t" - "LDRB r7, [r8, r7, LSL #2]\n\t" + "LDRB r4, [r10, r4, LSL #2]\n\t" + "LDRB r5, [r10, r5, LSL #2]\n\t" + "LDRB r6, [r10, r6, LSL #2]\n\t" + "LDRB r7, [r10, r7, LSL #2]\n\t" "EOR r3, r7, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" @@ -394,10 +400,10 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "UBFX r5, r3, #16, #8\n\t" "LSR r6, r3, #24\n\t" "UBFX r3, r3, #0, #8\n\t" - "LDRB r4, [r8, r4, LSL #2]\n\t" - "LDRB r6, [r8, r6, LSL #2]\n\t" - "LDRB r5, [r8, r5, LSL #2]\n\t" - "LDRB r3, [r8, r3, LSL #2]\n\t" + "LDRB r4, [r10, r4, LSL #2]\n\t" + "LDRB r6, [r10, r6, LSL #2]\n\t" + "LDRB r5, [r10, r5, LSL #2]\n\t" + "LDRB r3, [r10, r3, LSL #2]\n\t" "EOR r3, r3, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" @@ -411,18 +417,18 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "SUB %[ks], %[ks], #0x10\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || 
defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_set_encrypt_key_loop_256_%=\n\t" + "BNE L_AES_set_encrypt_key_loop_256\n\t" #else - "BNE.N L_AES_set_encrypt_key_loop_256_%=\n\t" + "BNE.N L_AES_set_encrypt_key_loop_256\n\t" #endif "UBFX r4, r7, #0, #8\n\t" "UBFX r5, r7, #8, #8\n\t" "UBFX r6, r7, #16, #8\n\t" "LSR r7, r7, #24\n\t" - "LDRB r4, [r8, r4, LSL #2]\n\t" - "LDRB r5, [r8, r5, LSL #2]\n\t" - "LDRB r6, [r8, r6, LSL #2]\n\t" - "LDRB r7, [r8, r7, LSL #2]\n\t" + "LDRB r4, [r10, r4, LSL #2]\n\t" + "LDRB r5, [r10, r5, LSL #2]\n\t" + "LDRB r6, [r10, r6, LSL #2]\n\t" + "LDRB r7, [r10, r7, LSL #2]\n\t" "EOR r3, r7, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" @@ -436,73 +442,81 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "ADD %[ks], %[ks], #0x10\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t" "SUB %[ks], %[ks], #0x10\n\t" - "B L_AES_set_encrypt_key_end_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_AES_set_encrypt_key_end\n\t" +#else + "B.N L_AES_set_encrypt_key_end\n\t" +#endif "\n" - "L_AES_set_encrypt_key_start_192_%=:\n\t" + "L_AES_set_encrypt_key_start_192:\n\t" "LDRD r4, r5, [%[key]]\n\t" "LDRD r6, r7, [%[key], #8]\n\t" - "LDRD %[key], %[len], [%[key], #16]\n\t" + "LDRD r8, r9, [%[key], #16]\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" "REV r7, r7\n\t" - "REV %[key], %[key]\n\t" - "REV %[len], %[len]\n\t" + "REV r8, r8\n\t" + "REV r9, r9\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t" - "STRD %[key], %[len], [%[ks], #16]\n\t" - "MOV r7, %[len]\n\t" + "STRD r8, r9, [%[ks], #16]\n\t" + "MOV r7, r9\n\t" "MOV r12, #0x7\n\t" "\n" - "L_AES_set_encrypt_key_loop_192_%=:\n\t" - "UBFX r0, r7, #0, #8\n\t" - "UBFX r1, r7, #8, #8\n\t" - "UBFX r4, r7, #16, #8\n\t" - "LSR r7, r7, #24\n\t" - "LDRB r0, [r8, r0, LSL #2]\n\t" - "LDRB r1, [r8, r1, LSL #2]\n\t" - "LDRB r4, [r8, r4, LSL #2]\n\t" - "LDRB r7, [r8, r7, LSL #2]\n\t" - "EOR r3, r7, r0, LSL #8\n\t" - "EOR r3, r3, r1, LSL 
#16\n\t" - "EOR r3, r3, r4, LSL #24\n\t" - "LDM %[ks]!, {r0, r1, r4, r5, r6, r7}\n\t" - "EOR r0, r0, r3\n\t" + "L_AES_set_encrypt_key_loop_192:\n\t" + "UBFX r4, r9, #0, #8\n\t" + "UBFX r5, r9, #8, #8\n\t" + "UBFX r6, r9, #16, #8\n\t" + "LSR r9, r9, #24\n\t" + "LDRB r4, [r10, r4, LSL #2]\n\t" + "LDRB r5, [r10, r5, LSL #2]\n\t" + "LDRB r6, [r10, r6, LSL #2]\n\t" + "LDRB r9, [r10, r9, LSL #2]\n\t" + "EOR r3, r9, r4, LSL #8\n\t" + "EOR r3, r3, r5, LSL #16\n\t" + "EOR r3, r3, r6, LSL #24\n\t" + "LDM %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t" + "EOR r4, r4, r3\n\t" "LDM lr!, {r3}\n\t" - "EOR r0, r0, r3\n\t" - "EOR r1, r1, r0\n\t" - "EOR r4, r4, r1\n\t" + "EOR r4, r4, r3\n\t" "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" "EOR r7, r7, r6\n\t" - "STM %[ks], {r0, r1, r4, r5, r6, r7}\n\t" + "EOR r8, r8, r7\n\t" + "EOR r9, r9, r8\n\t" + "STM %[ks], {r4, r5, r6, r7, r8, r9}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_set_encrypt_key_loop_192_%=\n\t" + "BNE L_AES_set_encrypt_key_loop_192\n\t" #else - "BNE.N L_AES_set_encrypt_key_loop_192_%=\n\t" + "BNE.N L_AES_set_encrypt_key_loop_192\n\t" #endif - "UBFX r0, r7, #0, #8\n\t" - "UBFX r1, r7, #8, #8\n\t" - "UBFX r4, r7, #16, #8\n\t" - "LSR r7, r7, #24\n\t" - "LDRB r0, [r8, r0, LSL #2]\n\t" - "LDRB r1, [r8, r1, LSL #2]\n\t" - "LDRB r4, [r8, r4, LSL #2]\n\t" - "LDRB r7, [r8, r7, LSL #2]\n\t" - "EOR r3, r7, r0, LSL #8\n\t" - "EOR r3, r3, r1, LSL #16\n\t" - "EOR r3, r3, r4, LSL #24\n\t" - "LDM %[ks]!, {r0, r1, r4, r5, r6, r7}\n\t" - "EOR r0, r0, r3\n\t" + "UBFX r4, r9, #0, #8\n\t" + "UBFX r5, r9, #8, #8\n\t" + "UBFX r6, r9, #16, #8\n\t" + "LSR r9, r9, #24\n\t" + "LDRB r4, [r10, r4, LSL #2]\n\t" + "LDRB r5, [r10, r5, LSL #2]\n\t" + "LDRB r6, [r10, r6, LSL #2]\n\t" + "LDRB r9, [r10, r9, LSL #2]\n\t" + "EOR r3, r9, r4, LSL #8\n\t" + "EOR r3, r3, r5, LSL #16\n\t" + "EOR r3, r3, r6, LSL #24\n\t" + "LDM %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t" + "EOR r4, r4, r3\n\t" "LDM lr!, 
{r3}\n\t" - "EOR r0, r0, r3\n\t" - "EOR r1, r1, r0\n\t" - "EOR r4, r4, r1\n\t" + "EOR r4, r4, r3\n\t" "EOR r5, r5, r4\n\t" - "STM %[ks], {r0, r1, r4, r5}\n\t" - "B L_AES_set_encrypt_key_end_%=\n\t" + "EOR r6, r6, r5\n\t" + "EOR r7, r7, r6\n\t" + "STM %[ks], {r4, r5, r6, r7}\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_AES_set_encrypt_key_end\n\t" +#else + "B.N L_AES_set_encrypt_key_end\n\t" +#endif "\n" - "L_AES_set_encrypt_key_start_128_%=:\n\t" + "L_AES_set_encrypt_key_start_128:\n\t" "LDRD r4, r5, [%[key]]\n\t" "LDRD r6, r7, [%[key], #8]\n\t" "REV r4, r4\n\t" @@ -512,15 +526,15 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "STM %[ks], {r4, r5, r6, r7}\n\t" "MOV r12, #0xa\n\t" "\n" - "L_AES_set_encrypt_key_loop_128_%=:\n\t" + "L_AES_set_encrypt_key_loop_128:\n\t" "UBFX r4, r7, #0, #8\n\t" "UBFX r5, r7, #8, #8\n\t" "UBFX r6, r7, #16, #8\n\t" "LSR r7, r7, #24\n\t" - "LDRB r4, [r8, r4, LSL #2]\n\t" - "LDRB r5, [r8, r5, LSL #2]\n\t" - "LDRB r6, [r8, r6, LSL #2]\n\t" - "LDRB r7, [r8, r7, LSL #2]\n\t" + "LDRB r4, [r10, r4, LSL #2]\n\t" + "LDRB r5, [r10, r5, LSL #2]\n\t" + "LDRB r6, [r10, r6, LSL #2]\n\t" + "LDRB r7, [r10, r7, LSL #2]\n\t" "EOR r3, r7, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" @@ -534,15 +548,21 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "STM %[ks], {r4, r5, r6, r7}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_set_encrypt_key_loop_128_%=\n\t" + "BNE L_AES_set_encrypt_key_loop_128\n\t" #else - "BNE.N L_AES_set_encrypt_key_loop_128_%=\n\t" + "BNE.N L_AES_set_encrypt_key_loop_128\n\t" #endif "\n" - "L_AES_set_encrypt_key_end_%=:\n\t" - : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_rcon] "+r" (L_AES_Thumb2_rcon_c) + "L_AES_set_encrypt_key_end:\n\t" +#ifndef 
WOLFSSL_NO_VAR_ASSIGN_REG + : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), + [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_rcon] "+r" (L_AES_Thumb2_rcon_c) : - : "memory", "r12", "lr", "r5", "r6", "r7", "r8" +#else + : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks) + : [L_AES_Thumb2_te] "r" (L_AES_Thumb2_te), [L_AES_Thumb2_rcon] "r" (L_AES_Thumb2_rcon) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -562,7 +582,7 @@ void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks) __asm__ __volatile__ ( "\n" - "L_AES_encrypt_block_nr_%=:\n\t" + "L_AES_encrypt_block_nr:\n\t" "UBFX r8, r5, #16, #8\n\t" "LSR r11, r4, #24\n\t" "UBFX lr, r6, #8, #8\n\t" @@ -664,10 +684,10 @@ void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks) "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" "SUBS %[nr], %[nr], #0x1\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_encrypt_block_nr_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_encrypt_block_nr\n\t" #else - "BNE.N L_AES_encrypt_block_nr_%=\n\t" + "BNE.W L_AES_encrypt_block_nr\n\t" #endif "UBFX r8, r5, #16, #8\n\t" "LSR r11, r4, #24\n\t" @@ -793,28 +813,32 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long register unsigned long len __asm__ ("r2") = (unsigned long)len_p; register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r5") = (uint32_t*)L_AES_Thumb2_te_ecb; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r12, r4\n\t" +#else + "LDR r12, [sp, #36]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "PUSH {%[ks]}\n\t" "CMP r12, #0xa\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || 
defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_ECB_encrypt_start_block_128_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_ECB_encrypt_start_block_128\n\t" #else - "BEQ.N L_AES_ECB_encrypt_start_block_128_%=\n\t" + "BEQ.W L_AES_ECB_encrypt_start_block_128\n\t" #endif "CMP r12, #0xc\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_ECB_encrypt_start_block_192_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_ECB_encrypt_start_block_192\n\t" #else - "BEQ.N L_AES_ECB_encrypt_start_block_192_%=\n\t" + "BEQ.W L_AES_ECB_encrypt_start_block_192\n\t" #endif "\n" - "L_AES_ECB_encrypt_loop_block_256_%=:\n\t" + "L_AES_ECB_encrypt_loop_block_256:\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -845,16 +869,20 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" +#ifdef __GNUC__ + "BNE L_AES_ECB_encrypt_loop_block_256\n\t" +#else + "BNE.W L_AES_ECB_encrypt_loop_block_256\n\t" +#endif #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_ECB_encrypt_loop_block_256_%=\n\t" + "B L_AES_ECB_encrypt_end\n\t" #else - "BNE.N L_AES_ECB_encrypt_loop_block_256_%=\n\t" + "B.N L_AES_ECB_encrypt_end\n\t" #endif - "B L_AES_ECB_encrypt_end_%=\n\t" "\n" - "L_AES_ECB_encrypt_start_block_192_%=:\n\t" + "L_AES_ECB_encrypt_start_block_192:\n\t" "\n" - "L_AES_ECB_encrypt_loop_block_192_%=:\n\t" + "L_AES_ECB_encrypt_loop_block_192:\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -885,16 +913,20 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" +#ifdef __GNUC__ + "BNE L_AES_ECB_encrypt_loop_block_192\n\t" +#else + "BNE.W L_AES_ECB_encrypt_loop_block_192\n\t" +#endif #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE 
L_AES_ECB_encrypt_loop_block_192_%=\n\t" + "B L_AES_ECB_encrypt_end\n\t" #else - "BNE.N L_AES_ECB_encrypt_loop_block_192_%=\n\t" + "B.N L_AES_ECB_encrypt_end\n\t" #endif - "B L_AES_ECB_encrypt_end_%=\n\t" "\n" - "L_AES_ECB_encrypt_start_block_128_%=:\n\t" + "L_AES_ECB_encrypt_start_block_128:\n\t" "\n" - "L_AES_ECB_encrypt_loop_block_128_%=:\n\t" + "L_AES_ECB_encrypt_loop_block_128:\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -925,16 +957,22 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_ECB_encrypt_loop_block_128_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_ECB_encrypt_loop_block_128\n\t" #else - "BNE.N L_AES_ECB_encrypt_loop_block_128_%=\n\t" + "BNE.W L_AES_ECB_encrypt_loop_block_128\n\t" #endif "\n" - "L_AES_ECB_encrypt_end_%=:\n\t" + "L_AES_ECB_encrypt_end:\n\t" "POP {%[ks]}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), + [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) : +#else + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr) + : [L_AES_Thumb2_te_ecb] "r" (L_AES_Thumb2_te_ecb) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -956,30 +994,38 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = 
(uint32_t*)L_AES_Thumb2_te_ecb; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r8, r4\n\t" +#else + "LDR r8, [sp, #36]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r9, r5\n\t" +#else + "LDR r9, [sp, #40]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" "LDM r9, {r4, r5, r6, r7}\n\t" "PUSH {%[ks], r9}\n\t" "CMP r8, #0xa\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_CBC_encrypt_start_block_128_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_CBC_encrypt_start_block_128\n\t" #else - "BEQ.N L_AES_CBC_encrypt_start_block_128_%=\n\t" + "BEQ.W L_AES_CBC_encrypt_start_block_128\n\t" #endif "CMP r8, #0xc\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_CBC_encrypt_start_block_192_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_CBC_encrypt_start_block_192\n\t" #else - "BEQ.N L_AES_CBC_encrypt_start_block_192_%=\n\t" + "BEQ.W L_AES_CBC_encrypt_start_block_192\n\t" #endif "\n" - "L_AES_CBC_encrypt_loop_block_256_%=:\n\t" + "L_AES_CBC_encrypt_loop_block_256:\n\t" "LDR r8, [lr]\n\t" "LDR r9, [lr, #4]\n\t" "LDR r10, [lr, #8]\n\t" @@ -1014,16 +1060,20 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" +#ifdef __GNUC__ + "BNE L_AES_CBC_encrypt_loop_block_256\n\t" +#else + "BNE.W L_AES_CBC_encrypt_loop_block_256\n\t" +#endif #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_CBC_encrypt_loop_block_256_%=\n\t" + "B L_AES_CBC_encrypt_end\n\t" #else - "BNE.N L_AES_CBC_encrypt_loop_block_256_%=\n\t" + "B.N L_AES_CBC_encrypt_end\n\t" #endif - "B L_AES_CBC_encrypt_end_%=\n\t" "\n" - "L_AES_CBC_encrypt_start_block_192_%=:\n\t" + "L_AES_CBC_encrypt_start_block_192:\n\t" "\n" - 
"L_AES_CBC_encrypt_loop_block_192_%=:\n\t" + "L_AES_CBC_encrypt_loop_block_192:\n\t" "LDR r8, [lr]\n\t" "LDR r9, [lr, #4]\n\t" "LDR r10, [lr, #8]\n\t" @@ -1058,16 +1108,20 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" +#ifdef __GNUC__ + "BNE L_AES_CBC_encrypt_loop_block_192\n\t" +#else + "BNE.W L_AES_CBC_encrypt_loop_block_192\n\t" +#endif #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_CBC_encrypt_loop_block_192_%=\n\t" + "B L_AES_CBC_encrypt_end\n\t" #else - "BNE.N L_AES_CBC_encrypt_loop_block_192_%=\n\t" + "B.N L_AES_CBC_encrypt_end\n\t" #endif - "B L_AES_CBC_encrypt_end_%=\n\t" "\n" - "L_AES_CBC_encrypt_start_block_128_%=:\n\t" + "L_AES_CBC_encrypt_start_block_128:\n\t" "\n" - "L_AES_CBC_encrypt_loop_block_128_%=:\n\t" + "L_AES_CBC_encrypt_loop_block_128:\n\t" "LDR r8, [lr]\n\t" "LDR r9, [lr, #4]\n\t" "LDR r10, [lr, #8]\n\t" @@ -1102,17 +1156,23 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_CBC_encrypt_loop_block_128_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_CBC_encrypt_loop_block_128\n\t" #else - "BNE.N L_AES_CBC_encrypt_loop_block_128_%=\n\t" + "BNE.W L_AES_CBC_encrypt_loop_block_128\n\t" #endif "\n" - "L_AES_CBC_encrypt_end_%=:\n\t" + "L_AES_CBC_encrypt_end:\n\t" "POP {%[ks], r9}\n\t" "STM r9, {r4, r5, r6, r7}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), + [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) : +#else + : [in] "+r" (in), 
[out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv) + : [L_AES_Thumb2_te_ecb] "r" (L_AES_Thumb2_te_ecb) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); } @@ -1134,12 +1194,20 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r12, r4\n\t" +#else + "LDR r12, [sp, #36]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r8, r5\n\t" +#else + "LDR r8, [sp, #40]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" "LDM r8, {r4, r5, r6, r7}\n\t" @@ -1150,19 +1218,19 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "STM r8, {r4, r5, r6, r7}\n\t" "PUSH {%[ks], r8}\n\t" "CMP r12, #0xa\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_CTR_encrypt_start_block_128_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_CTR_encrypt_start_block_128\n\t" #else - "BEQ.N L_AES_CTR_encrypt_start_block_128_%=\n\t" + "BEQ.W L_AES_CTR_encrypt_start_block_128\n\t" #endif "CMP r12, #0xc\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_CTR_encrypt_start_block_192_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_CTR_encrypt_start_block_192\n\t" #else - "BEQ.N L_AES_CTR_encrypt_start_block_192_%=\n\t" + "BEQ.W L_AES_CTR_encrypt_start_block_192\n\t" #endif "\n" - "L_AES_CTR_encrypt_loop_block_256_%=:\n\t" + "L_AES_CTR_encrypt_loop_block_256:\n\t" "PUSH {r1, %[len], lr}\n\t" "LDR lr, 
[sp, #16]\n\t" "ADDS r11, r7, #0x1\n\t" @@ -1201,16 +1269,20 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_CTR_encrypt_loop_block_256_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_CTR_encrypt_loop_block_256\n\t" #else - "BNE.N L_AES_CTR_encrypt_loop_block_256_%=\n\t" + "BNE.W L_AES_CTR_encrypt_loop_block_256\n\t" +#endif +#ifdef __GNUC__ + "B L_AES_CTR_encrypt_end\n\t" +#else + "B.W L_AES_CTR_encrypt_end\n\t" #endif - "B L_AES_CTR_encrypt_end_%=\n\t" "\n" - "L_AES_CTR_encrypt_start_block_192_%=:\n\t" + "L_AES_CTR_encrypt_start_block_192:\n\t" "\n" - "L_AES_CTR_encrypt_loop_block_192_%=:\n\t" + "L_AES_CTR_encrypt_loop_block_192:\n\t" "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" "ADDS r11, r7, #0x1\n\t" @@ -1249,16 +1321,20 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_CTR_encrypt_loop_block_192_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_CTR_encrypt_loop_block_192\n\t" +#else + "BNE.W L_AES_CTR_encrypt_loop_block_192\n\t" +#endif +#ifdef __GNUC__ + "B L_AES_CTR_encrypt_end\n\t" #else - "BNE.N L_AES_CTR_encrypt_loop_block_192_%=\n\t" + "B.W L_AES_CTR_encrypt_end\n\t" #endif - "B L_AES_CTR_encrypt_end_%=\n\t" "\n" - "L_AES_CTR_encrypt_start_block_128_%=:\n\t" + "L_AES_CTR_encrypt_start_block_128:\n\t" "\n" - "L_AES_CTR_encrypt_loop_block_128_%=:\n\t" + "L_AES_CTR_encrypt_loop_block_128:\n\t" "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" "ADDS r11, r7, #0x1\n\t" @@ -1297,21 +1373,27 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], 
#0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_CTR_encrypt_loop_block_128_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_CTR_encrypt_loop_block_128\n\t" #else - "BNE.N L_AES_CTR_encrypt_loop_block_128_%=\n\t" + "BNE.W L_AES_CTR_encrypt_loop_block_128\n\t" #endif "\n" - "L_AES_CTR_encrypt_end_%=:\n\t" + "L_AES_CTR_encrypt_end:\n\t" "POP {%[ks], r8}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" "REV r7, r7\n\t" "STM r8, {r4, r5, r6, r7}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), + [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) : +#else + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr) + : [L_AES_Thumb2_te_ecb] "r" (L_AES_Thumb2_te_ecb) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); } @@ -1334,7 +1416,7 @@ void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4) __asm__ __volatile__ ( "\n" - "L_AES_decrypt_block_nr_%=:\n\t" + "L_AES_decrypt_block_nr:\n\t" "UBFX r8, r7, #16, #8\n\t" "LSR r11, r4, #24\n\t" "UBFX r12, r6, #8, #8\n\t" @@ -1436,10 +1518,10 @@ void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4) "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" "SUBS %[nr], %[nr], #0x1\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_decrypt_block_nr_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_decrypt_block_nr\n\t" #else - "BNE.N L_AES_decrypt_block_nr_%=\n\t" + "BNE.W L_AES_decrypt_block_nr\n\t" #endif "UBFX r8, r7, #16, #8\n\t" "LSR r11, r4, #24\n\t" @@ -1598,30 +1680,34 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long register unsigned long len __asm__ 
("r2") = (unsigned long)len_p; register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_AES_Thumb2_td_ecb_c __asm__ ("r5") = (uint32_t*)L_AES_Thumb2_td_ecb; register unsigned char* L_AES_Thumb2_td4_c __asm__ ("r6") = (unsigned char*)&L_AES_Thumb2_td4; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r8, r4\n\t" +#else + "LDR r8, [sp, #36]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_td_ecb]\n\t" "MOV r12, %[len]\n\t" "MOV r2, %[L_AES_Thumb2_td4]\n\t" "CMP r8, #0xa\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_ECB_decrypt_start_block_128_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_ECB_decrypt_start_block_128\n\t" #else - "BEQ.N L_AES_ECB_decrypt_start_block_128_%=\n\t" + "BEQ.W L_AES_ECB_decrypt_start_block_128\n\t" #endif "CMP r8, #0xc\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_ECB_decrypt_start_block_192_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_ECB_decrypt_start_block_192\n\t" #else - "BEQ.N L_AES_ECB_decrypt_start_block_192_%=\n\t" + "BEQ.W L_AES_ECB_decrypt_start_block_192\n\t" #endif "\n" - "L_AES_ECB_decrypt_loop_block_256_%=:\n\t" + "L_AES_ECB_decrypt_loop_block_256:\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -1651,16 +1737,20 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" +#ifdef __GNUC__ + "BNE L_AES_ECB_decrypt_loop_block_256\n\t" +#else + "BNE.W L_AES_ECB_decrypt_loop_block_256\n\t" +#endif #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_ECB_decrypt_loop_block_256_%=\n\t" + "B L_AES_ECB_decrypt_end\n\t" #else - "BNE.N 
L_AES_ECB_decrypt_loop_block_256_%=\n\t" + "B.N L_AES_ECB_decrypt_end\n\t" #endif - "B L_AES_ECB_decrypt_end_%=\n\t" "\n" - "L_AES_ECB_decrypt_start_block_192_%=:\n\t" + "L_AES_ECB_decrypt_start_block_192:\n\t" "\n" - "L_AES_ECB_decrypt_loop_block_192_%=:\n\t" + "L_AES_ECB_decrypt_loop_block_192:\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -1690,16 +1780,20 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" +#ifdef __GNUC__ + "BNE L_AES_ECB_decrypt_loop_block_192\n\t" +#else + "BNE.W L_AES_ECB_decrypt_loop_block_192\n\t" +#endif #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_ECB_decrypt_loop_block_192_%=\n\t" + "B L_AES_ECB_decrypt_end\n\t" #else - "BNE.N L_AES_ECB_decrypt_loop_block_192_%=\n\t" + "B.N L_AES_ECB_decrypt_end\n\t" #endif - "B L_AES_ECB_decrypt_end_%=\n\t" "\n" - "L_AES_ECB_decrypt_start_block_128_%=:\n\t" + "L_AES_ECB_decrypt_start_block_128:\n\t" "\n" - "L_AES_ECB_decrypt_loop_block_128_%=:\n\t" + "L_AES_ECB_decrypt_loop_block_128:\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -1729,15 +1823,21 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_ECB_decrypt_loop_block_128_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_ECB_decrypt_loop_block_128\n\t" #else - "BNE.N L_AES_ECB_decrypt_loop_block_128_%=\n\t" + "BNE.W L_AES_ECB_decrypt_loop_block_128\n\t" #endif "\n" - "L_AES_ECB_decrypt_end_%=:\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) + "L_AES_ECB_decrypt_end:\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : 
[in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), + [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) : +#else + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr) + : [L_AES_Thumb2_td_ecb] "r" (L_AES_Thumb2_td_ecb), [L_AES_Thumb2_td4] "r" (L_AES_Thumb2_td4) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); } @@ -1759,32 +1859,40 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_AES_Thumb2_td_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_td_ecb; register unsigned char* L_AES_Thumb2_td4_c __asm__ ("r7") = (unsigned char*)&L_AES_Thumb2_td4; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r8, r4\n\t" +#else + "LDR r8, [sp, #36]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r4, r5\n\t" +#else + "LDR r4, [sp, #40]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_td_ecb]\n\t" "MOV r12, %[len]\n\t" "MOV r2, %[L_AES_Thumb2_td4]\n\t" "PUSH {%[ks], r4}\n\t" "CMP r8, #0xa\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_CBC_decrypt_loop_block_128_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_CBC_decrypt_loop_block_128\n\t" #else - "BEQ.N L_AES_CBC_decrypt_loop_block_128_%=\n\t" + "BEQ.W L_AES_CBC_decrypt_loop_block_128\n\t" #endif "CMP r8, #0xc\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_CBC_decrypt_loop_block_192_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_CBC_decrypt_loop_block_192\n\t" #else - "BEQ.N 
L_AES_CBC_decrypt_loop_block_192_%=\n\t" + "BEQ.W L_AES_CBC_decrypt_loop_block_192\n\t" #endif "\n" - "L_AES_CBC_decrypt_loop_block_256_%=:\n\t" + "L_AES_CBC_decrypt_loop_block_256:\n\t" "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" @@ -1824,10 +1932,10 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_CBC_decrypt_end_odd\n\t" #else - "BEQ.N L_AES_CBC_decrypt_end_odd_%=\n\t" + "BEQ.W L_AES_CBC_decrypt_end_odd\n\t" #endif "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" @@ -1869,14 +1977,18 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_CBC_decrypt_loop_block_256_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_CBC_decrypt_loop_block_256\n\t" #else - "BNE.N L_AES_CBC_decrypt_loop_block_256_%=\n\t" + "BNE.W L_AES_CBC_decrypt_loop_block_256\n\t" +#endif +#ifdef __GNUC__ + "B L_AES_CBC_decrypt_end\n\t" +#else + "B.W L_AES_CBC_decrypt_end\n\t" #endif - "B L_AES_CBC_decrypt_end_%=\n\t" "\n" - "L_AES_CBC_decrypt_loop_block_192_%=:\n\t" + "L_AES_CBC_decrypt_loop_block_192:\n\t" "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" @@ -1916,10 +2028,10 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_CBC_decrypt_end_odd\n\t" #else - "BEQ.N L_AES_CBC_decrypt_end_odd_%=\n\t" + "BEQ.W L_AES_CBC_decrypt_end_odd\n\t" #endif "PUSH {r1, 
r12, lr}\n\t" "LDR r4, [lr]\n\t" @@ -1961,14 +2073,18 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_CBC_decrypt_loop_block_192_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_CBC_decrypt_loop_block_192\n\t" +#else + "BNE.W L_AES_CBC_decrypt_loop_block_192\n\t" +#endif +#ifdef __GNUC__ + "B L_AES_CBC_decrypt_end\n\t" #else - "BNE.N L_AES_CBC_decrypt_loop_block_192_%=\n\t" + "B.W L_AES_CBC_decrypt_end\n\t" #endif - "B L_AES_CBC_decrypt_end_%=\n\t" "\n" - "L_AES_CBC_decrypt_loop_block_128_%=:\n\t" + "L_AES_CBC_decrypt_loop_block_128:\n\t" "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" @@ -2008,10 +2124,10 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_CBC_decrypt_end_odd\n\t" #else - "BEQ.N L_AES_CBC_decrypt_end_odd_%=\n\t" + "BEQ.W L_AES_CBC_decrypt_end_odd\n\t" #endif "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" @@ -2053,24 +2169,34 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" +#ifdef __GNUC__ + "BNE L_AES_CBC_decrypt_loop_block_128\n\t" +#else + "BNE.W L_AES_CBC_decrypt_loop_block_128\n\t" +#endif #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_CBC_decrypt_loop_block_128_%=\n\t" + "B L_AES_CBC_decrypt_end\n\t" #else - "BNE.N L_AES_CBC_decrypt_loop_block_128_%=\n\t" + "B.N L_AES_CBC_decrypt_end\n\t" #endif - "B L_AES_CBC_decrypt_end_%=\n\t" "\n" - "L_AES_CBC_decrypt_end_odd_%=:\n\t" + "L_AES_CBC_decrypt_end_odd:\n\t" "LDR r4, 
[sp, #4]\n\t" "LDRD r8, r9, [r4, #16]\n\t" "LDRD r10, r11, [r4, #24]\n\t" "STRD r8, r9, [r4]\n\t" "STRD r10, r11, [r4, #8]\n\t" "\n" - "L_AES_CBC_decrypt_end_%=:\n\t" + "L_AES_CBC_decrypt_end:\n\t" "POP {%[ks], r4}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), + [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) : +#else + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv) + : [L_AES_Thumb2_td_ecb] "r" (L_AES_Thumb2_td_ecb), [L_AES_Thumb2_td4] "r" (L_AES_Thumb2_td4) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r12", "lr", "r8", "r9", "r10", "r11" ); } @@ -2099,13 +2225,13 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha register const unsigned char** m __asm__ ("r1") = (const unsigned char**)m_p; register const unsigned char* data __asm__ ("r2") = (const unsigned char*)data_p; register unsigned long len __asm__ ("r3") = (unsigned long)len_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_GCM_gmult_len_r_c __asm__ ("r4") = (uint32_t*)&L_GCM_gmult_len_r; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV lr, %[L_GCM_gmult_len_r]\n\t" "\n" - "L_GCM_gmult_len_start_block_%=:\n\t" + "L_GCM_gmult_len_start_block:\n\t" "PUSH {r3}\n\t" "LDR r12, [r0, #12]\n\t" "LDR %[len], [r2, #12]\n\t" @@ -2650,13 +2776,19 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "POP {r3}\n\t" "SUBS %[len], %[len], #0x10\n\t" "ADD %[data], %[data], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_GCM_gmult_len_start_block_%=\n\t" +#ifdef __GNUC__ + 
"BNE L_GCM_gmult_len_start_block\n\t" #else - "BNE.N L_GCM_gmult_len_start_block_%=\n\t" + "BNE.W L_GCM_gmult_len_start_block\n\t" #endif - : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), + [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) : +#else + : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len) + : [L_GCM_gmult_len_r] "r" (L_GCM_gmult_len_r) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -2677,12 +2809,20 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_AES_Thumb2_te_gcm_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_gcm; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r12, r4\n\t" +#else + "LDR r12, [sp, #36]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "MOV r8, r5\n\t" +#else + "LDR r8, [sp, #40]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_te_gcm]\n\t" "LDM r8, {r4, r5, r6, r7}\n\t" @@ -2693,19 +2833,19 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "STM r8, {r4, r5, r6, r7}\n\t" "PUSH {%[ks], r8}\n\t" "CMP r12, #0xa\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_GCM_encrypt_start_block_128_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_GCM_encrypt_start_block_128\n\t" #else - "BEQ.N L_AES_GCM_encrypt_start_block_128_%=\n\t" + "BEQ.W L_AES_GCM_encrypt_start_block_128\n\t" #endif "CMP r12, #0xc\n\t" -#if 
defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_AES_GCM_encrypt_start_block_192_%=\n\t" +#ifdef __GNUC__ + "BEQ L_AES_GCM_encrypt_start_block_192\n\t" #else - "BEQ.N L_AES_GCM_encrypt_start_block_192_%=\n\t" + "BEQ.W L_AES_GCM_encrypt_start_block_192\n\t" #endif "\n" - "L_AES_GCM_encrypt_loop_block_256_%=:\n\t" + "L_AES_GCM_encrypt_loop_block_256:\n\t" "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" "ADD r7, r7, #0x1\n\t" @@ -2741,16 +2881,20 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_GCM_encrypt_loop_block_256_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_GCM_encrypt_loop_block_256\n\t" +#else + "BNE.W L_AES_GCM_encrypt_loop_block_256\n\t" +#endif +#ifdef __GNUC__ + "B L_AES_GCM_encrypt_end\n\t" #else - "BNE.N L_AES_GCM_encrypt_loop_block_256_%=\n\t" + "B.W L_AES_GCM_encrypt_end\n\t" #endif - "B L_AES_GCM_encrypt_end_%=\n\t" "\n" - "L_AES_GCM_encrypt_start_block_192_%=:\n\t" + "L_AES_GCM_encrypt_start_block_192:\n\t" "\n" - "L_AES_GCM_encrypt_loop_block_192_%=:\n\t" + "L_AES_GCM_encrypt_loop_block_192:\n\t" "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" "ADD r7, r7, #0x1\n\t" @@ -2786,16 +2930,20 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_GCM_encrypt_loop_block_192_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_GCM_encrypt_loop_block_192\n\t" #else - "BNE.N L_AES_GCM_encrypt_loop_block_192_%=\n\t" + "BNE.W L_AES_GCM_encrypt_loop_block_192\n\t" +#endif +#ifdef __GNUC__ + "B L_AES_GCM_encrypt_end\n\t" +#else + "B.W L_AES_GCM_encrypt_end\n\t" #endif - "B L_AES_GCM_encrypt_end_%=\n\t" "\n" - 
"L_AES_GCM_encrypt_start_block_128_%=:\n\t" + "L_AES_GCM_encrypt_start_block_128:\n\t" "\n" - "L_AES_GCM_encrypt_loop_block_128_%=:\n\t" + "L_AES_GCM_encrypt_loop_block_128:\n\t" "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" "ADD r7, r7, #0x1\n\t" @@ -2831,21 +2979,27 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_AES_GCM_encrypt_loop_block_128_%=\n\t" +#ifdef __GNUC__ + "BNE L_AES_GCM_encrypt_loop_block_128\n\t" #else - "BNE.N L_AES_GCM_encrypt_loop_block_128_%=\n\t" + "BNE.W L_AES_GCM_encrypt_loop_block_128\n\t" #endif "\n" - "L_AES_GCM_encrypt_end_%=:\n\t" + "L_AES_GCM_encrypt_end:\n\t" "POP {%[ks], r8}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" "REV r7, r7\n\t" "STM r8, {r4, r5, r6, r7}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_Thumb2_te_gcm] "+r" (L_AES_Thumb2_te_gcm_c) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), + [L_AES_Thumb2_te_gcm] "+r" (L_AES_Thumb2_te_gcm_c) : +#else + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr) + : [L_AES_Thumb2_te_gcm] "r" (L_AES_Thumb2_te_gcm) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); } @@ -2854,7 +3008,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long #endif /* !NO_AES */ #endif /* !__aarch64__ && __thumb__ */ #endif /* WOLFSSL_ARMASM */ -#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* !defined(__aarch64__) && defined(__thumb__) */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519.S 
b/wolfcrypt/src/port/arm/thumb2-curve25519.S index c5ca56b18a..e6b5dcf5d2 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519.S +++ b/wolfcrypt/src/port/arm/thumb2-curve25519.S @@ -43,7 +43,7 @@ .type fe_init, %function fe_init: BX lr - # Cycle Count = 4 + /* Cycle Count = 4 */ .size fe_init,.-fe_init .text .align 4 @@ -51,62 +51,62 @@ fe_init: .type fe_add_sub_op, %function fe_add_sub_op: PUSH {lr} - # Add-Sub + /* Add-Sub */ LDRD r4, r5, [r2] LDRD r6, r7, [r3] - # Add + /* Add */ ADDS r8, r4, r6 MOV r12, #0x0 ADCS r9, r5, r7 ADC r12, r12, #0x0 STRD r8, r9, [r0] - # Sub + /* Sub */ SUBS r10, r4, r6 SBCS r11, r5, r7 STRD r10, r11, [r1] LDRD r4, r5, [r2, #8] LDRD r6, r7, [r3, #8] - # Sub + /* Sub */ SBCS r10, r4, r6 MOV lr, #0x0 SBCS r11, r5, r7 ADC lr, lr, #0x0 STRD r10, r11, [r1, #8] - # Add + /* Add */ SUBS r12, r12, #0x1 ADCS r8, r4, r6 ADCS r9, r5, r7 STRD r8, r9, [r0, #8] LDRD r4, r5, [r2, #16] LDRD r6, r7, [r3, #16] - # Add + /* Add */ ADCS r8, r4, r6 MOV r12, #0x0 ADCS r9, r5, r7 ADC r12, r12, #0x0 STRD r8, r9, [r0, #16] - # Sub + /* Sub */ SUBS lr, lr, #0x1 SBCS r10, r4, r6 SBCS r11, r5, r7 STRD r10, r11, [r1, #16] LDRD r4, r5, [r2, #24] LDRD r6, r7, [r3, #24] - # Sub + /* Sub */ SBCS r10, r4, r6 SBC r11, r5, r7 - # Add + /* Add */ SUBS r12, r12, #0x1 ADCS r8, r4, r6 MOV r12, #0x0 ADCS r9, r5, r7 ADC r12, r12, #0x0 - # Multiply -modulus by overflow + /* Multiply -modulus by overflow */ LSL r3, r12, #1 MOV r12, #0x13 ORR r3, r3, r9, LSR #31 MUL r12, r3, r12 - # Add -x*modulus (if overflow) + /* Add -x*modulus (if overflow) */ LDRD r4, r5, [r0] LDRD r6, r7, [r0, #8] ADDS r4, r4, r12 @@ -123,7 +123,7 @@ fe_add_sub_op: ADCS r8, r8, #0x0 ADC r9, r9, #0x0 STRD r8, r9, [r0, #24] - # Add -modulus on underflow + /* Add -modulus on underflow */ MOV lr, #0x13 AND lr, lr, r11, ASR #31 LDM r1, {r4, r5, r6, r7, r8, r9} @@ -137,9 +137,9 @@ fe_add_sub_op: SBCS r10, r10, #0x0 SBC r11, r11, #0x0 STM r1, {r4, r5, r6, r7, r8, r9, r10, r11} - # Done Add-Sub + /* Done Add-Sub 
*/ POP {pc} - # Cycle Count = 134 + /* Cycle Count = 134 */ .size fe_add_sub_op,.-fe_add_sub_op .text .align 4 @@ -147,7 +147,7 @@ fe_add_sub_op: .type fe_sub_op, %function fe_sub_op: PUSH {lr} - # Sub + /* Sub */ LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr} LDM r1!, {r2, r3, r4, r5} SUBS r6, r2, r6 @@ -171,9 +171,9 @@ fe_sub_op: SBCS r12, r12, #0x0 SBC lr, lr, #0x0 STM r0, {r6, r7, r8, r9, r10, r11, r12, lr} - # Done Sub + /* Done Sub */ POP {pc} - # Cycle Count = 51 + /* Cycle Count = 51 */ .size fe_sub_op,.-fe_sub_op .text .align 4 @@ -183,7 +183,7 @@ fe_sub: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} BL fe_sub_op POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 24 + /* Cycle Count = 24 */ .size fe_sub,.-fe_sub .text .align 4 @@ -191,7 +191,7 @@ fe_sub: .type fe_add_op, %function fe_add_op: PUSH {lr} - # Add + /* Add */ LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr} LDM r1!, {r2, r3, r4, r5} ADDS r6, r2, r6 @@ -215,9 +215,9 @@ fe_add_op: ADCS r12, r12, #0x0 ADC lr, lr, #0x0 STM r0, {r6, r7, r8, r9, r10, r11, r12, lr} - # Done Add + /* Done Add */ POP {pc} - # Cycle Count = 51 + /* Cycle Count = 51 */ .size fe_add_op,.-fe_add_op .text .align 4 @@ -227,7 +227,7 @@ fe_add: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} BL fe_add_op POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 24 + /* Cycle Count = 24 */ .size fe_add,.-fe_add #ifdef HAVE_ED25519 .text @@ -254,7 +254,7 @@ fe_frombytes: STR r8, [r0, #24] STR r9, [r0, #28] POP {r4, r5, r6, r7, r8, r9, pc} - # Cycle Count = 49 + /* Cycle Count = 49 */ .size fe_frombytes,.-fe_frombytes .text .align 4 @@ -291,7 +291,7 @@ fe_tobytes: STR r8, [r0, #24] STR r9, [r0, #28] POP {r4, r5, r6, r7, r8, r9, r10, pc} - # Cycle Count = 62 + /* Cycle Count = 62 */ .size fe_tobytes,.-fe_tobytes .text .align 4 @@ -299,7 +299,7 @@ fe_tobytes: .type fe_1, %function fe_1: PUSH {r4, r5, r6, r7, r8, r9, lr} - # Set one + /* Set one */ MOV r2, #0x1 MOV r3, #0x0 MOV r4, #0x0 @@ -310,7 +310,7 @@ fe_1: MOV r9, #0x0 STM r0, 
{r2, r3, r4, r5, r6, r7, r8, r9} POP {r4, r5, r6, r7, r8, r9, pc} - # Cycle Count = 33 + /* Cycle Count = 33 */ .size fe_1,.-fe_1 .text .align 4 @@ -318,7 +318,7 @@ fe_1: .type fe_0, %function fe_0: PUSH {r4, r5, r6, r7, r8, r9, lr} - # Set zero + /* Set zero */ MOV r2, #0x0 MOV r3, #0x0 MOV r4, #0x0 @@ -329,7 +329,7 @@ fe_0: MOV r9, #0x0 STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} POP {r4, r5, r6, r7, r8, r9, pc} - # Cycle Count = 33 + /* Cycle Count = 33 */ .size fe_0,.-fe_0 .text .align 4 @@ -337,7 +337,7 @@ fe_0: .type fe_copy, %function fe_copy: PUSH {r4, r5, lr} - # Copy + /* Copy */ LDRD r2, r3, [r1] LDRD r4, r5, [r1, #8] STRD r2, r3, [r0] @@ -347,7 +347,7 @@ fe_copy: STRD r2, r3, [r0, #16] STRD r4, r5, [r0, #24] POP {r4, r5, pc} - # Cycle Count = 32 + /* Cycle Count = 32 */ .size fe_copy,.-fe_copy .text .align 4 @@ -371,7 +371,7 @@ fe_neg: SBC r5, r6, r5 STM r0!, {r2, r3, r4, r5} POP {r4, r5, r6, r7, pc} - # Cycle Count = 43 + /* Cycle Count = 43 */ .size fe_neg,.-fe_neg .text .align 4 @@ -407,7 +407,7 @@ fe_isnonzero: ORR r2, r2, r8 ORR r0, r2, r4 POP {r4, r5, r6, r7, r8, r9, r10, pc} - # Cycle Count = 53 + /* Cycle Count = 53 */ .size fe_isnonzero,.-fe_isnonzero .text .align 4 @@ -430,7 +430,7 @@ fe_isnegative: LSR r1, r1, #31 EOR r0, r0, r1 POP {r4, r5, pc} - # Cycle Count = 31 + /* Cycle Count = 31 */ .size fe_isnegative,.-fe_isnegative #if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) #ifndef WC_NO_CACHE_RESISTANT @@ -1404,7 +1404,7 @@ fe_cmov_table: STRD r6, r7, [r0, #56] STRD r8, r9, [r0, #88] POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 1195 + /* Cycle Count = 1195 */ .size fe_cmov_table,.-fe_cmov_table #else .text @@ -1506,7 +1506,7 @@ fe_cmov_table: STM r0!, {r4, r5, r6, r7} SUB r1, r1, r2 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 160 + /* Cycle Count = 160 */ .size fe_cmov_table,.-fe_cmov_table #endif /* WC_NO_CACHE_RESISTANT */ #endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */ @@ -1522,329 
+1522,329 @@ fe_mul_op: STR r0, [sp, #36] MOV r0, #0x0 LDR r12, [r1] - # A[0] * B[0] + /* A[0] * B[0] */ LDR lr, [r2] UMULL r3, r4, r12, lr - # A[0] * B[2] + /* A[0] * B[2] */ LDR lr, [r2, #8] UMULL r5, r6, r12, lr - # A[0] * B[4] + /* A[0] * B[4] */ LDR lr, [r2, #16] UMULL r7, r8, r12, lr - # A[0] * B[6] + /* A[0] * B[6] */ LDR lr, [r2, #24] UMULL r9, r10, r12, lr STR r3, [sp] - # A[0] * B[1] + /* A[0] * B[1] */ LDR lr, [r2, #4] MOV r11, r0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[0] * B[3] + /* A[0] * B[3] */ LDR lr, [r2, #12] ADCS r6, r6, #0x0 ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[0] * B[5] + /* A[0] * B[5] */ LDR lr, [r2, #20] ADCS r8, r8, #0x0 ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[0] * B[7] + /* A[0] * B[7] */ LDR lr, [r2, #28] ADCS r10, r10, #0x0 ADC r3, r0, #0x0 UMLAL r10, r3, r12, lr - # A[1] * B[0] + /* A[1] * B[0] */ LDR r12, [r1, #4] LDR lr, [r2] MOV r11, #0x0 UMLAL r4, r11, r12, lr STR r4, [sp, #4] ADDS r5, r5, r11 - # A[1] * B[1] + /* A[1] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[1] * B[2] + /* A[1] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[1] * B[3] + /* A[1] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[1] * B[4] + /* A[1] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[1] * B[5] + /* A[1] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[1] * B[6] + /* A[1] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[1] * B[7] + /* A[1] * B[7] */ LDR lr, [r2, #28] ADC r4, r0, #0x0 UMLAL r3, r4, r12, lr - # A[2] * B[0] + /* A[2] * B[0] */ LDR r12, [r1, #8] LDR lr, [r2] MOV r11, #0x0 UMLAL r5, r11, r12, lr STR r5, [sp, #8] ADDS r6, r6, r11 - # A[2] * B[1] + /* A[2] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr 
ADDS r7, r7, r11 - # A[2] * B[2] + /* A[2] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[2] * B[3] + /* A[2] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[2] * B[4] + /* A[2] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[2] * B[5] + /* A[2] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[2] * B[6] + /* A[2] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[2] * B[7] + /* A[2] * B[7] */ LDR lr, [r2, #28] ADC r5, r0, #0x0 UMLAL r4, r5, r12, lr - # A[3] * B[0] + /* A[3] * B[0] */ LDR r12, [r1, #12] LDR lr, [r2] MOV r11, #0x0 UMLAL r6, r11, r12, lr STR r6, [sp, #12] ADDS r7, r7, r11 - # A[3] * B[1] + /* A[3] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[3] * B[2] + /* A[3] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[3] * B[3] + /* A[3] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[3] * B[4] + /* A[3] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[3] * B[5] + /* A[3] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[3] * B[6] + /* A[3] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[3] * B[7] + /* A[3] * B[7] */ LDR lr, [r2, #28] ADC r6, r0, #0x0 UMLAL r5, r6, r12, lr - # A[4] * B[0] + /* A[4] * B[0] */ LDR r12, [r1, #16] LDR lr, [r2] MOV r11, #0x0 UMLAL r7, r11, r12, lr STR r7, [sp, #16] ADDS r8, r8, r11 - # A[4] * B[1] + /* A[4] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[4] * B[2] + /* A[4] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[4] * B[3] + /* A[4] * B[3] */ LDR lr, [r2, #12] ADC 
r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[4] * B[4] + /* A[4] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[4] * B[5] + /* A[4] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[4] * B[6] + /* A[4] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[4] * B[7] + /* A[4] * B[7] */ LDR lr, [r2, #28] ADC r7, r0, #0x0 UMLAL r6, r7, r12, lr - # A[5] * B[0] + /* A[5] * B[0] */ LDR r12, [r1, #20] LDR lr, [r2] MOV r11, #0x0 UMLAL r8, r11, r12, lr STR r8, [sp, #20] ADDS r9, r9, r11 - # A[5] * B[1] + /* A[5] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[5] * B[2] + /* A[5] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[5] * B[3] + /* A[5] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[5] * B[4] + /* A[5] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[5] * B[5] + /* A[5] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[5] * B[6] + /* A[5] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[5] * B[7] + /* A[5] * B[7] */ LDR lr, [r2, #28] ADC r8, r0, #0x0 UMLAL r7, r8, r12, lr - # A[6] * B[0] + /* A[6] * B[0] */ LDR r12, [r1, #24] LDR lr, [r2] MOV r11, #0x0 UMLAL r9, r11, r12, lr STR r9, [sp, #24] ADDS r10, r10, r11 - # A[6] * B[1] + /* A[6] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[6] * B[2] + /* A[6] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[6] * B[3] + /* A[6] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[6] * B[4] + /* A[6] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[6] * B[5] + /* 
A[6] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[6] * B[6] + /* A[6] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[6] * B[7] + /* A[6] * B[7] */ LDR lr, [r2, #28] ADC r9, r0, #0x0 UMLAL r8, r9, r12, lr - # A[7] * B[0] + /* A[7] * B[0] */ LDR r12, [r1, #28] LDR lr, [r2] MOV r11, #0x0 UMLAL r10, r11, r12, lr STR r10, [sp, #28] ADDS r3, r3, r11 - # A[7] * B[1] + /* A[7] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[7] * B[2] + /* A[7] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[7] * B[3] + /* A[7] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[7] * B[4] + /* A[7] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[7] * B[5] + /* A[7] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[7] * B[6] + /* A[7] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[7] * B[7] + /* A[7] * B[7] */ LDR lr, [r2, #28] ADC r10, r0, #0x0 UMLAL r9, r10, r12, lr - # Reduce + /* Reduce */ LDR r2, [sp, #28] MOV lr, sp MOV r12, #0x26 @@ -1883,12 +1883,12 @@ fe_mul_op: UMLAL r7, r11, r9, r12 BFC r10, #31, #1 ADDS r8, r10, r11 - # Store + /* Store */ LDR r0, [sp, #36] STM r0, {r1, r2, r3, r4, r5, r6, r7, r8} ADD sp, sp, #0x28 POP {pc} - # Cycle Count = 406 + /* Cycle Count = 406 */ .size fe_mul_op,.-fe_mul_op #else .text @@ -1995,7 +1995,7 @@ fe_mul_op: UMAAL r8, r10, r2, lr UMAAL r8, r9, r3, r11 UMAAL r9, r10, r3, lr - # Reduce + /* Reduce */ LDR r0, [sp, #28] MOV lr, #0x25 UMAAL r10, r0, r10, lr @@ -2017,11 +2017,11 @@ fe_mul_op: UMAAL r6, r11, r9, lr ADD r7, r10, r11 LDR lr, [sp, #8] - # Store + /* Store */ STM lr, {r0, r1, r2, r3, r4, r5, r6, r7} ADD sp, sp, #0x10 POP {pc} - # Cycle Count = 239 + /* Cycle Count = 239 */ .size 
fe_mul_op,.-fe_mul_op #endif /* WOLFSSL_SP_NO_UMAAL */ .text @@ -2032,7 +2032,7 @@ fe_mul: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} BL fe_mul_op POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 24 + /* Cycle Count = 24 */ .size fe_mul,.-fe_mul #ifdef WOLFSSL_SP_NO_UMAAL .text @@ -2043,33 +2043,33 @@ fe_sq_op: PUSH {lr} SUB sp, sp, #0x44 STR r0, [sp, #64] - # Square + /* Square */ MOV r0, #0x0 LDR r12, [r1] - # A[0] * A[1] + /* A[0] * A[1] */ LDR lr, [r1, #4] UMULL r4, r5, r12, lr - # A[0] * A[3] + /* A[0] * A[3] */ LDR lr, [r1, #12] UMULL r6, r7, r12, lr - # A[0] * A[5] + /* A[0] * A[5] */ LDR lr, [r1, #20] UMULL r8, r9, r12, lr - # A[0] * A[7] + /* A[0] * A[7] */ LDR lr, [r1, #28] UMULL r10, r3, r12, lr - # A[0] * A[2] + /* A[0] * A[2] */ LDR lr, [r1, #8] MOV r11, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[0] * A[4] + /* A[0] * A[4] */ LDR lr, [r1, #16] ADCS r7, r7, #0x0 ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[0] * A[6] + /* A[0] * A[6] */ LDR lr, [r1, #24] ADCS r9, r9, #0x0 ADC r11, r0, #0x0 @@ -2078,112 +2078,112 @@ fe_sq_op: ADCS r3, r3, #0x0 STR r4, [sp, #4] STR r5, [sp, #8] - # A[1] * A[2] + /* A[1] * A[2] */ LDR r12, [r1, #4] LDR lr, [r1, #8] MOV r11, #0x0 UMLAL r6, r11, r12, lr STR r6, [sp, #12] ADDS r7, r7, r11 - # A[1] * A[3] + /* A[1] * A[3] */ LDR lr, [r1, #12] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr STR r7, [sp, #16] ADDS r8, r8, r11 - # A[1] * A[4] + /* A[1] * A[4] */ LDR lr, [r1, #16] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[1] * A[5] + /* A[1] * A[5] */ LDR lr, [r1, #20] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[1] * A[6] + /* A[1] * A[6] */ LDR lr, [r1, #24] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[1] * A[7] + /* A[1] * A[7] */ LDR lr, [r1, #28] ADC r4, r0, #0x0 UMLAL r3, r4, r12, lr - # A[2] * A[3] + /* A[2] * A[3] */ LDR r12, [r1, #8] LDR lr, [r1, #12] MOV r11, #0x0 UMLAL r8, r11, r12, lr STR r8, [sp, #20] ADDS r9, r9, r11 - # 
A[2] * A[4] + /* A[2] * A[4] */ LDR lr, [r1, #16] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr STR r9, [sp, #24] ADDS r10, r10, r11 - # A[2] * A[5] + /* A[2] * A[5] */ LDR lr, [r1, #20] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[2] * A[6] + /* A[2] * A[6] */ LDR lr, [r1, #24] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[2] * A[7] + /* A[2] * A[7] */ LDR lr, [r1, #28] ADC r5, r0, #0x0 UMLAL r4, r5, r12, lr - # A[3] * A[4] + /* A[3] * A[4] */ LDR r12, [r1, #12] LDR lr, [r1, #16] MOV r11, #0x0 UMLAL r10, r11, r12, lr STR r10, [sp, #28] ADDS r3, r3, r11 - # A[3] * A[5] + /* A[3] * A[5] */ LDR lr, [r1, #20] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[3] * A[6] + /* A[3] * A[6] */ LDR lr, [r1, #24] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[3] * A[7] + /* A[3] * A[7] */ LDR lr, [r1, #28] ADC r6, r0, #0x0 UMLAL r5, r6, r12, lr - # A[4] * A[5] + /* A[4] * A[5] */ LDR r12, [r1, #16] LDR lr, [r1, #20] MOV r11, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[4] * A[6] + /* A[4] * A[6] */ LDR lr, [r1, #24] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[4] * A[7] + /* A[4] * A[7] */ LDR lr, [r1, #28] ADC r7, r0, #0x0 UMLAL r6, r7, r12, lr - # A[5] * A[6] + /* A[5] * A[6] */ LDR r12, [r1, #20] LDR lr, [r1, #24] MOV r11, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[5] * A[7] + /* A[5] * A[7] */ LDR lr, [r1, #28] ADC r8, r0, #0x0 UMLAL r7, r8, r12, lr - # A[6] * A[7] + /* A[6] * A[7] */ LDR r12, [r1, #24] LDR lr, [r1, #28] MOV r9, #0x0 @@ -2213,23 +2213,23 @@ fe_sq_op: ADD lr, sp, #0x4 LDM lr, {r4, r5, r6, r7, r8, r9, r10} MOV lr, sp - # A[0] * A[0] + /* A[0] * A[0] */ LDR r12, [r1] UMULL r3, r11, r12, r12 ADDS r4, r4, r11 - # A[1] * A[1] + /* A[1] * A[1] */ LDR r12, [r1, #4] ADCS r5, r5, #0x0 ADC r11, r0, #0x0 UMLAL r5, r11, r12, r12 ADDS r6, r6, r11 - # A[2] * A[2] + /* A[2] * A[2] */ LDR r12, [r1, #8] ADCS r7, r7, #0x0 ADC r11, r0, #0x0 UMLAL r7, r11, r12, r12 ADDS r8, r8, 
r11 - # A[3] * A[3] + /* A[3] * A[3] */ LDR r12, [r1, #12] ADCS r9, r9, #0x0 ADC r11, r0, #0x0 @@ -2237,30 +2237,30 @@ fe_sq_op: ADDS r10, r10, r11 STM lr!, {r3, r4, r5, r6, r7, r8, r9, r10} LDM lr, {r3, r4, r5, r6, r7, r8, r9, r10} - # A[4] * A[4] + /* A[4] * A[4] */ LDR r12, [r1, #16] ADCS r3, r3, #0x0 ADC r11, r0, #0x0 UMLAL r3, r11, r12, r12 ADDS r4, r4, r11 - # A[5] * A[5] + /* A[5] * A[5] */ LDR r12, [r1, #20] ADCS r5, r5, #0x0 ADC r11, r0, #0x0 UMLAL r5, r11, r12, r12 ADDS r6, r6, r11 - # A[6] * A[6] + /* A[6] * A[6] */ LDR r12, [r1, #24] ADCS r7, r7, #0x0 ADC r11, r0, #0x0 UMLAL r7, r11, r12, r12 ADDS r8, r8, r11 - # A[7] * A[7] + /* A[7] * A[7] */ LDR r12, [r1, #28] ADCS r9, r9, #0x0 ADC r10, r10, #0x0 UMLAL r9, r10, r12, r12 - # Reduce + /* Reduce */ LDR r2, [sp, #28] MOV lr, sp MOV r12, #0x26 @@ -2299,12 +2299,12 @@ fe_sq_op: UMLAL r7, r11, r9, r12 BFC r10, #31, #1 ADDS r8, r10, r11 - # Store + /* Store */ LDR r0, [sp, #64] STM r0, {r1, r2, r3, r4, r5, r6, r7, r8} ADD sp, sp, #0x44 POP {pc} - # Cycle Count = 355 + /* Cycle Count = 355 */ .size fe_sq_op,.-fe_sq_op #else .text @@ -2316,7 +2316,7 @@ fe_sq_op: SUB sp, sp, #0x20 STR r0, [sp, #28] LDM r1, {r0, r1, r2, r3, r4, r5, r6, r7} - # Square + /* Square */ UMULL r9, r10, r0, r0 UMULL r11, r12, r0, r1 ADDS r11, r11, r11 @@ -2357,46 +2357,46 @@ fe_sq_op: UMAAL r0, r10, r3, r4 ADCS r0, r0, r0 UMAAL r0, r11, lr, lr - # R[7] = r0 + /* R[7] = r0 */ UMAAL r9, r8, r1, r7 UMAAL r9, r10, r2, r6 UMAAL r12, r9, r3, r5 ADCS r12, r12, r12 UMAAL r12, r11, r4, r4 - # R[8] = r12 + /* R[8] = r12 */ UMAAL r9, r8, r2, r7 UMAAL r10, r9, r3, r6 MOV r2, lr UMAAL r10, r2, r4, r5 ADCS r10, r10, r10 UMAAL r11, r10, lr, lr - # R[9] = r11 + /* R[9] = r11 */ UMAAL r2, r8, r3, r7 UMAAL r2, r9, r4, r6 ADCS r3, r2, r2 UMAAL r10, r3, r5, r5 - # R[10] = r10 + /* R[10] = r10 */ MOV r1, lr UMAAL r1, r8, r4, r7 UMAAL r1, r9, r5, r6 ADCS r4, r1, r1 UMAAL r3, r4, lr, lr - # R[11] = r3 + /* R[11] = r3 */ UMAAL r8, r9, r5, r7 ADCS r8, r8, r8 
UMAAL r4, r8, r6, r6 - # R[12] = r4 + /* R[12] = r4 */ MOV r5, lr UMAAL r5, r9, r6, r7 ADCS r5, r5, r5 UMAAL r8, r5, lr, lr - # R[13] = r8 + /* R[13] = r8 */ ADCS r9, r9, r9 UMAAL r9, r5, r7, r7 ADCS r7, r5, lr - # R[14] = r9 - # R[15] = r7 - # Reduce + /* R[14] = r9 */ + /* R[15] = r7 */ + /* Reduce */ MOV r6, #0x25 UMAAL r7, r0, r7, r6 MOV r6, #0x13 @@ -2420,10 +2420,10 @@ fe_sq_op: UMAAL r6, lr, r9, r12 ADD r7, r7, lr POP {lr} - # Store + /* Store */ STM lr, {r0, r1, r2, r3, r4, r5, r6, r7} POP {pc} - # Cycle Count = 179 + /* Cycle Count = 179 */ .size fe_sq_op,.-fe_sq_op #endif /* WOLFSSL_SP_NO_UMAAL */ .text @@ -2434,7 +2434,7 @@ fe_sq: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} BL fe_sq_op POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 24 + /* Cycle Count = 24 */ .size fe_sq,.-fe_sq #ifdef HAVE_CURVE25519 #ifdef WOLFSSL_SP_NO_UMAAL @@ -2444,7 +2444,7 @@ fe_sq: .type fe_mul121666, %function fe_mul121666: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - # Multiply by 121666 + /* Multiply by 121666 */ LDM r1, {r2, r3, r4, r5, r6, r7, r8, r9} MOV r12, #0xdb42 MOVT r12, #0x1 @@ -2485,7 +2485,7 @@ fe_mul121666: ADC r9, r9, #0x0 STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 75 + /* Cycle Count = 75 */ .size fe_mul121666,.-fe_mul121666 #else .text @@ -2494,7 +2494,7 @@ fe_mul121666: .type fe_mul121666, %function fe_mul121666: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - # Multiply by 121666 + /* Multiply by 121666 */ LDM r1, {r2, r3, r4, r5, r6, r7, r8, r9} MOV r11, #0xdb42 MOVT r11, #0x1 @@ -2522,7 +2522,7 @@ fe_mul121666: ADC r9, r9, #0x0 STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 69 + /* Cycle Count = 69 */ .size fe_mul121666,.-fe_mul121666 #endif /* WOLFSSL_SP_NO_UMAAL */ #ifndef WC_NO_CACHE_RESISTANT @@ -2553,7 +2553,7 @@ curve25519: MOV r3, sp STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} ADD r3, sp, #0x40 - # Copy + /* Copy */ LDM r2, 
{r4, r5, r6, r7, r8, r9, r10, r11} STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} MOV r1, #0x1e @@ -2572,7 +2572,7 @@ L_curve25519_bits: EOR r1, r1, r2 STR r1, [sp, #172] LDR r0, [sp, #160] - # Conditional Swap + /* Conditional Swap */ RSB r1, r1, #0x0 MOV r3, r0 ADD r12, sp, #0x40 @@ -2625,7 +2625,7 @@ L_curve25519_bits: STM r3!, {r4, r5} STM r12!, {r6, r7} LDR r1, [sp, #172] - # Conditional Swap + /* Conditional Swap */ RSB r1, r1, #0x0 MOV r3, sp ADD r12, sp, #0x20 @@ -2741,21 +2741,21 @@ L_curve25519_bits: LDR r1, [sp, #180] SUBS r1, r1, #0x1 STR r1, [sp, #180] -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BGE L_curve25519_bits #else - BGE.N L_curve25519_bits + BGE.W L_curve25519_bits #endif MOV r1, #0x1f STR r1, [sp, #180] SUBS r2, r2, #0x4 STR r2, [sp, #176] -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BGE L_curve25519_words #else - BGE.N L_curve25519_words + BGE.W L_curve25519_words #endif - # Invert + /* Invert */ ADD r1, sp, #0x0 ADD r0, sp, #0x20 BL fe_sq_op @@ -2938,7 +2938,7 @@ L_curve25519_inv_8: MOV r0, #0x0 ADD sp, sp, #0xbc POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 682 + /* Cycle Count = 682 */ .size curve25519,.-curve25519 #else .text @@ -2973,7 +2973,7 @@ curve25519: MOV r3, sp STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} ADD r3, sp, #0x40 - # Copy + /* Copy */ LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11} STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} MOV r2, #0xfe @@ -2989,7 +2989,7 @@ L_curve25519_bits: EOR r1, r1, r2 ASR r1, r1, #31 STR r2, [sp, #164] - # Conditional Swap + /* Conditional Swap */ ADD r11, sp, #0xb0 LDM r11, {r4, r5, r6, r7} EOR r8, r4, r5 @@ -3001,7 +3001,7 @@ L_curve25519_bits: EOR r6, r6, r9 EOR r7, r7, r9 STM r11, {r4, r5, r6, r7} - # Ladder step + /* Ladder step */ LDR r3, [sp, #184] LDR r2, [sp, #176] ADD r1, sp, #0x80 @@ -3067,12 +3067,12 @@ L_curve25519_bits: #else BGE.N L_curve25519_bits #endif - # Cycle Count: 171 
+ /* Cycle Count: 171 */ LDR r1, [sp, #184] - # Copy + /* Copy */ LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} STM sp, {r4, r5, r6, r7, r8, r9, r10, r11} - # Invert + /* Invert */ ADD r1, sp, #0x0 ADD r0, sp, #0x20 BL fe_sq_op @@ -3252,7 +3252,7 @@ L_curve25519_inv_8: LDR r1, [sp, #176] LDR r0, [sp, #176] BL fe_mul_op - # Ensure result is less than modulus + /* Ensure result is less than modulus */ LDR r0, [sp, #176] LDM r0, {r4, r5, r6, r7, r8, r9, r10, r11} MOV r2, #0x13 @@ -3270,7 +3270,7 @@ L_curve25519_inv_8: MOV r0, #0x0 ADD sp, sp, #0xc0 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 589 + /* Cycle Count = 589 */ .size curve25519,.-curve25519 #endif /* WC_NO_CACHE_RESISTANT */ #endif /* HAVE_CURVE25519 */ @@ -3282,7 +3282,7 @@ L_curve25519_inv_8: fe_invert: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} SUB sp, sp, #0x88 - # Invert + /* Invert */ STR r0, [sp, #128] STR r1, [sp, #132] LDR r1, [sp, #132] @@ -3464,7 +3464,7 @@ L_fe_invert8: LDR r0, [sp, #128] ADD sp, sp, #0x88 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 292 + /* Cycle Count = 292 */ .size fe_invert,.-fe_invert #ifdef WOLFSSL_SP_NO_UMAAL .text @@ -3475,33 +3475,33 @@ fe_sq2: PUSH {lr} SUB sp, sp, #0x44 STR r0, [sp, #64] - # Square * 2 + /* Square * 2 */ MOV r0, #0x0 LDR r12, [r1] - # A[0] * A[1] + /* A[0] * A[1] */ LDR lr, [r1, #4] UMULL r4, r5, r12, lr - # A[0] * A[3] + /* A[0] * A[3] */ LDR lr, [r1, #12] UMULL r6, r7, r12, lr - # A[0] * A[5] + /* A[0] * A[5] */ LDR lr, [r1, #20] UMULL r8, r9, r12, lr - # A[0] * A[7] + /* A[0] * A[7] */ LDR lr, [r1, #28] UMULL r10, r3, r12, lr - # A[0] * A[2] + /* A[0] * A[2] */ LDR lr, [r1, #8] MOV r11, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[0] * A[4] + /* A[0] * A[4] */ LDR lr, [r1, #16] ADCS r7, r7, #0x0 ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[0] * A[6] + /* A[0] * A[6] */ LDR lr, [r1, #24] ADCS r9, r9, #0x0 ADC r11, r0, #0x0 @@ -3510,112 +3510,112 @@ fe_sq2: ADCS r3, r3, #0x0 STR r4, [sp, #4] STR 
r5, [sp, #8] - # A[1] * A[2] + /* A[1] * A[2] */ LDR r12, [r1, #4] LDR lr, [r1, #8] MOV r11, #0x0 UMLAL r6, r11, r12, lr STR r6, [sp, #12] ADDS r7, r7, r11 - # A[1] * A[3] + /* A[1] * A[3] */ LDR lr, [r1, #12] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr STR r7, [sp, #16] ADDS r8, r8, r11 - # A[1] * A[4] + /* A[1] * A[4] */ LDR lr, [r1, #16] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[1] * A[5] + /* A[1] * A[5] */ LDR lr, [r1, #20] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[1] * A[6] + /* A[1] * A[6] */ LDR lr, [r1, #24] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[1] * A[7] + /* A[1] * A[7] */ LDR lr, [r1, #28] ADC r4, r0, #0x0 UMLAL r3, r4, r12, lr - # A[2] * A[3] + /* A[2] * A[3] */ LDR r12, [r1, #8] LDR lr, [r1, #12] MOV r11, #0x0 UMLAL r8, r11, r12, lr STR r8, [sp, #20] ADDS r9, r9, r11 - # A[2] * A[4] + /* A[2] * A[4] */ LDR lr, [r1, #16] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr STR r9, [sp, #24] ADDS r10, r10, r11 - # A[2] * A[5] + /* A[2] * A[5] */ LDR lr, [r1, #20] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[2] * A[6] + /* A[2] * A[6] */ LDR lr, [r1, #24] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[2] * A[7] + /* A[2] * A[7] */ LDR lr, [r1, #28] ADC r5, r0, #0x0 UMLAL r4, r5, r12, lr - # A[3] * A[4] + /* A[3] * A[4] */ LDR r12, [r1, #12] LDR lr, [r1, #16] MOV r11, #0x0 UMLAL r10, r11, r12, lr STR r10, [sp, #28] ADDS r3, r3, r11 - # A[3] * A[5] + /* A[3] * A[5] */ LDR lr, [r1, #20] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[3] * A[6] + /* A[3] * A[6] */ LDR lr, [r1, #24] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[3] * A[7] + /* A[3] * A[7] */ LDR lr, [r1, #28] ADC r6, r0, #0x0 UMLAL r5, r6, r12, lr - # A[4] * A[5] + /* A[4] * A[5] */ LDR r12, [r1, #16] LDR lr, [r1, #20] MOV r11, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[4] * A[6] + /* A[4] * A[6] */ LDR lr, [r1, #24] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr 
ADDS r6, r6, r11 - # A[4] * A[7] + /* A[4] * A[7] */ LDR lr, [r1, #28] ADC r7, r0, #0x0 UMLAL r6, r7, r12, lr - # A[5] * A[6] + /* A[5] * A[6] */ LDR r12, [r1, #20] LDR lr, [r1, #24] MOV r11, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[5] * A[7] + /* A[5] * A[7] */ LDR lr, [r1, #28] ADC r8, r0, #0x0 UMLAL r7, r8, r12, lr - # A[6] * A[7] + /* A[6] * A[7] */ LDR r12, [r1, #24] LDR lr, [r1, #28] MOV r9, #0x0 @@ -3645,23 +3645,23 @@ fe_sq2: ADD lr, sp, #0x4 LDM lr, {r4, r5, r6, r7, r8, r9, r10} MOV lr, sp - # A[0] * A[0] + /* A[0] * A[0] */ LDR r12, [r1] UMULL r3, r11, r12, r12 ADDS r4, r4, r11 - # A[1] * A[1] + /* A[1] * A[1] */ LDR r12, [r1, #4] ADCS r5, r5, #0x0 ADC r11, r0, #0x0 UMLAL r5, r11, r12, r12 ADDS r6, r6, r11 - # A[2] * A[2] + /* A[2] * A[2] */ LDR r12, [r1, #8] ADCS r7, r7, #0x0 ADC r11, r0, #0x0 UMLAL r7, r11, r12, r12 ADDS r8, r8, r11 - # A[3] * A[3] + /* A[3] * A[3] */ LDR r12, [r1, #12] ADCS r9, r9, #0x0 ADC r11, r0, #0x0 @@ -3669,30 +3669,30 @@ fe_sq2: ADDS r10, r10, r11 STM lr!, {r3, r4, r5, r6, r7, r8, r9, r10} LDM lr, {r3, r4, r5, r6, r7, r8, r9, r10} - # A[4] * A[4] + /* A[4] * A[4] */ LDR r12, [r1, #16] ADCS r3, r3, #0x0 ADC r11, r0, #0x0 UMLAL r3, r11, r12, r12 ADDS r4, r4, r11 - # A[5] * A[5] + /* A[5] * A[5] */ LDR r12, [r1, #20] ADCS r5, r5, #0x0 ADC r11, r0, #0x0 UMLAL r5, r11, r12, r12 ADDS r6, r6, r11 - # A[6] * A[6] + /* A[6] * A[6] */ LDR r12, [r1, #24] ADCS r7, r7, #0x0 ADC r11, r0, #0x0 UMLAL r7, r11, r12, r12 ADDS r8, r8, r11 - # A[7] * A[7] + /* A[7] * A[7] */ LDR r12, [r1, #28] ADCS r9, r9, #0x0 ADC r10, r10, #0x0 UMLAL r9, r10, r12, r12 - # Reduce + /* Reduce */ LDR r2, [sp, #28] MOV lr, sp MOV r12, #0x26 @@ -3731,7 +3731,7 @@ fe_sq2: UMLAL r7, r11, r9, r12 BFC r10, #31, #1 ADDS r8, r10, r11 - # Reduce if top bit set + /* Reduce if top bit set */ MOV r12, #0x13 AND r11, r12, r8, ASR #31 ADDS r1, r1, r11 @@ -3743,7 +3743,7 @@ fe_sq2: BFC r8, #31, #1 ADCS r7, r7, #0x0 ADC r8, r8, #0x0 - # Double + /* Double */ ADDS r1, r1, 
r1 ADCS r2, r2, r2 ADCS r3, r3, r3 @@ -3752,7 +3752,7 @@ fe_sq2: ADCS r6, r6, r6 ADCS r7, r7, r7 ADC r8, r8, r8 - # Reduce if top bit set + /* Reduce if top bit set */ MOV r12, #0x13 AND r11, r12, r8, ASR #31 ADDS r1, r1, r11 @@ -3764,12 +3764,12 @@ fe_sq2: BFC r8, #31, #1 ADCS r7, r7, #0x0 ADC r8, r8, #0x0 - # Store + /* Store */ LDR r0, [sp, #64] STM r0, {r1, r2, r3, r4, r5, r6, r7, r8} ADD sp, sp, #0x44 POP {pc} - # Cycle Count = 385 + /* Cycle Count = 385 */ .size fe_sq2,.-fe_sq2 #else .text @@ -3781,7 +3781,7 @@ fe_sq2: SUB sp, sp, #0x24 STRD r0, r1, [sp, #28] LDM r1, {r0, r1, r2, r3, r4, r5, r6, r7} - # Square * 2 + /* Square * 2 */ UMULL r9, r10, r0, r0 UMULL r11, r12, r0, r1 ADDS r11, r11, r11 @@ -3822,46 +3822,46 @@ fe_sq2: UMAAL r0, r10, r3, r4 ADCS r0, r0, r0 UMAAL r0, r11, lr, lr - # R[7] = r0 + /* R[7] = r0 */ UMAAL r9, r8, r1, r7 UMAAL r9, r10, r2, r6 UMAAL r12, r9, r3, r5 ADCS r12, r12, r12 UMAAL r12, r11, r4, r4 - # R[8] = r12 + /* R[8] = r12 */ UMAAL r9, r8, r2, r7 UMAAL r10, r9, r3, r6 MOV r2, lr UMAAL r10, r2, r4, r5 ADCS r10, r10, r10 UMAAL r11, r10, lr, lr - # R[9] = r11 + /* R[9] = r11 */ UMAAL r2, r8, r3, r7 UMAAL r2, r9, r4, r6 ADCS r3, r2, r2 UMAAL r10, r3, r5, r5 - # R[10] = r10 + /* R[10] = r10 */ MOV r1, lr UMAAL r1, r8, r4, r7 UMAAL r1, r9, r5, r6 ADCS r4, r1, r1 UMAAL r3, r4, lr, lr - # R[11] = r3 + /* R[11] = r3 */ UMAAL r8, r9, r5, r7 ADCS r8, r8, r8 UMAAL r4, r8, r6, r6 - # R[12] = r4 + /* R[12] = r4 */ MOV r5, lr UMAAL r5, r9, r6, r7 ADCS r5, r5, r5 UMAAL r8, r5, lr, lr - # R[13] = r8 + /* R[13] = r8 */ ADCS r9, r9, r9 UMAAL r9, r5, r7, r7 ADCS r7, r5, lr - # R[14] = r9 - # R[15] = r7 - # Reduce + /* R[14] = r9 */ + /* R[15] = r7 */ + /* Reduce */ MOV r6, #0x25 UMAAL r7, r0, r7, r6 MOV r6, #0x13 @@ -3884,7 +3884,7 @@ fe_sq2: BFC r7, #31, #1 UMAAL r6, lr, r9, r12 ADD r7, r7, lr - # Reduce if top bit set + /* Reduce if top bit set */ MOV r11, #0x13 AND r12, r11, r7, ASR #31 ADDS r0, r0, r12 @@ -3896,7 +3896,7 @@ fe_sq2: BFC r7, #31, 
#1 ADCS r6, r6, #0x0 ADC r7, r7, #0x0 - # Double + /* Double */ ADDS r0, r0, r0 ADCS r1, r1, r1 ADCS r2, r2, r2 @@ -3905,7 +3905,7 @@ fe_sq2: ADCS r5, r5, r5 ADCS r6, r6, r6 ADC r7, r7, r7 - # Reduce if top bit set + /* Reduce if top bit set */ MOV r11, #0x13 AND r12, r11, r7, ASR #31 ADDS r0, r0, r12 @@ -3918,12 +3918,12 @@ fe_sq2: ADCS r6, r6, #0x0 ADC r7, r7, #0x0 POP {r12, lr} - # Store + /* Store */ STM r12, {r0, r1, r2, r3, r4, r5, r6, r7} MOV r0, r12 MOV r1, lr POP {pc} - # Cycle Count = 213 + /* Cycle Count = 213 */ .size fe_sq2,.-fe_sq2 #endif /* WOLFSSL_SP_NO_UMAAL */ .text @@ -3933,7 +3933,7 @@ fe_sq2: fe_pow22523: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} SUB sp, sp, #0x68 - # pow22523 + /* pow22523 */ STR r0, [sp, #96] STR r1, [sp, #100] LDR r1, [sp, #100] @@ -4115,7 +4115,7 @@ L_fe_pow22523_8: LDR r0, [sp, #96] ADD sp, sp, #0x68 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 293 + /* Cycle Count = 293 */ .size fe_pow22523,.-fe_pow22523 .text .align 4 @@ -4142,7 +4142,7 @@ ge_p1p1_to_p2: BL fe_mul_op ADD sp, sp, #0x8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 53 + /* Cycle Count = 53 */ .size ge_p1p1_to_p2,.-ge_p1p1_to_p2 .text .align 4 @@ -4174,7 +4174,7 @@ ge_p1p1_to_p3: BL fe_mul_op ADD sp, sp, #0x8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 63 + /* Cycle Count = 63 */ .size ge_p1p1_to_p3,.-ge_p1p1_to_p3 .text .align 4 @@ -4218,7 +4218,7 @@ ge_p2_dbl: BL fe_sub_op ADD sp, sp, #0x8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 87 + /* Cycle Count = 87 */ .size ge_p2_dbl,.-ge_p2_dbl .text .align 4 @@ -4264,7 +4264,7 @@ ge_madd: LDR r1, [sp, #4] ADD r1, r1, #0x40 ADD r0, r0, #0x20 - # Double + /* Double */ LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} ADDS r4, r4, r4 ADCS r5, r5, r5 @@ -4290,13 +4290,13 @@ ge_madd: ADCS r10, r10, #0x0 ADC r11, r11, #0x0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - # Done Double + /* Done Double */ ADD r3, r0, #0x20 ADD r1, r0, #0x20 BL fe_add_sub_op ADD 
sp, sp, #0xc POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 136 + /* Cycle Count = 136 */ .size ge_madd,.-ge_madd .text .align 4 @@ -4342,7 +4342,7 @@ ge_msub: LDR r1, [sp, #4] ADD r1, r1, #0x40 ADD r0, r0, #0x20 - # Double + /* Double */ LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} ADDS r4, r4, r4 ADCS r5, r5, r5 @@ -4368,14 +4368,14 @@ ge_msub: ADCS r10, r10, #0x0 ADC r11, r11, #0x0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - # Done Double + /* Done Double */ ADD r3, r0, #0x20 MOV r1, r0 ADD r0, r0, #0x20 BL fe_add_sub_op ADD sp, sp, #0xc POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 137 + /* Cycle Count = 137 */ .size ge_msub,.-ge_msub .text .align 4 @@ -4416,7 +4416,7 @@ ge_add: BL fe_mul_op LDR r1, [sp] ADD r0, sp, #0xc - # Double + /* Double */ LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} ADDS r4, r4, r4 ADCS r5, r5, r5 @@ -4442,7 +4442,7 @@ ge_add: ADCS r10, r10, #0x0 ADC r11, r11, #0x0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - # Done Double + /* Done Double */ ADD r3, r1, #0x20 ADD r2, r1, #0x40 ADD r0, r1, #0x20 @@ -4454,7 +4454,7 @@ ge_add: BL fe_add_sub_op ADD sp, sp, #0x2c POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 138 + /* Cycle Count = 138 */ .size ge_add,.-ge_add .text .align 4 @@ -4495,7 +4495,7 @@ ge_sub: BL fe_mul_op LDR r1, [sp] ADD r0, sp, #0xc - # Double + /* Double */ LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} ADDS r4, r4, r4 ADCS r5, r5, r5 @@ -4521,7 +4521,7 @@ ge_sub: ADCS r10, r10, #0x0 ADC r11, r11, #0x0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - # Done Double + /* Done Double */ ADD r3, r1, #0x20 ADD r2, r1, #0x40 ADD r0, r1, #0x20 @@ -4533,7 +4533,7 @@ ge_sub: BL fe_add_sub_op ADD sp, sp, #0x2c POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 138 + /* Cycle Count = 138 */ .size ge_sub,.-ge_sub #ifdef WOLFSSL_SP_NO_UMAAL .text @@ -4544,7 +4544,7 @@ sc_reduce: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} SUB sp, sp, #0x38 STR r0, [sp, #52] - # Load bits 252-511 + /* Load bits 
252-511 */ ADD r0, r0, #0x1c LDM r0, {r1, r2, r3, r4, r5, r6, r7, r8, r9} LSR lr, r9, #24 @@ -4566,7 +4566,7 @@ sc_reduce: ORR r2, r2, r1, LSR #28 BFC r9, #28, #4 SUB r0, r0, #0x1c - # Add order times bits 504..511 + /* Add order times bits 504..511 */ MOV r10, #0x2c13 MOVT r10, #0xa30a MOV r11, #0x9ce5 @@ -4597,7 +4597,7 @@ sc_reduce: SBCS r7, r7, #0x0 SBCS r8, r8, #0x0 SBC r9, r9, #0x0 - # Sub product of top 8 words and order + /* Sub product of top 8 words and order */ MOV r12, sp MOV r1, #0x2c13 MOVT r1, #0xa30a @@ -4765,7 +4765,7 @@ sc_reduce: UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} SUB r12, r12, #0x20 - # Subtract at 4 * 32 + /* Subtract at 4 * 32 */ LDM r12, {r10, r11} SUBS r10, r10, r2 SBCS r11, r11, r3 @@ -4784,7 +4784,7 @@ sc_reduce: STM r12!, {r10, r11} SUB r12, r12, #0x24 ASR lr, r11, #25 - # Conditionally subtract order starting at bit 125 + /* Conditionally subtract order starting at bit 125 */ MOV r1, #0xa0000000 MOV r2, #0xba7d MOVT r2, #0x4b9e @@ -4822,7 +4822,7 @@ sc_reduce: STM r12!, {r10} SUB r0, r0, #0x10 MOV r12, sp - # Load bits 252-376 + /* Load bits 252-376 */ ADD r12, r12, #0x1c LDM r12, {r1, r2, r3, r4, r5} LSL r5, r5, #4 @@ -4835,9 +4835,9 @@ sc_reduce: ORR r2, r2, r1, LSR #28 BFC r5, #29, #3 SUB r12, r12, #0x1c - # Sub product of top 4 words and order + /* Sub product of top 4 words and order */ MOV r0, sp - # * -5cf5d3ed + /* * -5cf5d3ed */ MOV r1, #0x2c13 MOVT r1, #0xa30a MOV lr, #0x0 @@ -4857,7 +4857,7 @@ sc_reduce: UMLAL r9, lr, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -5812631b + /* * -5812631b */ MOV r1, #0x9ce5 MOVT r1, #0xa7ed MOV r10, #0x0 @@ -4877,7 +4877,7 @@ sc_reduce: UMLAL r9, r10, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -a2f79cd7 + /* * -a2f79cd7 */ MOV r1, #0x6329 MOVT r1, #0x5d08 MOV r11, #0x0 @@ -4897,7 +4897,7 @@ sc_reduce: UMLAL r9, r11, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -14def9df + /* * -14def9df */ MOV r1, #0x621 MOVT r1, #0xeb21 MOV r12, #0x0 @@ -4917,14 
+4917,14 @@ sc_reduce: UMLAL r9, r12, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # Add overflows at 4 * 32 + /* Add overflows at 4 * 32 */ LDM r0, {r6, r7, r8, r9} BFC r9, #28, #4 ADDS r6, r6, lr ADCS r7, r7, r10 ADCS r8, r8, r11 ADC r9, r9, r12 - # Subtract top at 4 * 32 + /* Subtract top at 4 * 32 */ SUBS r6, r6, r2 SBCS r7, r7, r3 SBCS r8, r8, r4 @@ -4954,12 +4954,12 @@ sc_reduce: ADCS r8, r8, #0x0 ADC r9, r9, r1 BFC r9, #28, #4 - # Store result + /* Store result */ LDR r0, [sp, #52] STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} ADD sp, sp, #0x38 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 588 + /* Cycle Count = 588 */ .size sc_reduce,.-sc_reduce #else .text @@ -4970,7 +4970,7 @@ sc_reduce: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} SUB sp, sp, #0x38 STR r0, [sp, #52] - # Load bits 252-511 + /* Load bits 252-511 */ ADD r0, r0, #0x1c LDM r0, {r1, r2, r3, r4, r5, r6, r7, r8, r9} LSR lr, r9, #24 @@ -4992,7 +4992,7 @@ sc_reduce: ORR r2, r2, r1, LSR #28 BFC r9, #28, #4 SUB r0, r0, #0x1c - # Add order times bits 504..511 + /* Add order times bits 504..511 */ MOV r10, #0x2c13 MOVT r10, #0xa30a MOV r11, #0x9ce5 @@ -5014,7 +5014,7 @@ sc_reduce: SBCS r7, r7, #0x0 SBCS r8, r8, #0x0 SBC r9, r9, #0x0 - # Sub product of top 8 words and order + /* Sub product of top 8 words and order */ MOV r12, sp MOV r1, #0x2c13 MOVT r1, #0xa30a @@ -5098,7 +5098,7 @@ sc_reduce: UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} SUB r12, r12, #0x20 - # Subtract at 4 * 32 + /* Subtract at 4 * 32 */ LDM r12, {r10, r11} SUBS r10, r10, r2 SBCS r11, r11, r3 @@ -5117,7 +5117,7 @@ sc_reduce: STM r12!, {r10, r11} SUB r12, r12, #0x24 ASR lr, r11, #25 - # Conditionally subtract order starting at bit 125 + /* Conditionally subtract order starting at bit 125 */ MOV r1, #0xa0000000 MOV r2, #0xba7d MOVT r2, #0x4b9e @@ -5155,7 +5155,7 @@ sc_reduce: STM r12!, {r10} SUB r0, r0, #0x10 MOV r12, sp - # Load bits 252-376 + /* Load bits 252-376 */ ADD r12, r12, #0x1c LDM r12, {r1, r2, r3, r4, r5} 
LSL r5, r5, #4 @@ -5168,9 +5168,9 @@ sc_reduce: ORR r2, r2, r1, LSR #28 BFC r5, #29, #3 SUB r12, r12, #0x1c - # Sub product of top 4 words and order + /* Sub product of top 4 words and order */ MOV r0, sp - # * -5cf5d3ed + /* * -5cf5d3ed */ MOV r1, #0x2c13 MOVT r1, #0xa30a MOV lr, #0x0 @@ -5181,7 +5181,7 @@ sc_reduce: UMAAL r9, lr, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -5812631b + /* * -5812631b */ MOV r1, #0x9ce5 MOVT r1, #0xa7ed MOV r10, #0x0 @@ -5192,7 +5192,7 @@ sc_reduce: UMAAL r9, r10, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -a2f79cd7 + /* * -a2f79cd7 */ MOV r1, #0x6329 MOVT r1, #0x5d08 MOV r11, #0x0 @@ -5203,7 +5203,7 @@ sc_reduce: UMAAL r9, r11, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -14def9df + /* * -14def9df */ MOV r1, #0x621 MOVT r1, #0xeb21 MOV r12, #0x0 @@ -5214,14 +5214,14 @@ sc_reduce: UMAAL r9, r12, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # Add overflows at 4 * 32 + /* Add overflows at 4 * 32 */ LDM r0, {r6, r7, r8, r9} BFC r9, #28, #4 ADDS r6, r6, lr ADCS r7, r7, r10 ADCS r8, r8, r11 ADC r9, r9, r12 - # Subtract top at 4 * 32 + /* Subtract top at 4 * 32 */ SUBS r6, r6, r2 SBCS r7, r7, r3 SBCS r8, r8, r4 @@ -5251,12 +5251,12 @@ sc_reduce: ADCS r8, r8, #0x0 ADC r9, r9, r1 BFC r9, #28, #4 - # Store result + /* Store result */ LDR r0, [sp, #52] STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} ADD sp, sp, #0x38 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 502 + /* Cycle Count = 502 */ .size sc_reduce,.-sc_reduce #endif /* WOLFSSL_SP_NO_UMAAL */ #ifdef HAVE_ED25519_SIGN @@ -5272,332 +5272,332 @@ sc_muladd: STM lr, {r0, r1, r3} MOV r0, #0x0 LDR r12, [r1] - # A[0] * B[0] + /* A[0] * B[0] */ LDR lr, [r2] UMULL r3, r4, r12, lr - # A[0] * B[2] + /* A[0] * B[2] */ LDR lr, [r2, #8] UMULL r5, r6, r12, lr - # A[0] * B[4] + /* A[0] * B[4] */ LDR lr, [r2, #16] UMULL r7, r8, r12, lr - # A[0] * B[6] + /* A[0] * B[6] */ LDR lr, [r2, #24] UMULL r9, r10, r12, lr STR r3, [sp] - # A[0] * B[1] + /* A[0] * 
B[1] */ LDR lr, [r2, #4] MOV r11, r0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[0] * B[3] + /* A[0] * B[3] */ LDR lr, [r2, #12] ADCS r6, r6, #0x0 ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[0] * B[5] + /* A[0] * B[5] */ LDR lr, [r2, #20] ADCS r8, r8, #0x0 ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[0] * B[7] + /* A[0] * B[7] */ LDR lr, [r2, #28] ADCS r10, r10, #0x0 ADC r3, r0, #0x0 UMLAL r10, r3, r12, lr - # A[1] * B[0] + /* A[1] * B[0] */ LDR r12, [r1, #4] LDR lr, [r2] MOV r11, #0x0 UMLAL r4, r11, r12, lr STR r4, [sp, #4] ADDS r5, r5, r11 - # A[1] * B[1] + /* A[1] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[1] * B[2] + /* A[1] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[1] * B[3] + /* A[1] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[1] * B[4] + /* A[1] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[1] * B[5] + /* A[1] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[1] * B[6] + /* A[1] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[1] * B[7] + /* A[1] * B[7] */ LDR lr, [r2, #28] ADC r4, r0, #0x0 UMLAL r3, r4, r12, lr - # A[2] * B[0] + /* A[2] * B[0] */ LDR r12, [r1, #8] LDR lr, [r2] MOV r11, #0x0 UMLAL r5, r11, r12, lr STR r5, [sp, #8] ADDS r6, r6, r11 - # A[2] * B[1] + /* A[2] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[2] * B[2] + /* A[2] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[2] * B[3] + /* A[2] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[2] * B[4] + /* A[2] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[2] * B[5] + /* A[2] * B[5] */ LDR lr, [r2, #20] ADC 
r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[2] * B[6] + /* A[2] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[2] * B[7] + /* A[2] * B[7] */ LDR lr, [r2, #28] ADC r5, r0, #0x0 UMLAL r4, r5, r12, lr - # A[3] * B[0] + /* A[3] * B[0] */ LDR r12, [r1, #12] LDR lr, [r2] MOV r11, #0x0 UMLAL r6, r11, r12, lr STR r6, [sp, #12] ADDS r7, r7, r11 - # A[3] * B[1] + /* A[3] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[3] * B[2] + /* A[3] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[3] * B[3] + /* A[3] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[3] * B[4] + /* A[3] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[3] * B[5] + /* A[3] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[3] * B[6] + /* A[3] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[3] * B[7] + /* A[3] * B[7] */ LDR lr, [r2, #28] ADC r6, r0, #0x0 UMLAL r5, r6, r12, lr - # A[4] * B[0] + /* A[4] * B[0] */ LDR r12, [r1, #16] LDR lr, [r2] MOV r11, #0x0 UMLAL r7, r11, r12, lr STR r7, [sp, #16] ADDS r8, r8, r11 - # A[4] * B[1] + /* A[4] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[4] * B[2] + /* A[4] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[4] * B[3] + /* A[4] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[4] * B[4] + /* A[4] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[4] * B[5] + /* A[4] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[4] * B[6] + /* A[4] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[4] * B[7] + /* 
A[4] * B[7] */ LDR lr, [r2, #28] ADC r7, r0, #0x0 UMLAL r6, r7, r12, lr - # A[5] * B[0] + /* A[5] * B[0] */ LDR r12, [r1, #20] LDR lr, [r2] MOV r11, #0x0 UMLAL r8, r11, r12, lr STR r8, [sp, #20] ADDS r9, r9, r11 - # A[5] * B[1] + /* A[5] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - # A[5] * B[2] + /* A[5] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[5] * B[3] + /* A[5] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[5] * B[4] + /* A[5] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[5] * B[5] + /* A[5] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[5] * B[6] + /* A[5] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[5] * B[7] + /* A[5] * B[7] */ LDR lr, [r2, #28] ADC r8, r0, #0x0 UMLAL r7, r8, r12, lr - # A[6] * B[0] + /* A[6] * B[0] */ LDR r12, [r1, #24] LDR lr, [r2] MOV r11, #0x0 UMLAL r9, r11, r12, lr STR r9, [sp, #24] ADDS r10, r10, r11 - # A[6] * B[1] + /* A[6] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 - # A[6] * B[2] + /* A[6] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[6] * B[3] + /* A[6] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[6] * B[4] + /* A[6] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[6] * B[5] + /* A[6] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[6] * B[6] + /* A[6] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[6] * B[7] + /* A[6] * B[7] */ LDR lr, [r2, #28] ADC r9, r0, #0x0 UMLAL r8, r9, r12, lr - # A[7] * B[0] + /* A[7] * B[0] */ LDR r12, [r1, #28] LDR lr, [r2] MOV r11, #0x0 UMLAL r10, r11, r12, lr STR r10, 
[sp, #28] ADDS r3, r3, r11 - # A[7] * B[1] + /* A[7] * B[1] */ LDR lr, [r2, #4] ADC r11, r0, #0x0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 - # A[7] * B[2] + /* A[7] * B[2] */ LDR lr, [r2, #8] ADC r11, r0, #0x0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 - # A[7] * B[3] + /* A[7] * B[3] */ LDR lr, [r2, #12] ADC r11, r0, #0x0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 - # A[7] * B[4] + /* A[7] * B[4] */ LDR lr, [r2, #16] ADC r11, r0, #0x0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 - # A[7] * B[5] + /* A[7] * B[5] */ LDR lr, [r2, #20] ADC r11, r0, #0x0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 - # A[7] * B[6] + /* A[7] * B[6] */ LDR lr, [r2, #24] ADC r11, r0, #0x0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 - # A[7] * B[7] + /* A[7] * B[7] */ LDR lr, [r2, #28] ADC r10, r0, #0x0 UMLAL r9, r10, r12, lr ADD lr, sp, #0x20 STM lr, {r3, r4, r5, r6, r7, r8, r9, r10} MOV r0, sp - # Add c to a * b + /* Add c to a * b */ LDR lr, [sp, #76] LDM r0, {r2, r3, r4, r5, r6, r7, r8, r9} LDM lr!, {r1, r10, r11, r12} @@ -5622,7 +5622,7 @@ sc_muladd: ADCS r8, r8, #0x0 ADC r9, r9, #0x0 SUB r0, r0, #0x20 - # Get 252..503 and 504..507 + /* Get 252..503 and 504..507 */ LSR lr, r9, #24 LSL r9, r9, #4 ORR r9, r9, r8, LSR #28 @@ -5641,7 +5641,7 @@ sc_muladd: LSL r2, r2, #4 ORR r2, r2, r1, LSR #28 BFC r9, #28, #4 - # Add order times bits 504..507 + /* Add order times bits 504..507 */ MOV r10, #0x2c13 MOVT r10, #0xa30a MOV r11, #0x9ce5 @@ -5672,7 +5672,7 @@ sc_muladd: SBCS r7, r7, #0x0 SBCS r8, r8, #0x0 SBC r9, r9, #0x0 - # Sub product of top 8 words and order + /* Sub product of top 8 words and order */ MOV r12, sp MOV r1, #0x2c13 MOVT r1, #0xa30a @@ -5840,7 +5840,7 @@ sc_muladd: UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} SUB r12, r12, #0x20 - # Subtract at 4 * 32 + /* Subtract at 4 * 32 */ LDM r12, {r10, r11} SUBS r10, r10, r2 SBCS r11, r11, r3 @@ -5859,7 +5859,7 @@ sc_muladd: STM r12!, {r10, r11} SUB r12, r12, #0x24 ASR lr, r11, #25 - # Conditionally subtract order starting at bit 125 + /* Conditionally 
subtract order starting at bit 125 */ MOV r1, #0xa0000000 MOV r2, #0xba7d MOVT r2, #0x4b9e @@ -5897,7 +5897,7 @@ sc_muladd: STM r12!, {r10} SUB r0, r0, #0x10 MOV r12, sp - # Load bits 252-376 + /* Load bits 252-376 */ ADD r12, r12, #0x1c LDM r12, {r1, r2, r3, r4, r5} LSL r5, r5, #4 @@ -5910,9 +5910,9 @@ sc_muladd: ORR r2, r2, r1, LSR #28 BFC r5, #29, #3 SUB r12, r12, #0x1c - # Sub product of top 4 words and order + /* Sub product of top 4 words and order */ MOV r0, sp - # * -5cf5d3ed + /* * -5cf5d3ed */ MOV r1, #0x2c13 MOVT r1, #0xa30a MOV lr, #0x0 @@ -5932,7 +5932,7 @@ sc_muladd: UMLAL r9, lr, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -5812631b + /* * -5812631b */ MOV r1, #0x9ce5 MOVT r1, #0xa7ed MOV r10, #0x0 @@ -5952,7 +5952,7 @@ sc_muladd: UMLAL r9, r10, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -a2f79cd7 + /* * -a2f79cd7 */ MOV r1, #0x6329 MOVT r1, #0x5d08 MOV r11, #0x0 @@ -5972,7 +5972,7 @@ sc_muladd: UMLAL r9, r11, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -14def9df + /* * -14def9df */ MOV r1, #0x621 MOVT r1, #0xeb21 MOV r12, #0x0 @@ -5992,14 +5992,14 @@ sc_muladd: UMLAL r9, r12, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # Add overflows at 4 * 32 + /* Add overflows at 4 * 32 */ LDM r0, {r6, r7, r8, r9} BFC r9, #28, #4 ADDS r6, r6, lr ADCS r7, r7, r10 ADCS r8, r8, r11 ADC r9, r9, r12 - # Subtract top at 4 * 32 + /* Subtract top at 4 * 32 */ SUBS r6, r6, r2 SBCS r7, r7, r3 SBCS r8, r8, r4 @@ -6030,7 +6030,7 @@ sc_muladd: ADC r9, r9, r1 BFC r9, #28, #4 LDR r0, [sp, #68] - # Store result + /* Store result */ STR r2, [r0] STR r3, [r0, #4] STR r4, [r0, #8] @@ -6041,7 +6041,7 @@ sc_muladd: STR r9, [r0, #28] ADD sp, sp, #0x50 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 994 + /* Cycle Count = 994 */ .size sc_muladd,.-sc_muladd #else .text @@ -6153,7 +6153,7 @@ sc_muladd: ADD lr, sp, #0x20 STM lr, {r3, r4, r5, r6, r7, r8, r9, r10} MOV r0, sp - # Add c to a * b + /* Add c to a * b */ LDR lr, [sp, #76] LDM 
r0, {r2, r3, r4, r5, r6, r7, r8, r9} LDM lr!, {r1, r10, r11, r12} @@ -6178,7 +6178,7 @@ sc_muladd: ADCS r8, r8, #0x0 ADC r9, r9, #0x0 SUB r0, r0, #0x20 - # Get 252..503 and 504..507 + /* Get 252..503 and 504..507 */ LSR lr, r9, #24 LSL r9, r9, #4 ORR r9, r9, r8, LSR #28 @@ -6197,7 +6197,7 @@ sc_muladd: LSL r2, r2, #4 ORR r2, r2, r1, LSR #28 BFC r9, #28, #4 - # Add order times bits 504..507 + /* Add order times bits 504..507 */ MOV r10, #0x2c13 MOVT r10, #0xa30a MOV r11, #0x9ce5 @@ -6219,7 +6219,7 @@ sc_muladd: SBCS r7, r7, #0x0 SBCS r8, r8, #0x0 SBC r9, r9, #0x0 - # Sub product of top 8 words and order + /* Sub product of top 8 words and order */ MOV r12, sp MOV r1, #0x2c13 MOVT r1, #0xa30a @@ -6303,7 +6303,7 @@ sc_muladd: UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} SUB r12, r12, #0x20 - # Subtract at 4 * 32 + /* Subtract at 4 * 32 */ LDM r12, {r10, r11} SUBS r10, r10, r2 SBCS r11, r11, r3 @@ -6322,7 +6322,7 @@ sc_muladd: STM r12!, {r10, r11} SUB r12, r12, #0x24 ASR lr, r11, #25 - # Conditionally subtract order starting at bit 125 + /* Conditionally subtract order starting at bit 125 */ MOV r1, #0xa0000000 MOV r2, #0xba7d MOVT r2, #0x4b9e @@ -6360,7 +6360,7 @@ sc_muladd: STM r12!, {r10} SUB r0, r0, #0x10 MOV r12, sp - # Load bits 252-376 + /* Load bits 252-376 */ ADD r12, r12, #0x1c LDM r12, {r1, r2, r3, r4, r5} LSL r5, r5, #4 @@ -6373,9 +6373,9 @@ sc_muladd: ORR r2, r2, r1, LSR #28 BFC r5, #29, #3 SUB r12, r12, #0x1c - # Sub product of top 4 words and order + /* Sub product of top 4 words and order */ MOV r0, sp - # * -5cf5d3ed + /* * -5cf5d3ed */ MOV r1, #0x2c13 MOVT r1, #0xa30a MOV lr, #0x0 @@ -6386,7 +6386,7 @@ sc_muladd: UMAAL r9, lr, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -5812631b + /* * -5812631b */ MOV r1, #0x9ce5 MOVT r1, #0xa7ed MOV r10, #0x0 @@ -6397,7 +6397,7 @@ sc_muladd: UMAAL r9, r10, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -a2f79cd7 + /* * -a2f79cd7 */ MOV r1, #0x6329 MOVT r1, #0x5d08 MOV r11, #0x0 @@ -6408,7 
+6408,7 @@ sc_muladd: UMAAL r9, r11, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # * -14def9df + /* * -14def9df */ MOV r1, #0x621 MOVT r1, #0xeb21 MOV r12, #0x0 @@ -6419,14 +6419,14 @@ sc_muladd: UMAAL r9, r12, r5, r1 STM r0, {r6, r7, r8, r9} ADD r0, r0, #0x4 - # Add overflows at 4 * 32 + /* Add overflows at 4 * 32 */ LDM r0, {r6, r7, r8, r9} BFC r9, #28, #4 ADDS r6, r6, lr ADCS r7, r7, r10 ADCS r8, r8, r11 ADC r9, r9, r12 - # Subtract top at 4 * 32 + /* Subtract top at 4 * 32 */ SUBS r6, r6, r2 SBCS r7, r7, r3 SBCS r8, r8, r4 @@ -6457,7 +6457,7 @@ sc_muladd: ADC r9, r9, r1 BFC r9, #28, #4 LDR r0, [sp, #68] - # Store result + /* Store result */ STR r2, [r0] STR r3, [r0, #4] STR r4, [r0, #8] @@ -6468,7 +6468,7 @@ sc_muladd: STR r9, [r0, #28] ADD sp, sp, #0x50 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 752 + /* Cycle Count = 752 */ .size sc_muladd,.-sc_muladd #endif /* WOLFSSL_SP_NO_UMAAL */ #endif /* HAVE_ED25519_SIGN */ diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index edb2af0683..a5403e99ed 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -39,7 +39,7 @@ #ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__thumb__) #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm @@ -2796,9 +2796,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "MOV %[a], #0x1c\n\t" "STR %[a], [sp, #176]\n\t" "\n" - "L_curve25519_words_%=:\n\t" + "L_curve25519_words:\n\t" "\n" - "L_curve25519_bits_%=:\n\t" + "L_curve25519_bits:\n\t" "LDR %[n], [sp, #164]\n\t" "LDR %[a], [%[n], r2]\n\t" "LDR %[n], [sp, #180]\n\t" @@ -2978,19 +2978,19 @@ int curve25519(byte* r, const byte* n, const byte* a) "LDR %[n], [sp, #180]\n\t" "SUBS %[n], %[n], #0x1\n\t" "STR %[n], [sp, #180]\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGE 
L_curve25519_bits_%=\n\t" +#ifdef __GNUC__ + "BGE L_curve25519_bits\n\t" #else - "BGE.N L_curve25519_bits_%=\n\t" + "BGE.W L_curve25519_bits\n\t" #endif "MOV %[n], #0x1f\n\t" "STR %[n], [sp, #180]\n\t" "SUBS %[a], %[a], #0x4\n\t" "STR %[a], [sp, #176]\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGE L_curve25519_words_%=\n\t" +#ifdef __GNUC__ + "BGE L_curve25519_words\n\t" #else - "BGE.N L_curve25519_words_%=\n\t" + "BGE.W L_curve25519_words\n\t" #endif /* Invert */ "ADD r1, sp, #0x0\n\t" @@ -3022,7 +3022,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x4\n\t" "\n" - "L_curve25519_inv_1_%=:\n\t" + "L_curve25519_inv_1:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3030,9 +3030,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_1_%=\n\t" + "BNE L_curve25519_inv_1\n\t" #else - "BNE.N L_curve25519_inv_1_%=\n\t" + "BNE.N L_curve25519_inv_1\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3043,7 +3043,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x9\n\t" "\n" - "L_curve25519_inv_2_%=:\n\t" + "L_curve25519_inv_2:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3051,9 +3051,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_2_%=\n\t" + "BNE L_curve25519_inv_2\n\t" #else - "BNE.N L_curve25519_inv_2_%=\n\t" + "BNE.N L_curve25519_inv_2\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3064,7 +3064,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x13\n\t" "\n" - "L_curve25519_inv_3_%=:\n\t" + "L_curve25519_inv_3:\n\t" "ADD r1, sp, 
#0x80\n\t" "ADD r0, sp, #0x80\n\t" "PUSH {r12}\n\t" @@ -3072,9 +3072,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_3_%=\n\t" + "BNE L_curve25519_inv_3\n\t" #else - "BNE.N L_curve25519_inv_3_%=\n\t" + "BNE.N L_curve25519_inv_3\n\t" #endif "ADD r2, sp, #0x60\n\t" "ADD r1, sp, #0x80\n\t" @@ -3082,7 +3082,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" "MOV r12, #0xa\n\t" "\n" - "L_curve25519_inv_4_%=:\n\t" + "L_curve25519_inv_4:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3090,9 +3090,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_4_%=\n\t" + "BNE L_curve25519_inv_4\n\t" #else - "BNE.N L_curve25519_inv_4_%=\n\t" + "BNE.N L_curve25519_inv_4\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3103,7 +3103,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x31\n\t" "\n" - "L_curve25519_inv_5_%=:\n\t" + "L_curve25519_inv_5:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3111,9 +3111,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_5_%=\n\t" + "BNE L_curve25519_inv_5\n\t" #else - "BNE.N L_curve25519_inv_5_%=\n\t" + "BNE.N L_curve25519_inv_5\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3124,7 +3124,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x63\n\t" "\n" - "L_curve25519_inv_6_%=:\n\t" + "L_curve25519_inv_6:\n\t" "ADD r1, sp, #0x80\n\t" "ADD r0, sp, #0x80\n\t" "PUSH {r12}\n\t" @@ -3132,9 +3132,9 @@ int curve25519(byte* r, 
const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_6_%=\n\t" + "BNE L_curve25519_inv_6\n\t" #else - "BNE.N L_curve25519_inv_6_%=\n\t" + "BNE.N L_curve25519_inv_6\n\t" #endif "ADD r2, sp, #0x60\n\t" "ADD r1, sp, #0x80\n\t" @@ -3142,7 +3142,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" "MOV r12, #0x32\n\t" "\n" - "L_curve25519_inv_7_%=:\n\t" + "L_curve25519_inv_7:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3150,9 +3150,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_7_%=\n\t" + "BNE L_curve25519_inv_7\n\t" #else - "BNE.N L_curve25519_inv_7_%=\n\t" + "BNE.N L_curve25519_inv_7\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3160,7 +3160,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" "MOV r12, #0x5\n\t" "\n" - "L_curve25519_inv_8_%=:\n\t" + "L_curve25519_inv_8:\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "PUSH {r12}\n\t" @@ -3168,9 +3168,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_8_%=\n\t" + "BNE L_curve25519_inv_8\n\t" #else - "BNE.N L_curve25519_inv_8_%=\n\t" + "BNE.N L_curve25519_inv_8\n\t" #endif "ADD r2, sp, #0x20\n\t" "ADD r1, sp, #0x40\n\t" @@ -3234,7 +3234,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "MOV %[a], #0xfe\n\t" "\n" - "L_curve25519_bits_%=:\n\t" + "L_curve25519_bits:\n\t" "STR %[a], [sp, #168]\n\t" "LDR %[n], [sp, #160]\n\t" "AND r4, %[a], #0x1f\n\t" @@ -3320,9 +3320,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "LDR %[a], [sp, 
#168]\n\t" "SUBS %[a], %[a], #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGE L_curve25519_bits_%=\n\t" + "BGE L_curve25519_bits\n\t" #else - "BGE.N L_curve25519_bits_%=\n\t" + "BGE.N L_curve25519_bits\n\t" #endif /* Cycle Count: 171 */ "LDR %[n], [sp, #184]\n\t" @@ -3359,7 +3359,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x4\n\t" "\n" - "L_curve25519_inv_1_%=:\n\t" + "L_curve25519_inv_1:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3367,9 +3367,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_1_%=\n\t" + "BNE L_curve25519_inv_1\n\t" #else - "BNE.N L_curve25519_inv_1_%=\n\t" + "BNE.N L_curve25519_inv_1\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3380,7 +3380,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x9\n\t" "\n" - "L_curve25519_inv_2_%=:\n\t" + "L_curve25519_inv_2:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3388,9 +3388,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_2_%=\n\t" + "BNE L_curve25519_inv_2\n\t" #else - "BNE.N L_curve25519_inv_2_%=\n\t" + "BNE.N L_curve25519_inv_2\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3401,7 +3401,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x13\n\t" "\n" - "L_curve25519_inv_3_%=:\n\t" + "L_curve25519_inv_3:\n\t" "ADD r1, sp, #0x80\n\t" "ADD r0, sp, #0x80\n\t" "PUSH {r12}\n\t" @@ -3409,9 +3409,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || 
defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_3_%=\n\t" + "BNE L_curve25519_inv_3\n\t" #else - "BNE.N L_curve25519_inv_3_%=\n\t" + "BNE.N L_curve25519_inv_3\n\t" #endif "ADD r2, sp, #0x60\n\t" "ADD r1, sp, #0x80\n\t" @@ -3419,7 +3419,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" "MOV r12, #0xa\n\t" "\n" - "L_curve25519_inv_4_%=:\n\t" + "L_curve25519_inv_4:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3427,9 +3427,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_4_%=\n\t" + "BNE L_curve25519_inv_4\n\t" #else - "BNE.N L_curve25519_inv_4_%=\n\t" + "BNE.N L_curve25519_inv_4\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3440,7 +3440,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x31\n\t" "\n" - "L_curve25519_inv_5_%=:\n\t" + "L_curve25519_inv_5:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3448,9 +3448,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_5_%=\n\t" + "BNE L_curve25519_inv_5\n\t" #else - "BNE.N L_curve25519_inv_5_%=\n\t" + "BNE.N L_curve25519_inv_5\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3461,7 +3461,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_sq_op\n\t" "MOV r12, #0x63\n\t" "\n" - "L_curve25519_inv_6_%=:\n\t" + "L_curve25519_inv_6:\n\t" "ADD r1, sp, #0x80\n\t" "ADD r0, sp, #0x80\n\t" "PUSH {r12}\n\t" @@ -3469,9 +3469,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_6_%=\n\t" + "BNE L_curve25519_inv_6\n\t" 
#else - "BNE.N L_curve25519_inv_6_%=\n\t" + "BNE.N L_curve25519_inv_6\n\t" #endif "ADD r2, sp, #0x60\n\t" "ADD r1, sp, #0x80\n\t" @@ -3479,7 +3479,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" "MOV r12, #0x32\n\t" "\n" - "L_curve25519_inv_7_%=:\n\t" + "L_curve25519_inv_7:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3487,9 +3487,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_7_%=\n\t" + "BNE L_curve25519_inv_7\n\t" #else - "BNE.N L_curve25519_inv_7_%=\n\t" + "BNE.N L_curve25519_inv_7\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3497,7 +3497,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" "MOV r12, #0x5\n\t" "\n" - "L_curve25519_inv_8_%=:\n\t" + "L_curve25519_inv_8:\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "PUSH {r12}\n\t" @@ -3505,9 +3505,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_curve25519_inv_8_%=\n\t" + "BNE L_curve25519_inv_8\n\t" #else - "BNE.N L_curve25519_inv_8_%=\n\t" + "BNE.N L_curve25519_inv_8\n\t" #endif "ADD r2, sp, #0x20\n\t" "ADD r1, sp, #0x40\n\t" @@ -3589,7 +3589,7 @@ void fe_invert(fe r, const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x4\n\t" "\n" - "L_fe_invert1_%=:\n\t" + "L_fe_invert1:\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "PUSH {r12}\n\t" @@ -3597,9 +3597,9 @@ void fe_invert(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_invert1_%=\n\t" + "BNE L_fe_invert1\n\t" #else - "BNE.N L_fe_invert1_%=\n\t" + "BNE.N L_fe_invert1\n\t" #endif "ADD r2, sp, #0x20\n\t" "ADD r1, sp, #0x40\n\t" @@ -3610,7 +3610,7 @@ void fe_invert(fe r, 
const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x9\n\t" "\n" - "L_fe_invert2_%=:\n\t" + "L_fe_invert2:\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "PUSH {r12}\n\t" @@ -3618,9 +3618,9 @@ void fe_invert(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_invert2_%=\n\t" + "BNE L_fe_invert2\n\t" #else - "BNE.N L_fe_invert2_%=\n\t" + "BNE.N L_fe_invert2\n\t" #endif "ADD r2, sp, #0x20\n\t" "ADD r1, sp, #0x40\n\t" @@ -3631,7 +3631,7 @@ void fe_invert(fe r, const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x13\n\t" "\n" - "L_fe_invert3_%=:\n\t" + "L_fe_invert3:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3639,9 +3639,9 @@ void fe_invert(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_invert3_%=\n\t" + "BNE L_fe_invert3\n\t" #else - "BNE.N L_fe_invert3_%=\n\t" + "BNE.N L_fe_invert3\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3649,7 +3649,7 @@ void fe_invert(fe r, const fe a) "BL fe_mul_op\n\t" "MOV r12, #0xa\n\t" "\n" - "L_fe_invert4_%=:\n\t" + "L_fe_invert4:\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "PUSH {r12}\n\t" @@ -3657,9 +3657,9 @@ void fe_invert(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_invert4_%=\n\t" + "BNE L_fe_invert4\n\t" #else - "BNE.N L_fe_invert4_%=\n\t" + "BNE.N L_fe_invert4\n\t" #endif "ADD r2, sp, #0x20\n\t" "ADD r1, sp, #0x40\n\t" @@ -3670,7 +3670,7 @@ void fe_invert(fe r, const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x31\n\t" "\n" - "L_fe_invert5_%=:\n\t" + "L_fe_invert5:\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "PUSH {r12}\n\t" @@ -3678,9 +3678,9 @@ void fe_invert(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || 
defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_invert5_%=\n\t" + "BNE L_fe_invert5\n\t" #else - "BNE.N L_fe_invert5_%=\n\t" + "BNE.N L_fe_invert5\n\t" #endif "ADD r2, sp, #0x20\n\t" "ADD r1, sp, #0x40\n\t" @@ -3691,7 +3691,7 @@ void fe_invert(fe r, const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x63\n\t" "\n" - "L_fe_invert6_%=:\n\t" + "L_fe_invert6:\n\t" "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t" "PUSH {r12}\n\t" @@ -3699,9 +3699,9 @@ void fe_invert(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_invert6_%=\n\t" + "BNE L_fe_invert6\n\t" #else - "BNE.N L_fe_invert6_%=\n\t" + "BNE.N L_fe_invert6\n\t" #endif "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" @@ -3709,7 +3709,7 @@ void fe_invert(fe r, const fe a) "BL fe_mul_op\n\t" "MOV r12, #0x32\n\t" "\n" - "L_fe_invert7_%=:\n\t" + "L_fe_invert7:\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "PUSH {r12}\n\t" @@ -3717,9 +3717,9 @@ void fe_invert(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_invert7_%=\n\t" + "BNE L_fe_invert7\n\t" #else - "BNE.N L_fe_invert7_%=\n\t" + "BNE.N L_fe_invert7\n\t" #endif "ADD r2, sp, #0x20\n\t" "ADD r1, sp, #0x40\n\t" @@ -3727,7 +3727,7 @@ void fe_invert(fe r, const fe a) "BL fe_mul_op\n\t" "MOV r12, #0x5\n\t" "\n" - "L_fe_invert8_%=:\n\t" + "L_fe_invert8:\n\t" "ADD r1, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t" "PUSH {r12}\n\t" @@ -3735,9 +3735,9 @@ void fe_invert(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_invert8_%=\n\t" + "BNE L_fe_invert8\n\t" #else - "BNE.N L_fe_invert8_%=\n\t" + "BNE.N L_fe_invert8\n\t" #endif "MOV r2, sp\n\t" "ADD r1, sp, #0x20\n\t" @@ -4275,7 +4275,7 @@ void fe_pow22523(fe r, const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x4\n\t" "\n" - "L_fe_pow22523_1_%=:\n\t" + 
"L_fe_pow22523_1:\n\t" "ADD r1, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t" "PUSH {r12}\n\t" @@ -4283,9 +4283,9 @@ void fe_pow22523(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_pow22523_1_%=\n\t" + "BNE L_fe_pow22523_1\n\t" #else - "BNE.N L_fe_pow22523_1_%=\n\t" + "BNE.N L_fe_pow22523_1\n\t" #endif "MOV r2, sp\n\t" "ADD r1, sp, #0x20\n\t" @@ -4296,7 +4296,7 @@ void fe_pow22523(fe r, const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x9\n\t" "\n" - "L_fe_pow22523_2_%=:\n\t" + "L_fe_pow22523_2:\n\t" "ADD r1, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t" "PUSH {r12}\n\t" @@ -4304,9 +4304,9 @@ void fe_pow22523(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_pow22523_2_%=\n\t" + "BNE L_fe_pow22523_2\n\t" #else - "BNE.N L_fe_pow22523_2_%=\n\t" + "BNE.N L_fe_pow22523_2\n\t" #endif "MOV r2, sp\n\t" "ADD r1, sp, #0x20\n\t" @@ -4317,7 +4317,7 @@ void fe_pow22523(fe r, const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x13\n\t" "\n" - "L_fe_pow22523_3_%=:\n\t" + "L_fe_pow22523_3:\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "PUSH {r12}\n\t" @@ -4325,9 +4325,9 @@ void fe_pow22523(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_pow22523_3_%=\n\t" + "BNE L_fe_pow22523_3\n\t" #else - "BNE.N L_fe_pow22523_3_%=\n\t" + "BNE.N L_fe_pow22523_3\n\t" #endif "ADD r2, sp, #0x20\n\t" "ADD r1, sp, #0x40\n\t" @@ -4335,7 +4335,7 @@ void fe_pow22523(fe r, const fe a) "BL fe_mul_op\n\t" "MOV r12, #0xa\n\t" "\n" - "L_fe_pow22523_4_%=:\n\t" + "L_fe_pow22523_4:\n\t" "ADD r1, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t" "PUSH {r12}\n\t" @@ -4343,9 +4343,9 @@ void fe_pow22523(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE 
L_fe_pow22523_4_%=\n\t" + "BNE L_fe_pow22523_4\n\t" #else - "BNE.N L_fe_pow22523_4_%=\n\t" + "BNE.N L_fe_pow22523_4\n\t" #endif "MOV r2, sp\n\t" "ADD r1, sp, #0x20\n\t" @@ -4356,7 +4356,7 @@ void fe_pow22523(fe r, const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x31\n\t" "\n" - "L_fe_pow22523_5_%=:\n\t" + "L_fe_pow22523_5:\n\t" "ADD r1, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t" "PUSH {r12}\n\t" @@ -4364,9 +4364,9 @@ void fe_pow22523(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_pow22523_5_%=\n\t" + "BNE L_fe_pow22523_5\n\t" #else - "BNE.N L_fe_pow22523_5_%=\n\t" + "BNE.N L_fe_pow22523_5\n\t" #endif "MOV r2, sp\n\t" "ADD r1, sp, #0x20\n\t" @@ -4377,7 +4377,7 @@ void fe_pow22523(fe r, const fe a) "BL fe_sq_op\n\t" "MOV r12, #0x63\n\t" "\n" - "L_fe_pow22523_6_%=:\n\t" + "L_fe_pow22523_6:\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "PUSH {r12}\n\t" @@ -4385,9 +4385,9 @@ void fe_pow22523(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_pow22523_6_%=\n\t" + "BNE L_fe_pow22523_6\n\t" #else - "BNE.N L_fe_pow22523_6_%=\n\t" + "BNE.N L_fe_pow22523_6\n\t" #endif "ADD r2, sp, #0x20\n\t" "ADD r1, sp, #0x40\n\t" @@ -4395,7 +4395,7 @@ void fe_pow22523(fe r, const fe a) "BL fe_mul_op\n\t" "MOV r12, #0x32\n\t" "\n" - "L_fe_pow22523_7_%=:\n\t" + "L_fe_pow22523_7:\n\t" "ADD r1, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t" "PUSH {r12}\n\t" @@ -4403,9 +4403,9 @@ void fe_pow22523(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_pow22523_7_%=\n\t" + "BNE L_fe_pow22523_7\n\t" #else - "BNE.N L_fe_pow22523_7_%=\n\t" + "BNE.N L_fe_pow22523_7\n\t" #endif "MOV r2, sp\n\t" "ADD r1, sp, #0x20\n\t" @@ -4413,7 +4413,7 @@ void fe_pow22523(fe r, const fe a) "BL fe_mul_op\n\t" "MOV r12, #0x2\n\t" "\n" - 
"L_fe_pow22523_8_%=:\n\t" + "L_fe_pow22523_8:\n\t" "MOV r1, sp\n\t" "MOV r0, sp\n\t" "PUSH {r12}\n\t" @@ -4421,9 +4421,9 @@ void fe_pow22523(fe r, const fe a) "POP {r12}\n\t" "SUBS r12, r12, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_fe_pow22523_8_%=\n\t" + "BNE L_fe_pow22523_8\n\t" #else - "BNE.N L_fe_pow22523_8_%=\n\t" + "BNE.N L_fe_pow22523_8\n\t" #endif "LDR r2, [sp, #100]\n\t" "MOV r1, sp\n\t" @@ -6904,7 +6904,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) #endif /* HAVE_CURVE25519 || HAVE_ED25519 */ #endif /* !__aarch64__ && __thumb__ */ #endif /* WOLFSSL_ARMASM */ -#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* !defined(__aarch64__) && defined(__thumb__) */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm.S b/wolfcrypt/src/port/arm/thumb2-sha256-asm.S index 7c59e25482..30d8dc76b5 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm.S @@ -113,7 +113,7 @@ Transform_Sha256_Len: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} SUB sp, sp, #0xc0 ADR r3, L_SHA256_transform_len_k - # Copy digest to add in at end + /* Copy digest to add in at end */ LDRD r4, r5, [r0] LDRD r6, r7, [r0, #8] LDRD r8, r9, [r0, #16] @@ -122,9 +122,9 @@ Transform_Sha256_Len: STRD r6, r7, [sp, #72] STRD r8, r9, [sp, #80] STRD r10, r11, [sp, #88] - # Start of loop processing a block + /* Start of loop processing a block */ L_SHA256_transform_len_begin: - # Load, Reverse and Store W - 64 bytes + /* Load, Reverse and Store W - 64 bytes */ LDR r4, [r1] LDR r5, [r1, #4] LDR r6, [r1, #8] @@ -169,9 +169,9 @@ L_SHA256_transform_len_begin: LDR r4, [r0, #8] EOR r11, r11, r4 MOV r12, #0x3 - # Start of 16 rounds + /* Start of 16 rounds */ L_SHA256_transform_len_start: - # Round 0 + /* Round 0 */ LDR r5, [r0, #16] LDR r6, [r0, #20] LDR r7, [r0, #24] @@ -203,7 +203,7 @@ 
L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #12] STR r9, [r0, #28] - # Calc new W[0] + /* Calc new W[0] */ LDR r6, [sp, #56] LDR r7, [sp, #36] LDR r8, [sp, #4] @@ -218,7 +218,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp] - # Round 1 + /* Round 1 */ LDR r5, [r0, #12] LDR r6, [r0, #16] LDR r7, [r0, #20] @@ -250,7 +250,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #8] STR r9, [r0, #24] - # Calc new W[1] + /* Calc new W[1] */ LDR r6, [sp, #60] LDR r7, [sp, #40] LDR r8, [sp, #8] @@ -265,7 +265,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #4] - # Round 2 + /* Round 2 */ LDR r5, [r0, #8] LDR r6, [r0, #12] LDR r7, [r0, #16] @@ -297,7 +297,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #4] STR r9, [r0, #20] - # Calc new W[2] + /* Calc new W[2] */ LDR r6, [sp] LDR r7, [sp, #44] LDR r8, [sp, #12] @@ -312,7 +312,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #8] - # Round 3 + /* Round 3 */ LDR r5, [r0, #4] LDR r6, [r0, #8] LDR r7, [r0, #12] @@ -344,7 +344,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0] STR r9, [r0, #16] - # Calc new W[3] + /* Calc new W[3] */ LDR r6, [sp, #4] LDR r7, [sp, #48] LDR r8, [sp, #16] @@ -359,7 +359,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #12] - # Round 4 + /* Round 4 */ LDR r5, [r0] LDR r6, [r0, #4] LDR r7, [r0, #8] @@ -391,7 +391,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #28] STR r9, [r0, #12] - # Calc new W[4] + /* Calc new W[4] */ LDR r6, [sp, #8] LDR r7, [sp, #52] LDR r8, [sp, #20] @@ -406,7 +406,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #16] - # Round 5 + /* Round 5 */ LDR r5, [r0, #28] LDR r6, [r0] LDR r7, [r0, #4] @@ -438,7 +438,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #24] STR r9, [r0, #8] - # Calc new W[5] + /* Calc new W[5] */ LDR r6, [sp, #12] LDR r7, [sp, #56] 
LDR r8, [sp, #24] @@ -453,7 +453,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #20] - # Round 6 + /* Round 6 */ LDR r5, [r0, #24] LDR r6, [r0, #28] LDR r7, [r0] @@ -485,7 +485,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #20] STR r9, [r0, #4] - # Calc new W[6] + /* Calc new W[6] */ LDR r6, [sp, #16] LDR r7, [sp, #60] LDR r8, [sp, #28] @@ -500,7 +500,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #24] - # Round 7 + /* Round 7 */ LDR r5, [r0, #20] LDR r6, [r0, #24] LDR r7, [r0, #28] @@ -532,7 +532,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #16] STR r9, [r0] - # Calc new W[7] + /* Calc new W[7] */ LDR r6, [sp, #20] LDR r7, [sp] LDR r8, [sp, #32] @@ -547,7 +547,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #28] - # Round 8 + /* Round 8 */ LDR r5, [r0, #16] LDR r6, [r0, #20] LDR r7, [r0, #24] @@ -579,7 +579,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #12] STR r9, [r0, #28] - # Calc new W[8] + /* Calc new W[8] */ LDR r6, [sp, #24] LDR r7, [sp, #4] LDR r8, [sp, #36] @@ -594,7 +594,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #32] - # Round 9 + /* Round 9 */ LDR r5, [r0, #12] LDR r6, [r0, #16] LDR r7, [r0, #20] @@ -626,7 +626,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #8] STR r9, [r0, #24] - # Calc new W[9] + /* Calc new W[9] */ LDR r6, [sp, #28] LDR r7, [sp, #8] LDR r8, [sp, #40] @@ -641,7 +641,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #36] - # Round 10 + /* Round 10 */ LDR r5, [r0, #8] LDR r6, [r0, #12] LDR r7, [r0, #16] @@ -673,7 +673,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #4] STR r9, [r0, #20] - # Calc new W[10] + /* Calc new W[10] */ LDR r6, [sp, #32] LDR r7, [sp, #12] LDR r8, [sp, #44] @@ -688,7 +688,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #40] - # Round 11 + /* 
Round 11 */ LDR r5, [r0, #4] LDR r6, [r0, #8] LDR r7, [r0, #12] @@ -720,7 +720,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0] STR r9, [r0, #16] - # Calc new W[11] + /* Calc new W[11] */ LDR r6, [sp, #36] LDR r7, [sp, #16] LDR r8, [sp, #48] @@ -735,7 +735,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #44] - # Round 12 + /* Round 12 */ LDR r5, [r0] LDR r6, [r0, #4] LDR r7, [r0, #8] @@ -767,7 +767,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #28] STR r9, [r0, #12] - # Calc new W[12] + /* Calc new W[12] */ LDR r6, [sp, #40] LDR r7, [sp, #20] LDR r8, [sp, #52] @@ -782,7 +782,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #48] - # Round 13 + /* Round 13 */ LDR r5, [r0, #28] LDR r6, [r0] LDR r7, [r0, #4] @@ -814,7 +814,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #24] STR r9, [r0, #8] - # Calc new W[13] + /* Calc new W[13] */ LDR r6, [sp, #44] LDR r7, [sp, #24] LDR r8, [sp, #56] @@ -829,7 +829,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #52] - # Round 14 + /* Round 14 */ LDR r5, [r0, #24] LDR r6, [r0, #28] LDR r7, [r0] @@ -861,7 +861,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #20] STR r9, [r0, #4] - # Calc new W[14] + /* Calc new W[14] */ LDR r6, [sp, #48] LDR r7, [sp, #28] LDR r8, [sp, #60] @@ -876,7 +876,7 @@ L_SHA256_transform_len_start: ADD r4, r4, r5 ADD r9, r9, r4 STR r9, [sp, #56] - # Round 15 + /* Round 15 */ LDR r5, [r0, #20] LDR r6, [r0, #24] LDR r7, [r0, #28] @@ -908,7 +908,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #16] STR r9, [r0] - # Calc new W[15] + /* Calc new W[15] */ LDR r6, [sp, #52] LDR r7, [sp, #32] LDR r8, [sp] @@ -925,12 +925,12 @@ L_SHA256_transform_len_start: STR r9, [sp, #60] ADD r3, r3, #0x40 SUBS r12, r12, #0x1 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_SHA256_transform_len_start #else - BNE.N 
L_SHA256_transform_len_start + BNE.W L_SHA256_transform_len_start #endif - # Round 0 + /* Round 0 */ LDR r5, [r0, #16] LDR r6, [r0, #20] LDR r7, [r0, #24] @@ -962,7 +962,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #12] STR r9, [r0, #28] - # Round 1 + /* Round 1 */ LDR r5, [r0, #12] LDR r6, [r0, #16] LDR r7, [r0, #20] @@ -994,7 +994,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #8] STR r9, [r0, #24] - # Round 2 + /* Round 2 */ LDR r5, [r0, #8] LDR r6, [r0, #12] LDR r7, [r0, #16] @@ -1026,7 +1026,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #4] STR r9, [r0, #20] - # Round 3 + /* Round 3 */ LDR r5, [r0, #4] LDR r6, [r0, #8] LDR r7, [r0, #12] @@ -1058,7 +1058,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0] STR r9, [r0, #16] - # Round 4 + /* Round 4 */ LDR r5, [r0] LDR r6, [r0, #4] LDR r7, [r0, #8] @@ -1090,7 +1090,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #28] STR r9, [r0, #12] - # Round 5 + /* Round 5 */ LDR r5, [r0, #28] LDR r6, [r0] LDR r7, [r0, #4] @@ -1122,7 +1122,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #24] STR r9, [r0, #8] - # Round 6 + /* Round 6 */ LDR r5, [r0, #24] LDR r6, [r0, #28] LDR r7, [r0] @@ -1154,7 +1154,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #20] STR r9, [r0, #4] - # Round 7 + /* Round 7 */ LDR r5, [r0, #20] LDR r6, [r0, #24] LDR r7, [r0, #28] @@ -1186,7 +1186,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #16] STR r9, [r0] - # Round 8 + /* Round 8 */ LDR r5, [r0, #16] LDR r6, [r0, #20] LDR r7, [r0, #24] @@ -1218,7 +1218,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #12] STR r9, [r0, #28] - # Round 9 + /* Round 9 */ LDR r5, [r0, #12] LDR r6, [r0, #16] LDR r7, [r0, #20] @@ -1250,7 +1250,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #8] STR r9, [r0, #24] - # Round 10 + /* Round 10 */ LDR r5, [r0, #8] LDR r6, [r0, #12] LDR r7, [r0, #16] @@ -1282,7 +1282,7 @@ 
L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #4] STR r9, [r0, #20] - # Round 11 + /* Round 11 */ LDR r5, [r0, #4] LDR r6, [r0, #8] LDR r7, [r0, #12] @@ -1314,7 +1314,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0] STR r9, [r0, #16] - # Round 12 + /* Round 12 */ LDR r5, [r0] LDR r6, [r0, #4] LDR r7, [r0, #8] @@ -1346,7 +1346,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #28] STR r9, [r0, #12] - # Round 13 + /* Round 13 */ LDR r5, [r0, #28] LDR r6, [r0] LDR r7, [r0, #4] @@ -1378,7 +1378,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #24] STR r9, [r0, #8] - # Round 14 + /* Round 14 */ LDR r5, [r0, #24] LDR r6, [r0, #28] LDR r7, [r0] @@ -1410,7 +1410,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r11 STR r8, [r0, #20] STR r9, [r0, #4] - # Round 15 + /* Round 15 */ LDR r5, [r0, #20] LDR r6, [r0, #24] LDR r7, [r0, #28] @@ -1442,7 +1442,7 @@ L_SHA256_transform_len_start: ADD r9, r9, r10 STR r8, [r0, #16] STR r9, [r0] - # Add in digest from start + /* Add in digest from start */ LDRD r4, r5, [r0] LDRD r6, r7, [r0, #8] LDRD r8, r9, [sp, #64] @@ -1470,14 +1470,14 @@ L_SHA256_transform_len_start: SUBS r2, r2, #0x40 SUB r3, r3, #0xc0 ADD r1, r1, #0x40 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_SHA256_transform_len_begin #else - BNE.N L_SHA256_transform_len_begin + BNE.W L_SHA256_transform_len_begin #endif ADD sp, sp, #0xc0 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 1874 + /* Cycle Count = 1874 */ .size Transform_Sha256_Len,.-Transform_Sha256_Len #endif /* WOLFSSL_ARMASM_NO_NEON */ #endif /* !NO_SHA256 */ diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c index 2483f036d5..a2367c2a2c 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c @@ -39,7 +39,7 @@ #ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) 
&& defined(__arm__) +#if !defined(__aarch64__) && defined(__thumb__) #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm @@ -84,8 +84,8 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) register wc_Sha256* sha256 __asm__ ("r0") = (wc_Sha256*)sha256_p; register const byte* data __asm__ ("r1") = (const byte*)data_p; register word32 len __asm__ ("r2") = (word32)len_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint32_t* L_SHA256_transform_len_k_c __asm__ ("r3") = (uint32_t*)&L_SHA256_transform_len_k; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" @@ -101,7 +101,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "STRD r10, r11, [sp, #88]\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=:\n\t" + "L_SHA256_transform_len_begin:\n\t" /* Load, Reverse and Store W - 64 bytes */ "LDR r4, [%[data]]\n\t" "LDR r5, [%[data], #4]\n\t" @@ -149,7 +149,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "MOV r12, #0x3\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_len_start_%=:\n\t" + "L_SHA256_transform_len_start:\n\t" /* Round 0 */ "LDR r5, [%[sha256], #16]\n\t" "LDR r6, [%[sha256], #20]\n\t" @@ -904,10 +904,10 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "STR r9, [sp, #60]\n\t" "ADD r3, r3, #0x40\n\t" "SUBS r12, r12, #0x1\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_SHA256_transform_len_start_%=\n\t" +#ifdef __GNUC__ + "BNE L_SHA256_transform_len_start\n\t" #else - "BNE.N L_SHA256_transform_len_start_%=\n\t" + "BNE.W L_SHA256_transform_len_start\n\t" #endif /* Round 0 */ "LDR r5, [%[sha256], #16]\n\t" @@ -1449,14 +1449,20 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "SUBS %[len], %[len], #0x40\n\t" "SUB r3, r3, #0xc0\n\t" "ADD %[data], %[data], #0x40\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) 
|| defined(__IAR_SYSTEMS_ICC__) - "BNE L_SHA256_transform_len_begin_%=\n\t" +#ifdef __GNUC__ + "BNE L_SHA256_transform_len_begin\n\t" #else - "BNE.N L_SHA256_transform_len_begin_%=\n\t" + "BNE.W L_SHA256_transform_len_begin\n\t" #endif "ADD sp, sp, #0xc0\n\t" - : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), + [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c) : +#else + : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len) + : [L_SHA256_transform_len_k] "r" (L_SHA256_transform_len_k) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -1465,7 +1471,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) #endif /* !NO_SHA256 */ #endif /* !__aarch64__ && __thumb__ */ #endif /* WOLFSSL_ARMASM */ -#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* !defined(__aarch64__) && defined(__thumb__) */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S index b420e78634..6031b92404 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S @@ -209,7 +209,7 @@ Transform_Sha512_Len: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} SUB sp, sp, #0xc0 ADR r3, L_SHA512_transform_len_k - # Copy digest to add in at end + /* Copy digest to add in at end */ LDRD r4, r5, [r0] LDRD r6, r7, [r0, #8] LDRD r8, r9, [r0, #16] @@ -226,9 +226,9 @@ Transform_Sha512_Len: STRD r6, r7, [sp, #168] STRD r8, r9, [sp, #176] STRD r10, r11, [sp, #184] - # Start of loop processing a block + /* Start of loop processing a block */ L_SHA512_transform_len_begin: - # Load, Reverse and Store W + /* Load, Reverse and Store W */ LDR r4, [r1] LDR r5, [r1, #4] LDR r6, 
[r1, #8] @@ -325,15 +325,15 @@ L_SHA512_transform_len_begin: STR r8, [sp, #116] STR r11, [sp, #120] STR r10, [sp, #124] - # Pre-calc: b ^ c + /* Pre-calc: b ^ c */ LDRD r10, r11, [r0, #8] LDRD r4, r5, [r0, #16] EOR r10, r10, r4 EOR r11, r11, r5 MOV r12, #0x4 - # Start of 16 rounds + /* Start of 16 rounds */ L_SHA512_transform_len_start: - # Round 0 + /* Round 0 */ LDRD r4, r5, [r0, #32] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -413,7 +413,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #56] MOV r10, r8 MOV r11, r9 - # Calc new W[0] + /* Calc new W[0] */ LDRD r4, r5, [sp, #112] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -457,7 +457,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp] - # Round 1 + /* Round 1 */ LDRD r4, r5, [r0, #24] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -537,7 +537,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #48] MOV r10, r8 MOV r11, r9 - # Calc new W[1] + /* Calc new W[1] */ LDRD r4, r5, [sp, #120] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -581,7 +581,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #8] - # Round 2 + /* Round 2 */ LDRD r4, r5, [r0, #16] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -661,7 +661,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #40] MOV r10, r8 MOV r11, r9 - # Calc new W[2] + /* Calc new W[2] */ LDRD r4, r5, [sp] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -705,7 +705,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #16] - # Round 3 + /* Round 3 */ LDRD r4, r5, [r0, #8] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -785,7 +785,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #32] MOV r10, r8 MOV r11, r9 - # Calc new W[3] + /* Calc new W[3] */ LDRD r4, r5, [sp, #8] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -829,7 +829,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #24] - # Round 4 + /* Round 4 */ LDRD r4, r5, [r0] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -909,7 +909,7 @@ L_SHA512_transform_len_start: STRD r6, 
r7, [r0, #24] MOV r10, r8 MOV r11, r9 - # Calc new W[4] + /* Calc new W[4] */ LDRD r4, r5, [sp, #16] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -953,7 +953,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #32] - # Round 5 + /* Round 5 */ LDRD r4, r5, [r0, #56] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -1033,7 +1033,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #16] MOV r10, r8 MOV r11, r9 - # Calc new W[5] + /* Calc new W[5] */ LDRD r4, r5, [sp, #24] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -1077,7 +1077,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #40] - # Round 6 + /* Round 6 */ LDRD r4, r5, [r0, #48] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -1157,7 +1157,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #8] MOV r10, r8 MOV r11, r9 - # Calc new W[6] + /* Calc new W[6] */ LDRD r4, r5, [sp, #32] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -1201,7 +1201,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #48] - # Round 7 + /* Round 7 */ LDRD r4, r5, [r0, #40] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -1281,7 +1281,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0] MOV r10, r8 MOV r11, r9 - # Calc new W[7] + /* Calc new W[7] */ LDRD r4, r5, [sp, #40] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -1325,7 +1325,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #56] - # Round 8 + /* Round 8 */ LDRD r4, r5, [r0, #32] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -1405,7 +1405,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #56] MOV r10, r8 MOV r11, r9 - # Calc new W[8] + /* Calc new W[8] */ LDRD r4, r5, [sp, #48] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -1449,7 +1449,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #64] - # Round 9 + /* Round 9 */ LDRD r4, r5, [r0, #24] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -1529,7 +1529,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #48] MOV r10, r8 MOV r11, r9 - # Calc new W[9] + /* Calc new 
W[9] */ LDRD r4, r5, [sp, #56] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -1573,7 +1573,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #72] - # Round 10 + /* Round 10 */ LDRD r4, r5, [r0, #16] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -1653,7 +1653,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #40] MOV r10, r8 MOV r11, r9 - # Calc new W[10] + /* Calc new W[10] */ LDRD r4, r5, [sp, #64] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -1697,7 +1697,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #80] - # Round 11 + /* Round 11 */ LDRD r4, r5, [r0, #8] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -1777,7 +1777,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #32] MOV r10, r8 MOV r11, r9 - # Calc new W[11] + /* Calc new W[11] */ LDRD r4, r5, [sp, #72] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -1821,7 +1821,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #88] - # Round 12 + /* Round 12 */ LDRD r4, r5, [r0] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -1901,7 +1901,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #24] MOV r10, r8 MOV r11, r9 - # Calc new W[12] + /* Calc new W[12] */ LDRD r4, r5, [sp, #80] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -1945,7 +1945,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #96] - # Round 13 + /* Round 13 */ LDRD r4, r5, [r0, #56] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2025,7 +2025,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #16] MOV r10, r8 MOV r11, r9 - # Calc new W[13] + /* Calc new W[13] */ LDRD r4, r5, [sp, #88] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -2069,7 +2069,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #104] - # Round 14 + /* Round 14 */ LDRD r4, r5, [r0, #48] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2149,7 +2149,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #8] MOV r10, r8 MOV r11, r9 - # Calc new W[14] + /* Calc new W[14] */ LDRD r4, r5, [sp, #96] LSRS r6, r4, #19 
LSRS r7, r5, #19 @@ -2193,7 +2193,7 @@ L_SHA512_transform_len_start: ADDS r4, r4, r6 ADC r5, r5, r7 STRD r4, r5, [sp, #112] - # Round 15 + /* Round 15 */ LDRD r4, r5, [r0, #40] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2273,7 +2273,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0] MOV r10, r8 MOV r11, r9 - # Calc new W[15] + /* Calc new W[15] */ LDRD r4, r5, [sp, #104] LSRS r6, r4, #19 LSRS r7, r5, #19 @@ -2319,12 +2319,12 @@ L_SHA512_transform_len_start: STRD r4, r5, [sp, #120] ADD r3, r3, #0x80 SUBS r12, r12, #0x1 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef __GNUC__ BNE L_SHA512_transform_len_start #else - BNE.N L_SHA512_transform_len_start + BNE.W L_SHA512_transform_len_start #endif - # Round 0 + /* Round 0 */ LDRD r4, r5, [r0, #32] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2404,7 +2404,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #56] MOV r10, r8 MOV r11, r9 - # Round 1 + /* Round 1 */ LDRD r4, r5, [r0, #24] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2484,7 +2484,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #48] MOV r10, r8 MOV r11, r9 - # Round 2 + /* Round 2 */ LDRD r4, r5, [r0, #16] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2564,7 +2564,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #40] MOV r10, r8 MOV r11, r9 - # Round 3 + /* Round 3 */ LDRD r4, r5, [r0, #8] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2644,7 +2644,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #32] MOV r10, r8 MOV r11, r9 - # Round 4 + /* Round 4 */ LDRD r4, r5, [r0] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2724,7 +2724,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #24] MOV r10, r8 MOV r11, r9 - # Round 5 + /* Round 5 */ LDRD r4, r5, [r0, #56] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2804,7 +2804,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #16] MOV r10, r8 MOV r11, r9 - # Round 6 + /* Round 6 */ LDRD r4, r5, [r0, #48] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2884,7 +2884,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #8] MOV 
r10, r8 MOV r11, r9 - # Round 7 + /* Round 7 */ LDRD r4, r5, [r0, #40] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -2964,7 +2964,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0] MOV r10, r8 MOV r11, r9 - # Round 8 + /* Round 8 */ LDRD r4, r5, [r0, #32] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -3044,7 +3044,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #56] MOV r10, r8 MOV r11, r9 - # Round 9 + /* Round 9 */ LDRD r4, r5, [r0, #24] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -3124,7 +3124,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #48] MOV r10, r8 MOV r11, r9 - # Round 10 + /* Round 10 */ LDRD r4, r5, [r0, #16] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -3204,7 +3204,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #40] MOV r10, r8 MOV r11, r9 - # Round 11 + /* Round 11 */ LDRD r4, r5, [r0, #8] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -3284,7 +3284,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #32] MOV r10, r8 MOV r11, r9 - # Round 12 + /* Round 12 */ LDRD r4, r5, [r0] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -3364,7 +3364,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #24] MOV r10, r8 MOV r11, r9 - # Round 13 + /* Round 13 */ LDRD r4, r5, [r0, #56] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -3444,7 +3444,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #16] MOV r10, r8 MOV r11, r9 - # Round 14 + /* Round 14 */ LDRD r4, r5, [r0, #48] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -3524,7 +3524,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0, #8] MOV r10, r8 MOV r11, r9 - # Round 15 + /* Round 15 */ LDRD r4, r5, [r0, #40] LSRS r6, r4, #14 LSRS r7, r5, #14 @@ -3604,7 +3604,7 @@ L_SHA512_transform_len_start: STRD r6, r7, [r0] MOV r10, r8 MOV r11, r9 - # Add in digest from start + /* Add in digest from start */ LDRD r4, r5, [r0] LDRD r6, r7, [r0, #8] LDRD r8, r9, [sp, #128] @@ -3656,15 +3656,15 @@ L_SHA512_transform_len_start: SUBS r2, r2, #0x80 SUB r3, r3, #0x200 ADD r1, r1, #0x80 -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#ifdef 
__GNUC__ BNE L_SHA512_transform_len_begin #else - BNE.N L_SHA512_transform_len_begin + BNE.W L_SHA512_transform_len_begin #endif EOR r0, r0, r0 ADD sp, sp, #0xc0 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 5021 + /* Cycle Count = 5021 */ .size Transform_Sha512_Len,.-Transform_Sha512_Len #endif /* WOLFSSL_ARMASM_NO_NEON */ #endif /* WOLFSSL_SHA512 */ diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c index 3dc2d1f207..7521b35fa7 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c @@ -39,7 +39,7 @@ #ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__thumb__) #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm @@ -108,8 +108,8 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) register wc_Sha512* sha512 __asm__ ("r0") = (wc_Sha512*)sha512_p; register const byte* data __asm__ ("r1") = (const byte*)data_p; register word32 len __asm__ ("r2") = (word32)len_p; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ register uint64_t* L_SHA512_transform_len_k_c __asm__ ("r3") = (uint64_t*)&L_SHA512_transform_len_k; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" @@ -133,7 +133,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) "STRD r10, r11, [sp, #184]\n\t" /* Start of loop processing a block */ "\n" - "L_SHA512_transform_len_begin_%=:\n\t" + "L_SHA512_transform_len_begin:\n\t" /* Load, Reverse and Store W */ "LDR r4, [%[data]]\n\t" "LDR r5, [%[data], #4]\n\t" @@ -239,7 +239,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) "MOV r12, #0x4\n\t" /* Start of 16 rounds */ "\n" - "L_SHA512_transform_len_start_%=:\n\t" + "L_SHA512_transform_len_start:\n\t" /* Round 0 */ "LDRD r4, r5, [%[sha512], #32]\n\t" "LSRS r6, r4, #14\n\t" @@ -2226,10 +2226,10 @@ void 
Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) "STRD r4, r5, [sp, #120]\n\t" "ADD r3, r3, #0x80\n\t" "SUBS r12, r12, #0x1\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_SHA512_transform_len_start_%=\n\t" +#ifdef __GNUC__ + "BNE L_SHA512_transform_len_start\n\t" #else - "BNE.N L_SHA512_transform_len_start_%=\n\t" + "BNE.W L_SHA512_transform_len_start\n\t" #endif /* Round 0 */ "LDRD r4, r5, [%[sha512], #32]\n\t" @@ -3563,15 +3563,21 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) "SUBS %[len], %[len], #0x80\n\t" "SUB r3, r3, #0x200\n\t" "ADD %[data], %[data], #0x80\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_SHA512_transform_len_begin_%=\n\t" +#ifdef __GNUC__ + "BNE L_SHA512_transform_len_begin\n\t" #else - "BNE.N L_SHA512_transform_len_begin_%=\n\t" + "BNE.W L_SHA512_transform_len_begin\n\t" #endif "EOR r0, r0, r0\n\t" "ADD sp, sp, #0xc0\n\t" - : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), + [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c) : +#else + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) + : [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -3580,7 +3586,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) #endif /* WOLFSSL_SHA512 */ #endif /* !__aarch64__ && __thumb__ */ #endif /* WOLFSSL_ARMASM */ -#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* !defined(__aarch64__) && defined(__thumb__) */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/caam/wolfcaam_seco.c 
b/wolfcrypt/src/port/caam/wolfcaam_seco.c index dbe6db987e..8326f308f2 100644 --- a/wolfcrypt/src/port/caam/wolfcaam_seco.c +++ b/wolfcrypt/src/port/caam/wolfcaam_seco.c @@ -1228,7 +1228,7 @@ word32 wc_SECO_WrapKey(word32 keyId, byte* in, word32 inSz, byte* iv, } -/* trasnlates the HSM error to wolfSSL error and does debug print out */ +/* Translates the HSM error to wolfSSL error and does debug print out */ int wc_TranslateHSMError(int current, hsm_err_t err) { int ret = -1; diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index a1ae275de6..c1c0fb8a06 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -55,6 +55,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -5403,10 +5404,13 @@ static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b __asm__ __volatile__ ( "sub sp, sp, #0x200\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_2048_mul_64_outer_%=: \n\t" "subs r3, r5, #0xfc\n\t" @@ -5451,13 +5455,86 @@ static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, 
r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" #endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #0x100\n\t" - "beq L_sp_2048_mul_64_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_2048_mul_64_inner_%=\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_2048_mul_64_inner_done_%=\n\t" + "blt L_sp_2048_mul_64_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "\n" "L_sp_2048_mul_64_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" @@ -5465,14 +5542,46 @@ static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x1f8\n\t" + "cmp r5, #0x1f4\n\t" "ble L_sp_2048_mul_64_outer_%=\n\t" + "ldr lr, [%[a], #252]\n\t" + "ldr r11, [%[b], #252]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, 
#16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_2048_mul_64_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_2048_mul_64_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -5492,10 +5601,12 @@ static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #0x200\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_2048_sqr_64_outer_%=: \n\t" "subs r3, r5, #0xfc\n\t" @@ -5504,8 +5615,6 @@ static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) "sub r4, r5, r3\n\t" "\n" "L_sp_2048_sqr_64_inner_%=: \n\t" - "cmp r4, r3\n\t" - "beq L_sp_2048_sqr_64_op_sqr_%=\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -5557,9 +5666,11 @@ static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" #endif - "bal L_sp_2048_sqr_64_op_done_%=\n\t" - "\n" - "L_sp_2048_sqr_64_op_sqr_%=: \n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt 
L_sp_2048_sqr_64_inner_done_%=\n\t" + "blt L_sp_2048_sqr_64_inner_%=\n\t" "ldr lr, [%[a], r3]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) "lsl r9, lr, #16\n\t" @@ -5588,30 +5699,46 @@ static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_2048_sqr_64_op_done_%=: \n\t" - "add r3, r3, #4\n\t" - "sub r4, r4, #4\n\t" - "cmp r3, #0x100\n\t" - "beq L_sp_2048_sqr_64_inner_done_%=\n\t" - "cmp r3, r4\n\t" - "bgt L_sp_2048_sqr_64_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_2048_sqr_64_inner_%=\n\t" - "\n" "L_sp_2048_sqr_64_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x1f8\n\t" + "cmp r5, #0x1f4\n\t" "ble L_sp_2048_sqr_64_outer_%=\n\t" + "ldr lr, [%[a], #252]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_2048_sqr_64_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_2048_sqr_64_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -5728,10 +5855,13 @@ static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b __asm__ __volatile__ ( "sub sp, sp, #0x100\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "ldr 
r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_2048_mul_32_outer_%=: \n\t" "subs r3, r5, #0x7c\n\t" @@ -5776,13 +5906,86 @@ static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" #endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #0x80\n\t" - "beq L_sp_2048_mul_32_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_2048_mul_32_inner_%=\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_2048_mul_32_inner_done_%=\n\t" + "blt L_sp_2048_mul_32_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, 
r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "\n" "L_sp_2048_mul_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" @@ -5790,14 +5993,46 @@ static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0xf8\n\t" + "cmp r5, #0xf4\n\t" "ble L_sp_2048_mul_32_outer_%=\n\t" + "ldr lr, [%[a], #124]\n\t" + "ldr r11, [%[b], #124]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_2048_mul_32_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_2048_mul_32_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -5817,10 +6052,12 @@ static void 
sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #0x100\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_2048_sqr_32_outer_%=: \n\t" "subs r3, r5, #0x7c\n\t" @@ -5829,8 +6066,6 @@ static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) "sub r4, r5, r3\n\t" "\n" "L_sp_2048_sqr_32_inner_%=: \n\t" - "cmp r4, r3\n\t" - "beq L_sp_2048_sqr_32_op_sqr_%=\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -5882,9 +6117,11 @@ static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" #endif - "bal L_sp_2048_sqr_32_op_done_%=\n\t" - "\n" - "L_sp_2048_sqr_32_op_sqr_%=: \n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_2048_sqr_32_inner_done_%=\n\t" + "blt L_sp_2048_sqr_32_inner_%=\n\t" "ldr lr, [%[a], r3]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) "lsl r9, lr, #16\n\t" @@ -5913,30 +6150,46 @@ static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_2048_sqr_32_op_done_%=: \n\t" - "add r3, r3, #4\n\t" - "sub r4, r4, #4\n\t" - "cmp r3, #0x80\n\t" - "beq L_sp_2048_sqr_32_inner_done_%=\n\t" - "cmp r3, r4\n\t" - "bgt L_sp_2048_sqr_32_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_2048_sqr_32_inner_%=\n\t" - "\n" "L_sp_2048_sqr_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0xf8\n\t" + "cmp r5, #0xf4\n\t" "ble L_sp_2048_sqr_32_outer_%=\n\t" + "ldr lr, [%[a], #124]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc 
r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_2048_sqr_32_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_2048_sqr_32_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -28088,10 +28341,13 @@ static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b __asm__ __volatile__ ( "sub sp, sp, #0x300\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_3072_mul_96_outer_%=: \n\t" "subs r3, r5, #0x17c\n\t" @@ -28136,13 +28392,86 @@ static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, 
#16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" #endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #0x180\n\t" - "beq L_sp_3072_mul_96_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_3072_mul_96_inner_%=\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_3072_mul_96_inner_done_%=\n\t" + "blt L_sp_3072_mul_96_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "\n" "L_sp_3072_mul_96_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" @@ -28150,14 +28479,46 @@ static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x2f8\n\t" + "cmp r5, #0x2f4\n\t" "ble L_sp_3072_mul_96_outer_%=\n\t" + "ldr lr, [%[a], #380]\n\t" + "ldr r11, [%[b], #380]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, 
#0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_3072_mul_96_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_3072_mul_96_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -28177,10 +28538,12 @@ static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #0x300\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_3072_sqr_96_outer_%=: \n\t" "subs r3, r5, #0x17c\n\t" @@ -28189,8 +28552,6 @@ static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) "sub r4, r5, r3\n\t" "\n" "L_sp_3072_sqr_96_inner_%=: \n\t" - "cmp r4, r3\n\t" - "beq L_sp_3072_sqr_96_op_sqr_%=\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -28242,9 +28603,11 @@ static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" #endif - "bal L_sp_3072_sqr_96_op_done_%=\n\t" - "\n" - "L_sp_3072_sqr_96_op_sqr_%=: \n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_3072_sqr_96_inner_done_%=\n\t" + "blt L_sp_3072_sqr_96_inner_%=\n\t" "ldr lr, [%[a], r3]\n\t" #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) "lsl r9, lr, #16\n\t" @@ -28273,30 +28636,46 @@ static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_3072_sqr_96_op_done_%=: \n\t" - "add r3, r3, #4\n\t" - "sub r4, r4, #4\n\t" - "cmp r3, #0x180\n\t" - "beq L_sp_3072_sqr_96_inner_done_%=\n\t" - "cmp r3, r4\n\t" - "bgt L_sp_3072_sqr_96_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_3072_sqr_96_inner_%=\n\t" - "\n" "L_sp_3072_sqr_96_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x2f8\n\t" + "cmp r5, #0x2f4\n\t" "ble L_sp_3072_sqr_96_outer_%=\n\t" + "ldr lr, [%[a], #380]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_3072_sqr_96_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_3072_sqr_96_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -28413,10 +28792,13 @@ static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b __asm__ __volatile__ ( "sub sp, sp, #0x180\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" + "mov r5, #4\n\t" "\n" 
"L_sp_3072_mul_48_outer_%=: \n\t" "subs r3, r5, #0xbc\n\t" @@ -28461,13 +28843,86 @@ static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" #endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #0xc0\n\t" - "beq L_sp_3072_mul_48_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_3072_mul_48_inner_%=\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_3072_mul_48_inner_done_%=\n\t" + "blt L_sp_3072_mul_48_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, 
r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "\n" "L_sp_3072_mul_48_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" @@ -28475,14 +28930,46 @@ static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x178\n\t" + "cmp r5, #0x174\n\t" "ble L_sp_3072_mul_48_outer_%=\n\t" + "ldr lr, [%[a], #188]\n\t" + "ldr r11, [%[b], #188]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_3072_mul_48_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_3072_mul_48_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -28502,10 +28989,12 @@ static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #0x180\n\t" - "mov r6, #0\n\t" + "ldr lr, 
[%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_3072_sqr_48_outer_%=: \n\t" "subs r3, r5, #0xbc\n\t" @@ -28514,8 +29003,6 @@ static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) "sub r4, r5, r3\n\t" "\n" "L_sp_3072_sqr_48_inner_%=: \n\t" - "cmp r4, r3\n\t" - "beq L_sp_3072_sqr_48_op_sqr_%=\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -28567,9 +29054,11 @@ static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" #endif - "bal L_sp_3072_sqr_48_op_done_%=\n\t" - "\n" - "L_sp_3072_sqr_48_op_sqr_%=: \n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_3072_sqr_48_inner_done_%=\n\t" + "blt L_sp_3072_sqr_48_inner_%=\n\t" "ldr lr, [%[a], r3]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) "lsl r9, lr, #16\n\t" @@ -28598,30 +29087,46 @@ static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_3072_sqr_48_op_done_%=: \n\t" - "add r3, r3, #4\n\t" - "sub r4, r4, #4\n\t" - "cmp r3, #0xc0\n\t" - "beq L_sp_3072_sqr_48_inner_done_%=\n\t" - "cmp r3, r4\n\t" - "bgt L_sp_3072_sqr_48_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_3072_sqr_48_inner_%=\n\t" - "\n" "L_sp_3072_sqr_48_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x178\n\t" + "cmp r5, #0x174\n\t" "ble L_sp_3072_sqr_48_outer_%=\n\t" + "ldr lr, [%[a], #188]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr 
r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_3072_sqr_48_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_3072_sqr_48_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -46058,10 +46563,13 @@ static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* __asm__ __volatile__ ( "sub sp, sp, #0x400\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_4096_mul_128_outer_%=: \n\t" "subs r3, r5, #0x1fc\n\t" @@ -46106,13 +46614,86 @@ static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, 
lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" #endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #0x200\n\t" - "beq L_sp_4096_mul_128_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_4096_mul_128_inner_%=\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_4096_mul_128_inner_done_%=\n\t" + "blt L_sp_4096_mul_128_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "\n" "L_sp_4096_mul_128_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" @@ -46120,14 +46701,46 @@ static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x3f8\n\t" + "cmp r5, #0x3f4\n\t" "ble L_sp_4096_mul_128_outer_%=\n\t" + "ldr lr, [%[a], #508]\n\t" + "ldr r11, [%[b], #508]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, 
r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_4096_mul_128_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_4096_mul_128_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -46147,10 +46760,12 @@ static void sp_4096_sqr_128(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #0x400\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_4096_sqr_128_outer_%=: \n\t" "subs r3, r5, #0x1fc\n\t" @@ -46159,8 +46774,6 @@ static void sp_4096_sqr_128(sp_digit* r_p, const sp_digit* a_p) "sub r4, r5, r3\n\t" "\n" "L_sp_4096_sqr_128_inner_%=: \n\t" - "cmp r4, r3\n\t" - "beq L_sp_4096_sqr_128_op_sqr_%=\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -46212,9 +46825,11 @@ static void sp_4096_sqr_128(sp_digit* r_p, const sp_digit* a_p) "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" #endif - "bal L_sp_4096_sqr_128_op_done_%=\n\t" - "\n" - "L_sp_4096_sqr_128_op_sqr_%=: \n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_4096_sqr_128_inner_done_%=\n\t" + "blt L_sp_4096_sqr_128_inner_%=\n\t" "ldr lr, [%[a], r3]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) "lsl r9, lr, #16\n\t" @@ -46243,30 +46858,46 @@ static void sp_4096_sqr_128(sp_digit* r_p, 
const sp_digit* a_p) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_4096_sqr_128_op_done_%=: \n\t" - "add r3, r3, #4\n\t" - "sub r4, r4, #4\n\t" - "cmp r3, #0x200\n\t" - "beq L_sp_4096_sqr_128_inner_done_%=\n\t" - "cmp r3, r4\n\t" - "bgt L_sp_4096_sqr_128_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_4096_sqr_128_inner_%=\n\t" - "\n" "L_sp_4096_sqr_128_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x3f8\n\t" + "cmp r5, #0x3f4\n\t" "ble L_sp_4096_sqr_128_outer_%=\n\t" + "ldr lr, [%[a], #508]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_4096_sqr_128_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_4096_sqr_128_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -60831,10 +61462,13 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p __asm__ __volatile__ ( "sub sp, sp, #0x40\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_256_mul_8_outer_%=: \n\t" "subs r3, r5, #28\n\t" @@ -60879,13 +61513,86 @@ static void 
sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" #endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #32\n\t" - "beq L_sp_256_mul_8_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_256_mul_8_inner_%=\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_256_mul_8_inner_done_%=\n\t" + "blt L_sp_256_mul_8_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr 
r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "\n" "L_sp_256_mul_8_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" @@ -60893,14 +61600,46 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #56\n\t" + "cmp r5, #52\n\t" "ble L_sp_256_mul_8_outer_%=\n\t" + "ldr lr, [%[a], #28]\n\t" + "ldr r11, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_256_mul_8_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_256_mul_8_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -63403,10 +64142,12 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #0x40\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" + "mov r5, #4\n\t" "\n" 
"L_sp_256_sqr_8_outer_%=: \n\t" "subs r3, r5, #28\n\t" @@ -63415,8 +64156,6 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) "sub r4, r5, r3\n\t" "\n" "L_sp_256_sqr_8_inner_%=: \n\t" - "cmp r4, r3\n\t" - "beq L_sp_256_sqr_8_op_sqr_%=\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -63468,9 +64207,11 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" #endif - "bal L_sp_256_sqr_8_op_done_%=\n\t" - "\n" - "L_sp_256_sqr_8_op_sqr_%=: \n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_256_sqr_8_inner_done_%=\n\t" + "blt L_sp_256_sqr_8_inner_%=\n\t" "ldr lr, [%[a], r3]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) "lsl r9, lr, #16\n\t" @@ -63499,30 +64240,46 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_256_sqr_8_op_done_%=: \n\t" - "add r3, r3, #4\n\t" - "sub r4, r4, #4\n\t" - "cmp r3, #32\n\t" - "beq L_sp_256_sqr_8_inner_done_%=\n\t" - "cmp r3, r4\n\t" - "bgt L_sp_256_sqr_8_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_256_sqr_8_inner_%=\n\t" - "\n" "L_sp_256_sqr_8_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #56\n\t" + "cmp r5, #52\n\t" "ble L_sp_256_sqr_8_outer_%=\n\t" + "ldr lr, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif 
"str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_256_sqr_8_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_256_sqr_8_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -70275,8 +71032,8 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_8(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_8(r, a, m, mp); for (; n > 1; n--) { @@ -78372,7 +79129,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -78428,7 +79185,7 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
@@ -79028,10 +79785,13 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ __asm__ __volatile__ ( "sub sp, sp, #0x60\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_384_mul_12_outer_%=: \n\t" "subs r3, r5, #44\n\t" @@ -79076,13 +79836,86 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" #endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #48\n\t" - "beq L_sp_384_mul_12_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_384_mul_12_inner_%=\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_384_mul_12_inner_done_%=\n\t" + "blt L_sp_384_mul_12_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + 
"adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "\n" "L_sp_384_mul_12_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" @@ -79090,14 +79923,46 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x58\n\t" + "cmp r5, #0x54\n\t" "ble L_sp_384_mul_12_outer_%=\n\t" + "ldr lr, [%[a], #44]\n\t" + "ldr r11, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_384_mul_12_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm 
%[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_384_mul_12_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -84616,10 +85481,12 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #0x60\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_384_sqr_12_outer_%=: \n\t" "subs r3, r5, #44\n\t" @@ -84628,8 +85495,6 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) "sub r4, r5, r3\n\t" "\n" "L_sp_384_sqr_12_inner_%=: \n\t" - "cmp r4, r3\n\t" - "beq L_sp_384_sqr_12_op_sqr_%=\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -84681,9 +85546,11 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" #endif - "bal L_sp_384_sqr_12_op_done_%=\n\t" - "\n" - "L_sp_384_sqr_12_op_sqr_%=: \n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_384_sqr_12_inner_done_%=\n\t" + "blt L_sp_384_sqr_12_inner_%=\n\t" "ldr lr, [%[a], r3]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) "lsl r9, lr, #16\n\t" @@ -84712,30 +85579,46 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_384_sqr_12_op_done_%=: \n\t" - "add r3, r3, #4\n\t" - "sub r4, r4, #4\n\t" - "cmp r3, #48\n\t" - "beq L_sp_384_sqr_12_inner_done_%=\n\t" - "cmp r3, r4\n\t" - "bgt L_sp_384_sqr_12_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_384_sqr_12_inner_%=\n\t" - "\n" "L_sp_384_sqr_12_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x58\n\t" + "cmp r5, #0x54\n\t" "ble L_sp_384_sqr_12_outer_%=\n\t" + "ldr lr, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" 
+ "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_384_sqr_12_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_384_sqr_12_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -88978,8 +89861,8 @@ SP_NOINLINE static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_12(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_12(r, a, m, mp); for (; n > 1; n--) { @@ -96322,7 +97205,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -96378,7 +97261,7 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
@@ -97020,10 +97903,13 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ __asm__ __volatile__ ( "sub sp, sp, #0x88\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_521_mul_17_outer_%=: \n\t" "subs r3, r5, #0x40\n\t" @@ -97068,13 +97954,86 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" #endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #0x44\n\t" - "beq L_sp_521_mul_17_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_521_mul_17_inner_%=\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_521_mul_17_inner_done_%=\n\t" + "blt L_sp_521_mul_17_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, 
r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "\n" "L_sp_521_mul_17_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" @@ -97082,17 +98041,49 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x80\n\t" + "cmp r5, #0x7c\n\t" "ble L_sp_521_mul_17_outer_%=\n\t" + "ldr lr, [%[a], #64]\n\t" + "ldr r11, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "ldm sp!, {r6, r7}\n\t" "stm %[r]!, {r6, r7}\n\t" "sub r5, r5, #8\n\t" "\n" "L_sp_521_mul_17_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - 
"subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_521_mul_17_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -108130,10 +109121,12 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #0x88\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_521_sqr_17_outer_%=: \n\t" "subs r3, r5, #0x40\n\t" @@ -108142,8 +109135,6 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) "sub r4, r5, r3\n\t" "\n" "L_sp_521_sqr_17_inner_%=: \n\t" - "cmp r4, r3\n\t" - "beq L_sp_521_sqr_17_op_sqr_%=\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -108195,9 +109186,11 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" #endif - "bal L_sp_521_sqr_17_op_done_%=\n\t" - "\n" - "L_sp_521_sqr_17_op_sqr_%=: \n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_521_sqr_17_inner_done_%=\n\t" + "blt L_sp_521_sqr_17_inner_%=\n\t" "ldr lr, [%[a], r3]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) "lsl r9, lr, #16\n\t" @@ -108226,33 +109219,49 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_521_sqr_17_op_done_%=: \n\t" - "add r3, r3, #4\n\t" - "sub r4, r4, #4\n\t" - "cmp r3, #0x44\n\t" - "beq L_sp_521_sqr_17_inner_done_%=\n\t" - "cmp r3, r4\n\t" - "bgt L_sp_521_sqr_17_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_521_sqr_17_inner_%=\n\t" - "\n" "L_sp_521_sqr_17_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0x80\n\t" + "cmp r5, #0x7c\n\t" "ble L_sp_521_sqr_17_outer_%=\n\t" + "ldr lr, 
[%[a], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "ldm sp!, {r6, r7}\n\t" "stm %[r]!, {r6, r7}\n\t" "sub r5, r5, #8\n\t" "\n" "L_sp_521_sqr_17_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_521_sqr_17_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -115841,8 +116850,8 @@ SP_NOINLINE static void sp_521_mont_sqr_17(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_521_mont_sqr_n_17(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_521_mont_sqr_n_17(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_521_mont_sqr_17(r, a, m, mp); for (; n > 1; n--) { @@ -125146,7 +126155,7 @@ int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. 
@@ -125202,7 +126211,7 @@ static int sp_521_ecc_is_point_17(const sp_point_521* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -141063,10 +142072,13 @@ static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b __asm__ __volatile__ ( "sub sp, sp, #0x100\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_1024_mul_32_outer_%=: \n\t" "subs r3, r5, #0x7c\n\t" @@ -141111,13 +142123,86 @@ static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" #endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #0x80\n\t" - "beq L_sp_1024_mul_32_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble L_sp_1024_mul_32_inner_%=\n\t" + "cmp r3, r4\n\t" + "bgt 
L_sp_1024_mul_32_inner_done_%=\n\t" + "blt L_sp_1024_mul_32_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "\n" "L_sp_1024_mul_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" @@ -141125,14 +142210,46 @@ static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0xf8\n\t" + "cmp r5, #0xf4\n\t" "ble L_sp_1024_mul_32_outer_%=\n\t" + "ldr lr, [%[a], #124]\n\t" + "ldr r11, [%[b], #124]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, 
r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_1024_mul_32_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_1024_mul_32_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -141152,10 +142269,12 @@ static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #0x100\n\t" - "mov r6, #0\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" + "mov r5, #4\n\t" "\n" "L_sp_1024_sqr_32_outer_%=: \n\t" "subs r3, r5, #0x7c\n\t" @@ -141164,8 +142283,6 @@ static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) "sub r4, r5, r3\n\t" "\n" "L_sp_1024_sqr_32_inner_%=: \n\t" - "cmp r4, r3\n\t" - "beq L_sp_1024_sqr_32_op_sqr_%=\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -141217,9 +142334,11 @@ static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" #endif - "bal L_sp_1024_sqr_32_op_done_%=\n\t" - "\n" - "L_sp_1024_sqr_32_op_sqr_%=: \n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_1024_sqr_32_inner_done_%=\n\t" + "blt L_sp_1024_sqr_32_inner_%=\n\t" "ldr lr, [%[a], r3]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) "lsl r9, lr, #16\n\t" @@ -141248,30 +142367,46 @@ static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_1024_sqr_32_op_done_%=: \n\t" - "add r3, r3, #4\n\t" - "sub r4, r4, #4\n\t" - "cmp r3, #0x80\n\t" - "beq L_sp_1024_sqr_32_inner_done_%=\n\t" - "cmp r3, r4\n\t" - "bgt L_sp_1024_sqr_32_inner_done_%=\n\t" - "cmp r3, r5\n\t" - "ble 
L_sp_1024_sqr_32_inner_%=\n\t" - "\n" "L_sp_1024_sqr_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #0xf8\n\t" + "cmp r5, #0xf4\n\t" "ble L_sp_1024_sqr_32_outer_%=\n\t" + "ldr lr, [%[a], #124]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" "\n" "L_sp_1024_sqr_32_store_%=: \n\t" - "ldm sp!, {r6, r7, r8, r9}\n\t" - "stm %[r]!, {r6, r7, r8, r9}\n\t" - "subs r5, r5, #16\n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" "bgt L_sp_1024_sqr_32_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -155455,7 +156590,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) } } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -155515,7 +156650,7 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index 2ba0058e93..ed66e6d198 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -55,6 +55,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -98,7 +99,7 @@ static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n) "subs x6, %[n], 8\n\t" "mov x7, xzr\n\t" "blt 2f\n\t" - /* Put in mulitples of 8 bytes. */ + /* Put in multiples of 8 bytes. */ "1:\n\t" "ldr x8, [x4], -8\n\t" "subs x6, x6, 8\n\t" @@ -6991,7 +6992,7 @@ static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) "subs x6, %[n], 8\n\t" "mov x7, xzr\n\t" "blt 2f\n\t" - /* Put in mulitples of 8 bytes. */ + /* Put in multiples of 8 bytes. */ "1:\n\t" "ldr x8, [x4], -8\n\t" "subs x6, x6, 8\n\t" @@ -16594,7 +16595,7 @@ static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) "subs x6, %[n], 8\n\t" "mov x7, xzr\n\t" "blt 2f\n\t" - /* Put in mulitples of 8 bytes. */ + /* Put in multiples of 8 bytes. */ "1:\n\t" "ldr x8, [x4], -8\n\t" "subs x6, x6, 8\n\t" @@ -21871,7 +21872,8 @@ static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) +SP_NOINLINE static void sp_256_mul_4(sp_digit* r, const sp_digit* a, + const sp_digit* b) { __asm__ __volatile__ ( "ldp x13, x14, [%[a], 0]\n\t" @@ -21977,7 +21979,7 @@ static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_256_sqr_4(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_256_sqr_4(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( "ldp x12, x13, [%[a], 0]\n\t" @@ -22420,8 +22422,8 @@ static void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, sp_digit m) * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m, sp_digit mp) { (void)m; (void)mp; @@ -22595,8 +22597,8 @@ static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const sp_digit* m, - sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, + const sp_digit* m, sp_digit mp) { (void)m; (void)mp; @@ -22740,8 +22742,8 @@ static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const sp_digit* m, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_n_4(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_4(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_4(r, a, m, mp); for (; n > 1; n--) { @@ -23080,7 +23082,8 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_dbl_4(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x3, x4, [%[a]]\n\t" @@ -23120,7 +23123,8 @@ static void sp_256_mont_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) * a Number to triple in Montgomery form. * m Modulus (prime). 
*/ -static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x9, x10, [%[a]]\n\t" @@ -23175,8 +23179,8 @@ static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -23217,7 +23221,8 @@ static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to divide. * m Modulus (prime). */ -static void sp_256_mont_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_div2_4(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x3, x4, [%[a], 0]\n\t" @@ -23249,8 +23254,8 @@ static void sp_256_mont_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_rsb_sub_dbl_4(sp_digit* r, const sp_digit* a, - sp_digit* b, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_rsb_sub_dbl_4(sp_digit* r, + const sp_digit* a, sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x8, x9, [%[b]]\n\t" @@ -23326,8 +23331,8 @@ static void sp_256_mont_rsb_sub_dbl_4(sp_digit* r, const sp_digit* a, * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ -static void sp_256_mont_add_sub_4(sp_digit* ra, sp_digit* rs, const sp_digit* a, - const sp_digit* b, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_add_sub_4(sp_digit* ra, + sp_digit* rs, const sp_digit* a, const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -23806,7 +23811,8 @@ static void sp_256_proj_point_add_4(sp_point_256* r, : [r] "r" (r), [p] "r" (p), [q] "r" (q), [x] "r" (x), [y] "r" (y), [z] "r" (z) : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", - "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", + "cc" ); } } @@ -24038,7 +24044,8 @@ static int sp_256_proj_point_add_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, : [r] "r" (r), [p] "r" (p), [q] "r" (q), [x] "r" (ctx->x), [y] "r" (ctx->y), [z] "r" (ctx->z) : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", - "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", + "cc" ); } ctx->state = 25; @@ -24281,8 +24288,8 @@ static void sp_256_ecc_recode_6_4(const sp_digit* k, ecc_recode_256* v) * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ -static void sp_256_get_point_33_4(sp_point_256* r, const sp_point_256* table, - int idx) +SP_NOINLINE static void sp_256_get_point_33_4(sp_point_256* r, + const sp_point_256* table, int idx) { __asm__ __volatile__ ( "mov w30, #1\n\t" @@ -24339,7 +24346,7 @@ static void sp_256_get_point_33_4(sp_point_256* r, const sp_point_256* table, "stp x13, x14, [%[r], #144]\n\t" : [table] "+r" (table) : [r] "r" (r), [idx] "r" (idx) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30", "cc" ); } #endif /* !WC_NO_CACHE_RESISTANT */ @@ -24608,7 +24615,8 @@ static void sp_256_proj_point_add_qz1_4(sp_point_256* r, : [r] "r" (r), [p] "r" (p), [q] "r" (q), [x] "r" (x), [y] "r" (y), [z] "r" (z) : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", - "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", + "cc" ); } } @@ -24739,7 +24747,7 @@ static int sp_256_gen_stripe_table_4(const sp_point_256* a, * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ -static void sp_256_get_entry_64_4(sp_point_256* r, +SP_NOINLINE static void sp_256_get_entry_64_4(sp_point_256* r, const sp_table_entry_256* table, int idx) { __asm__ __volatile__ ( @@ -24783,7 +24791,7 @@ static void sp_256_get_entry_64_4(sp_point_256* r, "stp x9, x10, [%[r], #80]\n\t" : [table] "+r" (table) : [r] "r" (r), [idx] "r" (idx) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30", "cc" ); } #endif /* !WC_NO_CACHE_RESISTANT */ @@ -25168,7 +25176,7 @@ static int sp_256_gen_stripe_table_4(const sp_point_256* a, * table Table - start of the entries to access * idx Index of entry to retrieve. */ -static void sp_256_get_entry_256_4(sp_point_256* r, +SP_NOINLINE static void sp_256_get_entry_256_4(sp_point_256* r, const sp_table_entry_256* table, int idx) { __asm__ __volatile__ ( @@ -25212,7 +25220,7 @@ static void sp_256_get_entry_256_4(sp_point_256* r, "stp x9, x10, [%[r], #80]\n\t" : [table] "+r" (table) : [r] "r" (r), [idx] "r" (idx) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30", "cc" ); } #endif /* !WC_NO_CACHE_RESISTANT */ @@ -27367,7 +27375,7 @@ static void sp_256_ecc_recode_7_4(const sp_digit* k, ecc_recode_256* v) * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ -static void sp_256_get_entry_65_4(sp_point_256* r, +SP_NOINLINE static void sp_256_get_entry_65_4(sp_point_256* r, const sp_table_entry_256* table, int idx) { __asm__ __volatile__ ( @@ -27411,7 +27419,7 @@ static void sp_256_get_entry_65_4(sp_point_256* r, "stp x9, x10, [%[r], #80]\n\t" : [table] "+r" (table) : [r] "r" (r), [idx] "r" (idx) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30", "cc" ); } #endif /* !WC_NO_CACHE_RESISTANT */ @@ -39677,7 +39685,7 @@ static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n) "subs x6, %[n], 8\n\t" "mov x7, xzr\n\t" "blt 2f\n\t" - /* Put in mulitples of 8 bytes. */ + /* Put in multiples of 8 bytes. */ "1:\n\t" "ldr x8, [x4], -8\n\t" "subs x6, x6, 8\n\t" @@ -40320,7 +40328,8 @@ static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, const sp_digit * a First operand of the multiplication. * b Second operand of the multiplication. */ -static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b) +SP_NOINLINE static void sp_256_mont_mul_order_4(sp_digit* r, + const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( "ldp x13, x14, [%[a], 0]\n\t" @@ -40545,7 +40554,8 @@ static const uint64_t p256_order_minus_2[4] = { * r Result of the squaring. * a Number to square. */ -static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_256_mont_sqr_order_4(sp_digit* r, + const sp_digit* a) { __asm__ __volatile__ ( "ldp x12, x13, [%[a], 0]\n\t" @@ -40731,7 +40741,8 @@ static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a) * r Result of the squaring. * a Number to square. 
*/ -static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n) +SP_NOINLINE static void sp_256_mont_sqr_n_order_4(sp_digit* r, + const sp_digit* a, int n) { __asm__ __volatile__ ( @@ -42079,8 +42090,8 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -42116,7 +42127,7 @@ static void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b, (void)m; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -42172,7 +42183,7 @@ static int sp_256_ecc_is_point_4(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -43960,8 +43971,8 @@ SP_NOINLINE static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_n_6(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_6(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_6(r, a, m, mp); for (; n > 1; n--) { @@ -44249,8 +44260,8 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). 
*/ -static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { sp_digit o; @@ -44264,7 +44275,8 @@ static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -44278,7 +44290,8 @@ static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -44376,8 +44389,8 @@ static sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { sp_digit o; @@ -44412,7 +44425,8 @@ static void sp_384_rshift1_6(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_384_mont_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_384_mont_div2_6(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -45259,8 +45273,8 @@ static void sp_384_ecc_recode_6_6(const sp_digit* k, ecc_recode_384* v) * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ -static void sp_384_get_point_33_6(sp_point_384* r, const sp_point_384* table, - int idx) +SP_NOINLINE static void sp_384_get_point_33_6(sp_point_384* r, + const sp_point_384* table, int idx) { int i; sp_digit mask; @@ -66438,7 +66452,7 @@ static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n) "subs x6, %[n], 8\n\t" "mov x7, xzr\n\t" "blt 2f\n\t" - /* Put in mulitples of 8 bytes. */ + /* Put in multiples of 8 bytes. */ "1:\n\t" "ldr x8, [x4], -8\n\t" "subs x6, x6, 8\n\t" @@ -68193,7 +68207,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -68249,7 +68263,7 @@ static int sp_384_ecc_is_point_6(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -72147,8 +72161,8 @@ SP_NOINLINE static void sp_521_mont_sqr_9(sp_digit* r, const sp_digit* a, const * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_521_mont_sqr_n_9(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_521_mont_sqr_n_9(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_521_mont_sqr_9(r, a, m, mp); for (; n > 1; n--) { @@ -72447,8 +72461,8 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, * b Second number to add in Montgomery form. * m Modulus (prime). 
*/ -static void sp_521_mont_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_521_mont_add_9(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -72500,7 +72514,8 @@ static void sp_521_mont_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_521_mont_dbl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_521_mont_dbl_9(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -72547,7 +72562,8 @@ static void sp_521_mont_dbl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_521_mont_tpl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_521_mont_tpl_9(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -72604,8 +72620,8 @@ static void sp_521_mont_tpl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_521_mont_sub_9(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_521_mont_sub_9(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -72786,7 +72802,8 @@ static void sp_521_rshift1_9(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_521_mont_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_521_mont_div2_9(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -73635,8 +73652,8 @@ static void sp_521_ecc_recode_6_9(const sp_digit* k, ecc_recode_521* v) * table Table - start of the entries to access * idx Index of entry to retrieve. */ -static void sp_521_get_point_33_9(sp_point_521* r, const sp_point_521* table, - int idx) +SP_NOINLINE static void sp_521_get_point_33_9(sp_point_521* r, + const sp_point_521* table, int idx) { int i; sp_digit mask; @@ -111599,7 +111616,7 @@ static void sp_521_from_bin(sp_digit* r, int size, const byte* a, int n) "subs x6, %[n], 8\n\t" "mov x7, xzr\n\t" "blt 2f\n\t" - /* Put in mulitples of 8 bytes. */ + /* Put in multiples of 8 bytes. */ "1:\n\t" "ldr x8, [x4], -8\n\t" "subs x6, x6, 8\n\t" @@ -113134,7 +113151,7 @@ int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -113190,7 +113207,7 @@ static int sp_521_ecc_is_point_9(const sp_point_521* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -116257,8 +116274,8 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. * m Modulus (prime). 
*/ -static void sp_1024_mont_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_add_16(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -116358,7 +116375,8 @@ static void sp_1024_mont_add_16(sp_digit* r, const sp_digit* a, const sp_digit* * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_dbl_16(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_dbl_16(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -116450,7 +116468,8 @@ static void sp_1024_mont_dbl_16(sp_digit* r, const sp_digit* a, const sp_digit* * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_tpl_16(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_tpl_16(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -116612,8 +116631,8 @@ static void sp_1024_mont_tpl_16(sp_digit* r, const sp_digit* a, const sp_digit* * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_sub_16(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -116881,7 +116900,8 @@ static void sp_1024_rshift1_16(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_1024_mont_div2_16(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_div2_16(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -125220,7 +125240,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) "subs x6, %[n], 8\n\t" "mov x7, xzr\n\t" "blt 2f\n\t" - /* Put in mulitples of 8 bytes. */ + /* Put in multiples of 8 bytes. */ "1:\n\t" "ldr x8, [x4], -8\n\t" "subs x6, x6, 8\n\t" @@ -125314,7 +125334,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) ); } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -125374,7 +125394,7 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 1873ef373b..98a338b686 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -55,6 +55,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -98861,8 +98862,8 @@ SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. 
*/ -static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_8(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_8(r, a, m, mp); for (; n > 1; n--) { @@ -107673,7 +107674,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -107729,7 +107730,7 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -110309,8 +110310,8 @@ SP_NOINLINE static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_12(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_12(r, a, m, mp); for (; n > 1; n--) { @@ -118869,7 +118870,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -118925,7 +118926,7 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. 
* pY Y ordinate of EC point. @@ -122593,8 +122594,8 @@ SP_NOINLINE static void sp_521_mont_sqr_17(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_521_mont_sqr_n_17(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_521_mont_sqr_n_17(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_521_mont_sqr_17(r, a, m, mp); for (; n > 1; n--) { @@ -135811,7 +135812,7 @@ int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -135867,7 +135868,7 @@ static int sp_521_ecc_is_point_17(const sp_point_521* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -218580,7 +218581,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) } } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -218640,7 +218641,7 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 37a7ea28dc..468e0fcfef 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -59,6 +59,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -20987,8 +20988,8 @@ SP_NOINLINE static void sp_256_mont_sqr_9(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_n_9(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_9(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_9(r, a, m, mp); for (; n > 1; n--) { @@ -26540,7 +26541,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -26596,7 +26597,7 @@ static int sp_256_ecc_is_point_9(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -28395,8 +28396,8 @@ SP_NOINLINE static void sp_384_mont_sqr_15(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. 
*/ -static void sp_384_mont_sqr_n_15(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_15(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_15(r, a, m, mp); for (; n > 1; n--) { @@ -34621,7 +34622,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -34677,7 +34678,7 @@ static int sp_384_ecc_is_point_15(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -36068,8 +36069,8 @@ SP_NOINLINE static void sp_521_mont_sqr_21(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_521_mont_sqr_n_21(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_521_mont_sqr_n_21(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_521_mont_sqr_21(r, a, m, mp); for (; n > 1; n--) { @@ -42795,7 +42796,7 @@ int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -42851,7 +42852,7 @@ static int sp_521_ecc_is_point_21(const sp_point_521* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. 
* pY Y ordinate of EC point. @@ -54684,7 +54685,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) } } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -54744,7 +54745,7 @@ static int sp_1024_ecc_is_point_42(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index d1888cd474..a2b97d8169 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -59,6 +59,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -21967,8 +21968,8 @@ SP_NOINLINE static void sp_256_mont_sqr_5(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_n_5(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_5(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_5(r, a, m, mp); for (; n > 1; n--) { @@ -27434,7 +27435,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -27490,7 +27491,7 @@ static int sp_256_ecc_is_point_5(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. 
+/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -28871,8 +28872,8 @@ SP_NOINLINE static void sp_384_mont_sqr_7(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_n_7(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_7(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_7(r, a, m, mp); for (; n > 1; n--) { @@ -34902,7 +34903,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -34958,7 +34959,7 @@ static int sp_384_ecc_is_point_7(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -36409,8 +36410,8 @@ SP_NOINLINE static void sp_521_mont_sqr_9(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_521_mont_sqr_n_9(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_521_mont_sqr_n_9(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_521_mont_sqr_9(r, a, m, mp); for (; n > 1; n--) { @@ -42386,7 +42387,7 @@ int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. 
@@ -42442,7 +42443,7 @@ static int sp_521_ecc_is_point_9(const sp_point_521* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -53268,7 +53269,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) } } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -53328,7 +53329,7 @@ static int sp_1024_ecc_is_point_18(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 48263c751d..285f4eb3d5 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -55,6 +55,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -2222,7 +2223,7 @@ static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x100\n\t" "\n" - "L_sp_2048_add_64_word_%=:\n\t" + "L_sp_2048_add_64_word:\n\t" "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" @@ -2235,9 +2236,9 @@ static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b "ADC r3, r4, #0x0\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_2048_add_64_word_%=\n\t" + "BNE L_sp_2048_add_64_word\n\t" #else - "BNE.N L_sp_2048_add_64_word_%=\n\t" + "BNE.N L_sp_2048_add_64_word\n\t" #endif "MOV %[r], r3\n\t" : 
[r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -2269,7 +2270,7 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) "MOV r10, #0x0\n\t" "ADD r11, %[a], #0x100\n\t" "\n" - "L_sp_2048_sub_in_pkace_64_word_%=:\n\t" + "L_sp_2048_sub_in_pkace_64_word:\n\t" "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" "LDM %[b]!, {r6, r7, r8, r9}\n\t" @@ -2281,9 +2282,9 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_2048_sub_in_pkace_64_word_%=\n\t" + "BNE L_sp_2048_sub_in_pkace_64_word\n\t" #else - "BNE.N L_sp_2048_sub_in_pkace_64_word_%=\n\t" + "BNE.N L_sp_2048_sub_in_pkace_64_word\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -2315,61 +2316,80 @@ static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "SUB sp, sp, #0x200\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "LDR r11, [%[b]]\n\t" + "UMULL r8, r6, lr, r11\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_2048_mul_64_outer_%=:\n\t" + "L_sp_2048_mul_64_outer:\n\t" "SUBS r3, r5, #0xfc\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_2048_mul_64_inner_%=:\n\t" + "L_sp_2048_mul_64_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[b], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" + "LDR lr, [%[a], r4]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x100\n\t" + "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_2048_mul_64_inner_done_%=\n\t" + "BGT L_sp_2048_mul_64_inner_done\n\t" #else - "BEQ.N 
L_sp_2048_mul_64_inner_done_%=\n\t" + "BGT.N L_sp_2048_mul_64_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_2048_mul_64_inner_%=\n\t" + "BLT L_sp_2048_mul_64_inner\n\t" #else - "BLE.N L_sp_2048_mul_64_inner_%=\n\t" + "BLT.N L_sp_2048_mul_64_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_2048_mul_64_inner_done_%=:\n\t" + "L_sp_2048_mul_64_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x1f8\n\t" + "CMP r5, #0x1f4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_2048_mul_64_outer_%=\n\t" + "BLE L_sp_2048_mul_64_outer\n\t" #else - "BLE.N L_sp_2048_mul_64_outer_%=\n\t" + "BLE.N L_sp_2048_mul_64_outer\n\t" #endif + "LDR lr, [%[a], #252]\n\t" + "LDR r11, [%[b], #252]\n\t" + "UMLAL r6, r7, lr, r11\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_2048_mul_64_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_2048_mul_64_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_2048_mul_64_store_%=\n\t" + "BGT L_sp_2048_mul_64_store\n\t" #else - "BGT.N L_sp_2048_mul_64_store_%=\n\t" + "BGT.N L_sp_2048_mul_64_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -2395,24 +2415,20 @@ static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "SUB sp, sp, #0x200\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "UMULL r8, r6, lr, lr\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" - "MOV r5, 
#0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_2048_sqr_64_outer_%=:\n\t" + "L_sp_2048_sqr_64_outer:\n\t" "SUBS r3, r5, #0xfc\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_2048_sqr_64_inner_%=:\n\t" - "CMP r4, r3\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_2048_sqr_64_op_sqr_%=\n\t" -#else - "BEQ.N L_sp_2048_sqr_64_op_sqr_%=\n\t" -#endif + "L_sp_2048_sqr_64_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[a], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" @@ -2422,59 +2438,51 @@ static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" - "bal L_sp_2048_sqr_64_op_done_%=\n\t" - "\n" - "L_sp_2048_sqr_64_op_sqr_%=:\n\t" - "LDR lr, [%[a], r3]\n\t" - "UMULL r9, r10, lr, lr\n\t" - "ADDS r6, r6, r9\n\t" - "ADCS r7, r7, r10\n\t" - "ADC r8, r8, #0x0\n\t" - "\n" - "L_sp_2048_sqr_64_op_done_%=:\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x100\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_2048_sqr_64_inner_done_%=\n\t" -#else - "BEQ.N L_sp_2048_sqr_64_inner_done_%=\n\t" -#endif "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_2048_sqr_64_inner_done_%=\n\t" + "BGT L_sp_2048_sqr_64_inner_done\n\t" #else - "BGT.N L_sp_2048_sqr_64_inner_done_%=\n\t" + "BGT.N L_sp_2048_sqr_64_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_2048_sqr_64_inner_%=\n\t" + "BLT L_sp_2048_sqr_64_inner\n\t" #else - "BLE.N L_sp_2048_sqr_64_inner_%=\n\t" + "BLT.N L_sp_2048_sqr_64_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "UMULL r9, r10, lr, lr\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_2048_sqr_64_inner_done_%=:\n\t" + "L_sp_2048_sqr_64_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" 
"MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x1f8\n\t" + "CMP r5, #0x1f4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_2048_sqr_64_outer_%=\n\t" + "BLE L_sp_2048_sqr_64_outer\n\t" #else - "BLE.N L_sp_2048_sqr_64_outer_%=\n\t" + "BLE.N L_sp_2048_sqr_64_outer\n\t" #endif + "LDR lr, [%[a], #252]\n\t" + "UMLAL r6, r7, lr, lr\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_2048_sqr_64_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_2048_sqr_64_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_2048_sqr_64_store_%=\n\t" + "BGT L_sp_2048_sqr_64_store\n\t" #else - "BGT.N L_sp_2048_sqr_64_store_%=\n\t" + "BGT.N L_sp_2048_sqr_64_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a) : @@ -2524,7 +2532,7 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x80\n\t" "\n" - "L_sp_2048_add_32_word_%=:\n\t" + "L_sp_2048_add_32_word:\n\t" "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" @@ -2537,9 +2545,9 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b "ADC r3, r4, #0x0\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_2048_add_32_word_%=\n\t" + "BNE L_sp_2048_add_32_word\n\t" #else - "BNE.N L_sp_2048_add_32_word_%=\n\t" + "BNE.N L_sp_2048_add_32_word\n\t" #endif "MOV %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -2571,7 +2579,7 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) "MOV r10, #0x0\n\t" "ADD r11, %[a], #0x80\n\t" "\n" - 
"L_sp_2048_sub_in_pkace_32_word_%=:\n\t" + "L_sp_2048_sub_in_pkace_32_word:\n\t" "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" "LDM %[b]!, {r6, r7, r8, r9}\n\t" @@ -2583,9 +2591,9 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_2048_sub_in_pkace_32_word_%=\n\t" + "BNE L_sp_2048_sub_in_pkace_32_word\n\t" #else - "BNE.N L_sp_2048_sub_in_pkace_32_word_%=\n\t" + "BNE.N L_sp_2048_sub_in_pkace_32_word\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -2617,61 +2625,80 @@ static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "LDR r11, [%[b]]\n\t" + "UMULL r8, r6, lr, r11\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_2048_mul_32_outer_%=:\n\t" + "L_sp_2048_mul_32_outer:\n\t" "SUBS r3, r5, #0x7c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_2048_mul_32_inner_%=:\n\t" + "L_sp_2048_mul_32_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[b], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" + "LDR lr, [%[a], r4]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x80\n\t" + "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_2048_mul_32_inner_done_%=\n\t" + "BGT L_sp_2048_mul_32_inner_done\n\t" #else - "BEQ.N L_sp_2048_mul_32_inner_done_%=\n\t" + "BGT.N L_sp_2048_mul_32_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE 
L_sp_2048_mul_32_inner_%=\n\t" + "BLT L_sp_2048_mul_32_inner\n\t" #else - "BLE.N L_sp_2048_mul_32_inner_%=\n\t" + "BLT.N L_sp_2048_mul_32_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_2048_mul_32_inner_done_%=:\n\t" + "L_sp_2048_mul_32_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0xf8\n\t" + "CMP r5, #0xf4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_2048_mul_32_outer_%=\n\t" + "BLE L_sp_2048_mul_32_outer\n\t" #else - "BLE.N L_sp_2048_mul_32_outer_%=\n\t" + "BLE.N L_sp_2048_mul_32_outer\n\t" #endif + "LDR lr, [%[a], #124]\n\t" + "LDR r11, [%[b], #124]\n\t" + "UMLAL r6, r7, lr, r11\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_2048_mul_32_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_2048_mul_32_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_2048_mul_32_store_%=\n\t" + "BGT L_sp_2048_mul_32_store\n\t" #else - "BGT.N L_sp_2048_mul_32_store_%=\n\t" + "BGT.N L_sp_2048_mul_32_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -2697,24 +2724,20 @@ static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "UMULL r8, r6, lr, lr\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_2048_sqr_32_outer_%=:\n\t" + "L_sp_2048_sqr_32_outer:\n\t" "SUBS r3, r5, #0x7c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, 
r5, r3\n\t" "\n" - "L_sp_2048_sqr_32_inner_%=:\n\t" - "CMP r4, r3\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_2048_sqr_32_op_sqr_%=\n\t" -#else - "BEQ.N L_sp_2048_sqr_32_op_sqr_%=\n\t" -#endif + "L_sp_2048_sqr_32_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[a], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" @@ -2724,59 +2747,51 @@ static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" - "bal L_sp_2048_sqr_32_op_done_%=\n\t" - "\n" - "L_sp_2048_sqr_32_op_sqr_%=:\n\t" - "LDR lr, [%[a], r3]\n\t" - "UMULL r9, r10, lr, lr\n\t" - "ADDS r6, r6, r9\n\t" - "ADCS r7, r7, r10\n\t" - "ADC r8, r8, #0x0\n\t" - "\n" - "L_sp_2048_sqr_32_op_done_%=:\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x80\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_2048_sqr_32_inner_done_%=\n\t" -#else - "BEQ.N L_sp_2048_sqr_32_inner_done_%=\n\t" -#endif "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_2048_sqr_32_inner_done_%=\n\t" + "BGT L_sp_2048_sqr_32_inner_done\n\t" #else - "BGT.N L_sp_2048_sqr_32_inner_done_%=\n\t" + "BGT.N L_sp_2048_sqr_32_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_2048_sqr_32_inner_%=\n\t" + "BLT L_sp_2048_sqr_32_inner\n\t" #else - "BLE.N L_sp_2048_sqr_32_inner_%=\n\t" + "BLT.N L_sp_2048_sqr_32_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "UMULL r9, r10, lr, lr\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_2048_sqr_32_inner_done_%=:\n\t" + "L_sp_2048_sqr_32_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0xf8\n\t" + "CMP r5, #0xf4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE 
L_sp_2048_sqr_32_outer_%=\n\t" + "BLE L_sp_2048_sqr_32_outer\n\t" #else - "BLE.N L_sp_2048_sqr_32_outer_%=\n\t" + "BLE.N L_sp_2048_sqr_32_outer\n\t" #endif + "LDR lr, [%[a], #124]\n\t" + "UMLAL r6, r7, lr, lr\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_2048_sqr_32_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_2048_sqr_32_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_2048_sqr_32_store_%=\n\t" + "BGT L_sp_2048_sqr_32_store\n\t" #else - "BGT.N L_sp_2048_sqr_32_store_%=\n\t" + "BGT.N L_sp_2048_sqr_32_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a) : @@ -2835,7 +2850,7 @@ static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) "MOV r5, #0x0\n\t" "MOV r9, #0x4\n\t" "\n" - "L_sp_2048_mul_d_64_word_%=:\n\t" + "L_sp_2048_mul_d_64_word:\n\t" /* A[i] * B */ "LDR r8, [%[a], r9]\n\t" "UMULL r6, r7, %[b], r8\n\t" @@ -2849,9 +2864,9 @@ static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) "ADD r9, r9, #0x4\n\t" "CMP r9, #0x100\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mul_d_64_word_%=\n\t" + "BLT L_sp_2048_mul_d_64_word\n\t" #else - "BLT.N L_sp_2048_mul_d_64_word_%=\n\t" + "BLT.N L_sp_2048_mul_d_64_word\n\t" #endif "STR r3, [%[r], #256]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -3249,7 +3264,7 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig "MOV r4, #0x0\n\t" "MOV r5, #0x0\n\t" "\n" - "L_sp_2048_cond_sub_32_words_%=:\n\t" + "L_sp_2048_cond_sub_32_words:\n\t" "SUBS r4, r8, r4\n\t" "LDR r6, [%[a], r5]\n\t" "LDR r7, [%[b], r5]\n\t" @@ -3260,9 +3275,9 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig "ADD r5, r5, 
#0x4\n\t" "CMP r5, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_cond_sub_32_words_%=\n\t" + "BLT L_sp_2048_cond_sub_32_words\n\t" #else - "BLT.N L_sp_2048_cond_sub_32_words_%=\n\t" + "BLT.N L_sp_2048_cond_sub_32_words\n\t" #endif "MOV %[r], r4\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -3445,7 +3460,7 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_2048_mont_reduce_32_word_%=:\n\t" + "L_sp_2048_mont_reduce_32_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -3708,9 +3723,9 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r11, #0x80\n\t" #ifdef __GNUC__ - "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" + "BLT L_sp_2048_mont_reduce_32_word\n\t" #else - "BLT.W L_sp_2048_mont_reduce_32_word_%=\n\t" + "BLT.W L_sp_2048_mont_reduce_32_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -3749,7 +3764,7 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_2048_mont_reduce_32_word_%=:\n\t" + "L_sp_2048_mont_reduce_32_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ -3757,7 +3772,7 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_2048_mont_reduce_32_mul_%=:\n\t" + "L_sp_2048_mont_reduce_32_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -3800,9 +3815,9 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD r12, r12, #0x4\n\t" "CMP r12, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mont_reduce_32_mul_%=\n\t" + "BLT L_sp_2048_mont_reduce_32_mul\n\t" #else - "BLT.N 
L_sp_2048_mont_reduce_32_mul_%=\n\t" + "BLT.N L_sp_2048_mont_reduce_32_mul\n\t" #endif "LDR r10, [%[a], #128]\n\t" "ADDS r4, r4, r3\n\t" @@ -3816,9 +3831,9 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r9, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" + "BLT L_sp_2048_mont_reduce_32_word\n\t" #else - "BLT.N L_sp_2048_mont_reduce_32_word_%=\n\t" + "BLT.N L_sp_2048_mont_reduce_32_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -3860,7 +3875,7 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" - "L_sp_2048_mont_reduce_32_word_%=:\n\t" + "L_sp_2048_mont_reduce_32_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], r6\n\t" /* a[i+0] += m[0] * mu */ @@ -4028,9 +4043,9 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0x80\n\t" #ifdef __GNUC__ - "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" + "BLT L_sp_2048_mont_reduce_32_word\n\t" #else - "BLT.W L_sp_2048_mont_reduce_32_word_%=\n\t" + "BLT.W L_sp_2048_mont_reduce_32_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -4072,7 +4087,7 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_2048_mont_reduce_32_word_%=:\n\t" + "L_sp_2048_mont_reduce_32_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ -4080,7 +4095,7 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_2048_mont_reduce_32_mul_%=:\n\t" + "L_sp_2048_mont_reduce_32_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -4111,9 +4126,9 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD 
r12, r12, #0x4\n\t" "CMP r12, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mont_reduce_32_mul_%=\n\t" + "BLT L_sp_2048_mont_reduce_32_mul\n\t" #else - "BLT.N L_sp_2048_mont_reduce_32_mul_%=\n\t" + "BLT.N L_sp_2048_mont_reduce_32_mul\n\t" #endif "LDR r10, [%[a], #128]\n\t" "ADDS r4, r4, r3\n\t" @@ -4127,9 +4142,9 @@ static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r9, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" + "BLT L_sp_2048_mont_reduce_32_word\n\t" #else - "BLT.N L_sp_2048_mont_reduce_32_word_%=\n\t" + "BLT.N L_sp_2048_mont_reduce_32_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -4200,7 +4215,7 @@ static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) "MOV r5, #0x0\n\t" "MOV r9, #0x4\n\t" "\n" - "L_sp_2048_mul_d_32_word_%=:\n\t" + "L_sp_2048_mul_d_32_word:\n\t" /* A[i] * B */ "LDR r8, [%[a], r9]\n\t" "UMULL r6, r7, %[b], r8\n\t" @@ -4214,9 +4229,9 @@ static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) "ADD r9, r9, #0x4\n\t" "CMP r9, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mul_d_32_word_%=\n\t" + "BLT L_sp_2048_mul_d_32_word\n\t" #else - "BLT.N L_sp_2048_mul_d_32_word_%=\n\t" + "BLT.N L_sp_2048_mul_d_32_word\n\t" #endif "STR r3, [%[r], #128]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -4423,9 +4438,9 @@ static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) * Note that this is an approximate div. It may give an answer 1 larger. 
*/ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -4488,9 +4503,9 @@ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -4514,7 +4529,7 @@ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) /* Next 30 bits */ "MOV r4, #0x1d\n\t" "\n" - "L_div_2048_word_32_bit_%=:\n\t" + "L_div_2048_word_32_bit:\n\t" "LSLS r6, r6, #1\n\t" "ADC r7, r7, r7\n\t" "SUBS r8, r5, r7\n\t" @@ -4524,7 +4539,7 @@ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) "AND r8, r8, r5\n\t" "SUBS r7, r7, r8\n\t" "SUBS r4, r4, #0x1\n\t" - "bpl L_div_2048_word_32_bit_%=\n\t" + "bpl L_div_2048_word_32_bit\n\t" "ADD r3, r3, r3\n\t" "ADD r3, r3, #0x1\n\t" "UMULL r6, r7, r3, %[div]\n\t" @@ -4576,7 +4591,7 @@ static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x7c\n\t" "\n" - "L_sp_2048_cmp_32_words_%=:\n\t" + "L_sp_2048_cmp_32_words:\n\t" "LDR r4, [%[a], r6]\n\t" "LDR r5, [%[b], r6]\n\t" "AND r4, r4, r3\n\t" @@ -4589,7 +4604,7 @@ static sp_int32 sp_2048_cmp_32(const sp_digit* a, 
const sp_digit* b) "IT ne\n\t" "movne r3, r7\n\t" "SUBS r6, r6, #0x4\n\t" - "bcs L_sp_2048_cmp_32_words_%=\n\t" + "bcs L_sp_2048_cmp_32_words\n\t" "EOR r2, r2, r3\n\t" #else "LDR r4, [%[a], #124]\n\t" @@ -5377,7 +5392,7 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_dig "MOV r4, #0x0\n\t" "MOV r5, #0x0\n\t" "\n" - "L_sp_2048_cond_sub_64_words_%=:\n\t" + "L_sp_2048_cond_sub_64_words:\n\t" "SUBS r4, r8, r4\n\t" "LDR r6, [%[a], r5]\n\t" "LDR r7, [%[b], r5]\n\t" @@ -5388,9 +5403,9 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_dig "ADD r5, r5, #0x4\n\t" "CMP r5, #0x100\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_cond_sub_64_words_%=\n\t" + "BLT L_sp_2048_cond_sub_64_words\n\t" #else - "BLT.N L_sp_2048_cond_sub_64_words_%=\n\t" + "BLT.N L_sp_2048_cond_sub_64_words\n\t" #endif "MOV %[r], r4\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -5685,7 +5700,7 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_2048_mont_reduce_64_word_%=:\n\t" + "L_sp_2048_mont_reduce_64_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -6204,9 +6219,9 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r11, #0x100\n\t" #ifdef __GNUC__ - "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" + "BLT L_sp_2048_mont_reduce_64_word\n\t" #else - "BLT.W L_sp_2048_mont_reduce_64_word_%=\n\t" + "BLT.W L_sp_2048_mont_reduce_64_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -6245,7 +6260,7 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_2048_mont_reduce_64_word_%=:\n\t" + "L_sp_2048_mont_reduce_64_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ -6253,7 +6268,7 @@ static 
void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_2048_mont_reduce_64_mul_%=:\n\t" + "L_sp_2048_mont_reduce_64_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -6296,9 +6311,9 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD r12, r12, #0x4\n\t" "CMP r12, #0x100\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mont_reduce_64_mul_%=\n\t" + "BLT L_sp_2048_mont_reduce_64_mul\n\t" #else - "BLT.N L_sp_2048_mont_reduce_64_mul_%=\n\t" + "BLT.N L_sp_2048_mont_reduce_64_mul\n\t" #endif "LDR r10, [%[a], #256]\n\t" "ADDS r4, r4, r3\n\t" @@ -6312,9 +6327,9 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r9, #0x100\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" + "BLT L_sp_2048_mont_reduce_64_word\n\t" #else - "BLT.N L_sp_2048_mont_reduce_64_word_%=\n\t" + "BLT.N L_sp_2048_mont_reduce_64_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -6356,7 +6371,7 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" - "L_sp_2048_mont_reduce_64_word_%=:\n\t" + "L_sp_2048_mont_reduce_64_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], r6\n\t" /* a[i+0] += m[0] * mu */ @@ -6684,9 +6699,9 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0x100\n\t" #ifdef __GNUC__ - "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" + "BLT L_sp_2048_mont_reduce_64_word\n\t" #else - "BLT.W L_sp_2048_mont_reduce_64_word_%=\n\t" + "BLT.W L_sp_2048_mont_reduce_64_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -6728,7 +6743,7 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit 
mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_2048_mont_reduce_64_word_%=:\n\t" + "L_sp_2048_mont_reduce_64_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ -6736,7 +6751,7 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_2048_mont_reduce_64_mul_%=:\n\t" + "L_sp_2048_mont_reduce_64_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -6767,9 +6782,9 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD r12, r12, #0x4\n\t" "CMP r12, #0x100\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mont_reduce_64_mul_%=\n\t" + "BLT L_sp_2048_mont_reduce_64_mul\n\t" #else - "BLT.N L_sp_2048_mont_reduce_64_mul_%=\n\t" + "BLT.N L_sp_2048_mont_reduce_64_mul\n\t" #endif "LDR r10, [%[a], #256]\n\t" "ADDS r4, r4, r3\n\t" @@ -6783,9 +6798,9 @@ static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r9, #0x100\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" + "BLT L_sp_2048_mont_reduce_64_word\n\t" #else - "BLT.N L_sp_2048_mont_reduce_64_word_%=\n\t" + "BLT.N L_sp_2048_mont_reduce_64_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -6851,7 +6866,7 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b "MOV r11, #0x0\n\t" "ADD r12, %[a], #0x100\n\t" "\n" - "L_sp_2048_sub_64_word_%=:\n\t" + "L_sp_2048_sub_64_word:\n\t" "RSBS r11, r11, #0x0\n\t" "LDM %[a]!, {r3, r4, r5, r6}\n\t" "LDM %[b]!, {r7, r8, r9, r10}\n\t" @@ -6863,9 +6878,9 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b "SBC r11, r3, r3\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_2048_sub_64_word_%=\n\t" + "BNE 
L_sp_2048_sub_64_word\n\t" #else - "BNE.N L_sp_2048_sub_64_word_%=\n\t" + "BNE.N L_sp_2048_sub_64_word\n\t" #endif "MOV %[r], r11\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -7027,9 +7042,9 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -7092,9 +7107,9 @@ static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -7118,7 +7133,7 @@ static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) /* Next 30 bits */ "MOV r4, #0x1d\n\t" "\n" - "L_div_2048_word_64_bit_%=:\n\t" + "L_div_2048_word_64_bit:\n\t" "LSLS r6, r6, #1\n\t" "ADC r7, r7, r7\n\t" "SUBS r8, r5, r7\n\t" @@ -7128,7 +7143,7 @@ static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) "AND r8, r8, r5\n\t" "SUBS r7, r7, r8\n\t" "SUBS r4, r4, #0x1\n\t" - "bpl L_div_2048_word_64_bit_%=\n\t" + "bpl L_div_2048_word_64_bit\n\t" "ADD r3, r3, r3\n\t" "ADD r3, r3, #0x1\n\t" "UMULL r6, r7, r3, 
%[div]\n\t" @@ -7283,7 +7298,7 @@ static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) #ifdef WOLFSSL_SP_SMALL "MOV r6, #0xfc\n\t" "\n" - "L_sp_2048_cmp_64_words_%=:\n\t" + "L_sp_2048_cmp_64_words:\n\t" "LDR r4, [%[a], r6]\n\t" "LDR r5, [%[b], r6]\n\t" "AND r4, r4, r3\n\t" @@ -7296,7 +7311,7 @@ static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) "IT ne\n\t" "movne r3, r7\n\t" "SUBS r6, r6, #0x4\n\t" - "bcs L_sp_2048_cmp_64_words_%=\n\t" + "bcs L_sp_2048_cmp_64_words\n\t" "EOR r2, r2, r3\n\t" #else "LDR r4, [%[a], #252]\n\t" @@ -8559,7 +8574,7 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig "MOV r8, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_2048_cond_add_32_words_%=:\n\t" + "L_sp_2048_cond_add_32_words:\n\t" "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" @@ -8570,9 +8585,9 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig "ADD r4, r4, #0x4\n\t" "CMP r4, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_2048_cond_add_32_words_%=\n\t" + "BLT L_sp_2048_cond_add_32_words\n\t" #else - "BLT.N L_sp_2048_cond_add_32_words_%=\n\t" + "BLT.N L_sp_2048_cond_add_32_words\n\t" #endif "MOV %[r], r5\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -12945,7 +12960,7 @@ static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x180\n\t" "\n" - "L_sp_3072_add_96_word_%=:\n\t" + "L_sp_3072_add_96_word:\n\t" "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" @@ -12958,9 +12973,9 @@ static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b "ADC r3, r4, #0x0\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_3072_add_96_word_%=\n\t" + "BNE L_sp_3072_add_96_word\n\t" #else - "BNE.N 
L_sp_3072_add_96_word_%=\n\t" + "BNE.N L_sp_3072_add_96_word\n\t" #endif "MOV %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -12992,7 +13007,7 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) "MOV r10, #0x0\n\t" "ADD r11, %[a], #0x180\n\t" "\n" - "L_sp_3072_sub_in_pkace_96_word_%=:\n\t" + "L_sp_3072_sub_in_pkace_96_word:\n\t" "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" "LDM %[b]!, {r6, r7, r8, r9}\n\t" @@ -13004,9 +13019,9 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_3072_sub_in_pkace_96_word_%=\n\t" + "BNE L_sp_3072_sub_in_pkace_96_word\n\t" #else - "BNE.N L_sp_3072_sub_in_pkace_96_word_%=\n\t" + "BNE.N L_sp_3072_sub_in_pkace_96_word\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -13038,61 +13053,80 @@ static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "SUB sp, sp, #0x300\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "LDR r11, [%[b]]\n\t" + "UMULL r8, r6, lr, r11\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_3072_mul_96_outer_%=:\n\t" + "L_sp_3072_mul_96_outer:\n\t" "SUBS r3, r5, #0x17c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_3072_mul_96_inner_%=:\n\t" + "L_sp_3072_mul_96_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[b], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" + "LDR lr, [%[a], r4]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x180\n\t" + "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ 
L_sp_3072_mul_96_inner_done_%=\n\t" + "BGT L_sp_3072_mul_96_inner_done\n\t" #else - "BEQ.N L_sp_3072_mul_96_inner_done_%=\n\t" + "BGT.N L_sp_3072_mul_96_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_3072_mul_96_inner_%=\n\t" + "BLT L_sp_3072_mul_96_inner\n\t" #else - "BLE.N L_sp_3072_mul_96_inner_%=\n\t" + "BLT.N L_sp_3072_mul_96_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_3072_mul_96_inner_done_%=:\n\t" + "L_sp_3072_mul_96_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x2f8\n\t" + "CMP r5, #0x2f4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_3072_mul_96_outer_%=\n\t" + "BLE L_sp_3072_mul_96_outer\n\t" #else - "BLE.N L_sp_3072_mul_96_outer_%=\n\t" + "BLE.N L_sp_3072_mul_96_outer\n\t" #endif + "LDR lr, [%[a], #380]\n\t" + "LDR r11, [%[b], #380]\n\t" + "UMLAL r6, r7, lr, r11\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_3072_mul_96_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_3072_mul_96_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_3072_mul_96_store_%=\n\t" + "BGT L_sp_3072_mul_96_store\n\t" #else - "BGT.N L_sp_3072_mul_96_store_%=\n\t" + "BGT.N L_sp_3072_mul_96_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -13118,24 +13152,20 @@ static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "SUB sp, sp, #0x300\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "UMULL 
r8, r6, lr, lr\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_3072_sqr_96_outer_%=:\n\t" + "L_sp_3072_sqr_96_outer:\n\t" "SUBS r3, r5, #0x17c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_3072_sqr_96_inner_%=:\n\t" - "CMP r4, r3\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_3072_sqr_96_op_sqr_%=\n\t" -#else - "BEQ.N L_sp_3072_sqr_96_op_sqr_%=\n\t" -#endif + "L_sp_3072_sqr_96_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[a], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" @@ -13145,59 +13175,51 @@ static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" - "bal L_sp_3072_sqr_96_op_done_%=\n\t" - "\n" - "L_sp_3072_sqr_96_op_sqr_%=:\n\t" - "LDR lr, [%[a], r3]\n\t" - "UMULL r9, r10, lr, lr\n\t" - "ADDS r6, r6, r9\n\t" - "ADCS r7, r7, r10\n\t" - "ADC r8, r8, #0x0\n\t" - "\n" - "L_sp_3072_sqr_96_op_done_%=:\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x180\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_3072_sqr_96_inner_done_%=\n\t" -#else - "BEQ.N L_sp_3072_sqr_96_inner_done_%=\n\t" -#endif "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_3072_sqr_96_inner_done_%=\n\t" + "BGT L_sp_3072_sqr_96_inner_done\n\t" #else - "BGT.N L_sp_3072_sqr_96_inner_done_%=\n\t" + "BGT.N L_sp_3072_sqr_96_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_3072_sqr_96_inner_%=\n\t" + "BLT L_sp_3072_sqr_96_inner\n\t" #else - "BLE.N L_sp_3072_sqr_96_inner_%=\n\t" + "BLT.N L_sp_3072_sqr_96_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "UMULL r9, r10, lr, lr\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - 
"L_sp_3072_sqr_96_inner_done_%=:\n\t" + "L_sp_3072_sqr_96_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x2f8\n\t" + "CMP r5, #0x2f4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_3072_sqr_96_outer_%=\n\t" + "BLE L_sp_3072_sqr_96_outer\n\t" #else - "BLE.N L_sp_3072_sqr_96_outer_%=\n\t" + "BLE.N L_sp_3072_sqr_96_outer\n\t" #endif + "LDR lr, [%[a], #380]\n\t" + "UMLAL r6, r7, lr, lr\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_3072_sqr_96_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_3072_sqr_96_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_3072_sqr_96_store_%=\n\t" + "BGT L_sp_3072_sqr_96_store\n\t" #else - "BGT.N L_sp_3072_sqr_96_store_%=\n\t" + "BGT.N L_sp_3072_sqr_96_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a) : @@ -13247,7 +13269,7 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b "MOV r3, #0x0\n\t" "ADD r12, %[a], #0xc0\n\t" "\n" - "L_sp_3072_add_48_word_%=:\n\t" + "L_sp_3072_add_48_word:\n\t" "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" @@ -13260,9 +13282,9 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b "ADC r3, r4, #0x0\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_3072_add_48_word_%=\n\t" + "BNE L_sp_3072_add_48_word\n\t" #else - "BNE.N L_sp_3072_add_48_word_%=\n\t" + "BNE.N L_sp_3072_add_48_word\n\t" #endif "MOV %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -13294,7 +13316,7 @@ static sp_digit 
sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) "MOV r10, #0x0\n\t" "ADD r11, %[a], #0xc0\n\t" "\n" - "L_sp_3072_sub_in_pkace_48_word_%=:\n\t" + "L_sp_3072_sub_in_pkace_48_word:\n\t" "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" "LDM %[b]!, {r6, r7, r8, r9}\n\t" @@ -13306,9 +13328,9 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_3072_sub_in_pkace_48_word_%=\n\t" + "BNE L_sp_3072_sub_in_pkace_48_word\n\t" #else - "BNE.N L_sp_3072_sub_in_pkace_48_word_%=\n\t" + "BNE.N L_sp_3072_sub_in_pkace_48_word\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -13340,61 +13362,80 @@ static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "SUB sp, sp, #0x180\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "LDR r11, [%[b]]\n\t" + "UMULL r8, r6, lr, r11\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_3072_mul_48_outer_%=:\n\t" + "L_sp_3072_mul_48_outer:\n\t" "SUBS r3, r5, #0xbc\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_3072_mul_48_inner_%=:\n\t" + "L_sp_3072_mul_48_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[b], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" + "LDR lr, [%[a], r4]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0xc0\n\t" + "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_3072_mul_48_inner_done_%=\n\t" + "BGT L_sp_3072_mul_48_inner_done\n\t" #else - "BEQ.N L_sp_3072_mul_48_inner_done_%=\n\t" + "BGT.N L_sp_3072_mul_48_inner_done\n\t" #endif - 
"CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_3072_mul_48_inner_%=\n\t" + "BLT L_sp_3072_mul_48_inner\n\t" #else - "BLE.N L_sp_3072_mul_48_inner_%=\n\t" + "BLT.N L_sp_3072_mul_48_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_3072_mul_48_inner_done_%=:\n\t" + "L_sp_3072_mul_48_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x178\n\t" + "CMP r5, #0x174\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_3072_mul_48_outer_%=\n\t" + "BLE L_sp_3072_mul_48_outer\n\t" #else - "BLE.N L_sp_3072_mul_48_outer_%=\n\t" + "BLE.N L_sp_3072_mul_48_outer\n\t" #endif + "LDR lr, [%[a], #188]\n\t" + "LDR r11, [%[b], #188]\n\t" + "UMLAL r6, r7, lr, r11\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_3072_mul_48_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_3072_mul_48_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_3072_mul_48_store_%=\n\t" + "BGT L_sp_3072_mul_48_store\n\t" #else - "BGT.N L_sp_3072_mul_48_store_%=\n\t" + "BGT.N L_sp_3072_mul_48_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -13420,24 +13461,20 @@ static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "SUB sp, sp, #0x180\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "UMULL r8, r6, lr, lr\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_3072_sqr_48_outer_%=:\n\t" + 
"L_sp_3072_sqr_48_outer:\n\t" "SUBS r3, r5, #0xbc\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_3072_sqr_48_inner_%=:\n\t" - "CMP r4, r3\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_3072_sqr_48_op_sqr_%=\n\t" -#else - "BEQ.N L_sp_3072_sqr_48_op_sqr_%=\n\t" -#endif + "L_sp_3072_sqr_48_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[a], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" @@ -13447,59 +13484,51 @@ static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" - "bal L_sp_3072_sqr_48_op_done_%=\n\t" - "\n" - "L_sp_3072_sqr_48_op_sqr_%=:\n\t" - "LDR lr, [%[a], r3]\n\t" - "UMULL r9, r10, lr, lr\n\t" - "ADDS r6, r6, r9\n\t" - "ADCS r7, r7, r10\n\t" - "ADC r8, r8, #0x0\n\t" - "\n" - "L_sp_3072_sqr_48_op_done_%=:\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0xc0\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_3072_sqr_48_inner_done_%=\n\t" -#else - "BEQ.N L_sp_3072_sqr_48_inner_done_%=\n\t" -#endif "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_3072_sqr_48_inner_done_%=\n\t" + "BGT L_sp_3072_sqr_48_inner_done\n\t" #else - "BGT.N L_sp_3072_sqr_48_inner_done_%=\n\t" + "BGT.N L_sp_3072_sqr_48_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_3072_sqr_48_inner_%=\n\t" + "BLT L_sp_3072_sqr_48_inner\n\t" #else - "BLE.N L_sp_3072_sqr_48_inner_%=\n\t" + "BLT.N L_sp_3072_sqr_48_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "UMULL r9, r10, lr, lr\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_3072_sqr_48_inner_done_%=:\n\t" + "L_sp_3072_sqr_48_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, 
#0x178\n\t" + "CMP r5, #0x174\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_3072_sqr_48_outer_%=\n\t" + "BLE L_sp_3072_sqr_48_outer\n\t" #else - "BLE.N L_sp_3072_sqr_48_outer_%=\n\t" + "BLE.N L_sp_3072_sqr_48_outer\n\t" #endif + "LDR lr, [%[a], #188]\n\t" + "UMLAL r6, r7, lr, lr\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_3072_sqr_48_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_3072_sqr_48_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_3072_sqr_48_store_%=\n\t" + "BGT L_sp_3072_sqr_48_store\n\t" #else - "BGT.N L_sp_3072_sqr_48_store_%=\n\t" + "BGT.N L_sp_3072_sqr_48_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a) : @@ -13558,7 +13587,7 @@ static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) "MOV r5, #0x0\n\t" "MOV r9, #0x4\n\t" "\n" - "L_sp_3072_mul_d_96_word_%=:\n\t" + "L_sp_3072_mul_d_96_word:\n\t" /* A[i] * B */ "LDR r8, [%[a], r9]\n\t" "UMULL r6, r7, %[b], r8\n\t" @@ -13572,9 +13601,9 @@ static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) "ADD r9, r9, #0x4\n\t" "CMP r9, #0x180\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mul_d_96_word_%=\n\t" + "BLT L_sp_3072_mul_d_96_word\n\t" #else - "BLT.N L_sp_3072_mul_d_96_word_%=\n\t" + "BLT.N L_sp_3072_mul_d_96_word\n\t" #endif "STR r3, [%[r], #384]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -14132,7 +14161,7 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_dig "MOV r4, #0x0\n\t" "MOV r5, #0x0\n\t" "\n" - "L_sp_3072_cond_sub_48_words_%=:\n\t" + "L_sp_3072_cond_sub_48_words:\n\t" "SUBS r4, r8, r4\n\t" "LDR r6, [%[a], r5]\n\t" "LDR r7, [%[b], 
r5]\n\t" @@ -14143,9 +14172,9 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_dig "ADD r5, r5, #0x4\n\t" "CMP r5, #0xc0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_cond_sub_48_words_%=\n\t" + "BLT L_sp_3072_cond_sub_48_words\n\t" #else - "BLT.N L_sp_3072_cond_sub_48_words_%=\n\t" + "BLT.N L_sp_3072_cond_sub_48_words\n\t" #endif "MOV %[r], r4\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -14384,7 +14413,7 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_3072_mont_reduce_48_word_%=:\n\t" + "L_sp_3072_mont_reduce_48_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -14775,9 +14804,9 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r11, #0xc0\n\t" #ifdef __GNUC__ - "BLT L_sp_3072_mont_reduce_48_word_%=\n\t" + "BLT L_sp_3072_mont_reduce_48_word\n\t" #else - "BLT.W L_sp_3072_mont_reduce_48_word_%=\n\t" + "BLT.W L_sp_3072_mont_reduce_48_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -14816,7 +14845,7 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_3072_mont_reduce_48_word_%=:\n\t" + "L_sp_3072_mont_reduce_48_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ -14824,7 +14853,7 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_3072_mont_reduce_48_mul_%=:\n\t" + "L_sp_3072_mont_reduce_48_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -14867,9 +14896,9 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD r12, r12, #0x4\n\t" "CMP r12, #0xc0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || 
defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mont_reduce_48_mul_%=\n\t" + "BLT L_sp_3072_mont_reduce_48_mul\n\t" #else - "BLT.N L_sp_3072_mont_reduce_48_mul_%=\n\t" + "BLT.N L_sp_3072_mont_reduce_48_mul\n\t" #endif "LDR r10, [%[a], #192]\n\t" "ADDS r4, r4, r3\n\t" @@ -14883,9 +14912,9 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r9, #0xc0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mont_reduce_48_word_%=\n\t" + "BLT L_sp_3072_mont_reduce_48_word\n\t" #else - "BLT.N L_sp_3072_mont_reduce_48_word_%=\n\t" + "BLT.N L_sp_3072_mont_reduce_48_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -14927,7 +14956,7 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" - "L_sp_3072_mont_reduce_48_word_%=:\n\t" + "L_sp_3072_mont_reduce_48_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], r6\n\t" /* a[i+0] += m[0] * mu */ @@ -15175,9 +15204,9 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0xc0\n\t" #ifdef __GNUC__ - "BLT L_sp_3072_mont_reduce_48_word_%=\n\t" + "BLT L_sp_3072_mont_reduce_48_word\n\t" #else - "BLT.W L_sp_3072_mont_reduce_48_word_%=\n\t" + "BLT.W L_sp_3072_mont_reduce_48_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -15219,7 +15248,7 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_3072_mont_reduce_48_word_%=:\n\t" + "L_sp_3072_mont_reduce_48_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ -15227,7 +15256,7 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_3072_mont_reduce_48_mul_%=:\n\t" + "L_sp_3072_mont_reduce_48_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], 
r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -15258,9 +15287,9 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD r12, r12, #0x4\n\t" "CMP r12, #0xc0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mont_reduce_48_mul_%=\n\t" + "BLT L_sp_3072_mont_reduce_48_mul\n\t" #else - "BLT.N L_sp_3072_mont_reduce_48_mul_%=\n\t" + "BLT.N L_sp_3072_mont_reduce_48_mul\n\t" #endif "LDR r10, [%[a], #192]\n\t" "ADDS r4, r4, r3\n\t" @@ -15274,9 +15303,9 @@ static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r9, #0xc0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mont_reduce_48_word_%=\n\t" + "BLT L_sp_3072_mont_reduce_48_word\n\t" #else - "BLT.N L_sp_3072_mont_reduce_48_word_%=\n\t" + "BLT.N L_sp_3072_mont_reduce_48_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -15347,7 +15376,7 @@ static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) "MOV r5, #0x0\n\t" "MOV r9, #0x4\n\t" "\n" - "L_sp_3072_mul_d_48_word_%=:\n\t" + "L_sp_3072_mul_d_48_word:\n\t" /* A[i] * B */ "LDR r8, [%[a], r9]\n\t" "UMULL r6, r7, %[b], r8\n\t" @@ -15361,9 +15390,9 @@ static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) "ADD r9, r9, #0x4\n\t" "CMP r9, #0xc0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mul_d_48_word_%=\n\t" + "BLT L_sp_3072_mul_d_48_word\n\t" #else - "BLT.N L_sp_3072_mul_d_48_word_%=\n\t" + "BLT.N L_sp_3072_mul_d_48_word\n\t" #endif "STR r3, [%[r], #192]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -15650,9 +15679,9 @@ static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) * Note that this is an approximate div. It may give an answer 1 larger. 
*/ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -15715,9 +15744,9 @@ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -15741,7 +15770,7 @@ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) /* Next 30 bits */ "MOV r4, #0x1d\n\t" "\n" - "L_div_3072_word_48_bit_%=:\n\t" + "L_div_3072_word_48_bit:\n\t" "LSLS r6, r6, #1\n\t" "ADC r7, r7, r7\n\t" "SUBS r8, r5, r7\n\t" @@ -15751,7 +15780,7 @@ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) "AND r8, r8, r5\n\t" "SUBS r7, r7, r8\n\t" "SUBS r4, r4, #0x1\n\t" - "bpl L_div_3072_word_48_bit_%=\n\t" + "bpl L_div_3072_word_48_bit\n\t" "ADD r3, r3, r3\n\t" "ADD r3, r3, #0x1\n\t" "UMULL r6, r7, r3, %[div]\n\t" @@ -15803,7 +15832,7 @@ static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) #ifdef WOLFSSL_SP_SMALL "MOV r6, #0xbc\n\t" "\n" - "L_sp_3072_cmp_48_words_%=:\n\t" + "L_sp_3072_cmp_48_words:\n\t" "LDR r4, [%[a], r6]\n\t" "LDR r5, [%[b], r6]\n\t" "AND r4, r4, r3\n\t" @@ -15816,7 +15845,7 @@ static sp_int32 sp_3072_cmp_48(const 
sp_digit* a, const sp_digit* b) "IT ne\n\t" "movne r3, r7\n\t" "SUBS r6, r6, #0x4\n\t" - "bcs L_sp_3072_cmp_48_words_%=\n\t" + "bcs L_sp_3072_cmp_48_words\n\t" "EOR r2, r2, r3\n\t" #else "LDR r4, [%[a], #188]\n\t" @@ -16780,7 +16809,7 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_dig "MOV r4, #0x0\n\t" "MOV r5, #0x0\n\t" "\n" - "L_sp_3072_cond_sub_96_words_%=:\n\t" + "L_sp_3072_cond_sub_96_words:\n\t" "SUBS r4, r8, r4\n\t" "LDR r6, [%[a], r5]\n\t" "LDR r7, [%[b], r5]\n\t" @@ -16791,9 +16820,9 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_dig "ADD r5, r5, #0x4\n\t" "CMP r5, #0x180\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_cond_sub_96_words_%=\n\t" + "BLT L_sp_3072_cond_sub_96_words\n\t" #else - "BLT.N L_sp_3072_cond_sub_96_words_%=\n\t" + "BLT.N L_sp_3072_cond_sub_96_words\n\t" #endif "MOV %[r], r4\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -17200,7 +17229,7 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_3072_mont_reduce_96_word_%=:\n\t" + "L_sp_3072_mont_reduce_96_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -17975,9 +18004,9 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r11, #0x180\n\t" #ifdef __GNUC__ - "BLT L_sp_3072_mont_reduce_96_word_%=\n\t" + "BLT L_sp_3072_mont_reduce_96_word\n\t" #else - "BLT.W L_sp_3072_mont_reduce_96_word_%=\n\t" + "BLT.W L_sp_3072_mont_reduce_96_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -18016,7 +18045,7 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_3072_mont_reduce_96_word_%=:\n\t" + "L_sp_3072_mont_reduce_96_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ 
-18024,7 +18053,7 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_3072_mont_reduce_96_mul_%=:\n\t" + "L_sp_3072_mont_reduce_96_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -18067,9 +18096,9 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD r12, r12, #0x4\n\t" "CMP r12, #0x180\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mont_reduce_96_mul_%=\n\t" + "BLT L_sp_3072_mont_reduce_96_mul\n\t" #else - "BLT.N L_sp_3072_mont_reduce_96_mul_%=\n\t" + "BLT.N L_sp_3072_mont_reduce_96_mul\n\t" #endif "LDR r10, [%[a], #384]\n\t" "ADDS r4, r4, r3\n\t" @@ -18083,9 +18112,9 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r9, #0x180\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mont_reduce_96_word_%=\n\t" + "BLT L_sp_3072_mont_reduce_96_word\n\t" #else - "BLT.N L_sp_3072_mont_reduce_96_word_%=\n\t" + "BLT.N L_sp_3072_mont_reduce_96_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -18127,7 +18156,7 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" - "L_sp_3072_mont_reduce_96_word_%=:\n\t" + "L_sp_3072_mont_reduce_96_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], r6\n\t" /* a[i+0] += m[0] * mu */ @@ -18615,9 +18644,9 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0x180\n\t" #ifdef __GNUC__ - "BLT L_sp_3072_mont_reduce_96_word_%=\n\t" + "BLT L_sp_3072_mont_reduce_96_word\n\t" #else - "BLT.W L_sp_3072_mont_reduce_96_word_%=\n\t" + "BLT.W L_sp_3072_mont_reduce_96_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -18659,7 +18688,7 @@ static void 
sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_3072_mont_reduce_96_word_%=:\n\t" + "L_sp_3072_mont_reduce_96_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ -18667,7 +18696,7 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_3072_mont_reduce_96_mul_%=:\n\t" + "L_sp_3072_mont_reduce_96_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -18698,9 +18727,9 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD r12, r12, #0x4\n\t" "CMP r12, #0x180\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mont_reduce_96_mul_%=\n\t" + "BLT L_sp_3072_mont_reduce_96_mul\n\t" #else - "BLT.N L_sp_3072_mont_reduce_96_mul_%=\n\t" + "BLT.N L_sp_3072_mont_reduce_96_mul\n\t" #endif "LDR r10, [%[a], #384]\n\t" "ADDS r4, r4, r3\n\t" @@ -18714,9 +18743,9 @@ static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r9, #0x180\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_mont_reduce_96_word_%=\n\t" + "BLT L_sp_3072_mont_reduce_96_word\n\t" #else - "BLT.N L_sp_3072_mont_reduce_96_word_%=\n\t" + "BLT.N L_sp_3072_mont_reduce_96_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -18782,7 +18811,7 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b "MOV r11, #0x0\n\t" "ADD r12, %[a], #0x180\n\t" "\n" - "L_sp_3072_sub_96_word_%=:\n\t" + "L_sp_3072_sub_96_word:\n\t" "RSBS r11, r11, #0x0\n\t" "LDM %[a]!, {r3, r4, r5, r6}\n\t" "LDM %[b]!, {r7, r8, r9, r10}\n\t" @@ -18794,9 +18823,9 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b "SBC r11, r3, r3\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || 
defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_3072_sub_96_word_%=\n\t" + "BNE L_sp_3072_sub_96_word\n\t" #else - "BNE.N L_sp_3072_sub_96_word_%=\n\t" + "BNE.N L_sp_3072_sub_96_word\n\t" #endif "MOV %[r], r11\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -19014,9 +19043,9 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -19079,9 +19108,9 @@ static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) * Note that this is an approximate div. It may give an answer 1 larger. 
*/ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -19105,7 +19134,7 @@ static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) /* Next 30 bits */ "MOV r4, #0x1d\n\t" "\n" - "L_div_3072_word_96_bit_%=:\n\t" + "L_div_3072_word_96_bit:\n\t" "LSLS r6, r6, #1\n\t" "ADC r7, r7, r7\n\t" "SUBS r8, r5, r7\n\t" @@ -19115,7 +19144,7 @@ static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) "AND r8, r8, r5\n\t" "SUBS r7, r7, r8\n\t" "SUBS r4, r4, #0x1\n\t" - "bpl L_div_3072_word_96_bit_%=\n\t" + "bpl L_div_3072_word_96_bit\n\t" "ADD r3, r3, r3\n\t" "ADD r3, r3, #0x1\n\t" "UMULL r6, r7, r3, %[div]\n\t" @@ -19270,7 +19299,7 @@ static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x17c\n\t" "\n" - "L_sp_3072_cmp_96_words_%=:\n\t" + "L_sp_3072_cmp_96_words:\n\t" "LDR r4, [%[a], r6]\n\t" "LDR r5, [%[b], r6]\n\t" "AND r4, r4, r3\n\t" @@ -19283,7 +19312,7 @@ static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) "IT ne\n\t" "movne r3, r7\n\t" "SUBS r6, r6, #0x4\n\t" - "bcs L_sp_3072_cmp_96_words_%=\n\t" + "bcs L_sp_3072_cmp_96_words\n\t" "EOR r2, r2, r3\n\t" #else "LDR r4, [%[a], #380]\n\t" @@ -20898,7 +20927,7 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_dig "MOV r8, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_3072_cond_add_48_words_%=:\n\t" + "L_sp_3072_cond_add_48_words:\n\t" "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" @@ -20909,9 +20938,9 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const 
sp_dig "ADD r4, r4, #0x4\n\t" "CMP r4, #0xc0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_3072_cond_add_48_words_%=\n\t" + "BLT L_sp_3072_cond_add_48_words\n\t" #else - "BLT.N L_sp_3072_cond_add_48_words_%=\n\t" + "BLT.N L_sp_3072_cond_add_48_words\n\t" #endif "MOV %[r], r5\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -23042,7 +23071,7 @@ static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x200\n\t" "\n" - "L_sp_4096_add_128_word_%=:\n\t" + "L_sp_4096_add_128_word:\n\t" "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" @@ -23055,9 +23084,9 @@ static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* "ADC r3, r4, #0x0\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_4096_add_128_word_%=\n\t" + "BNE L_sp_4096_add_128_word\n\t" #else - "BNE.N L_sp_4096_add_128_word_%=\n\t" + "BNE.N L_sp_4096_add_128_word\n\t" #endif "MOV %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -23089,7 +23118,7 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) "MOV r10, #0x0\n\t" "ADD r11, %[a], #0x200\n\t" "\n" - "L_sp_4096_sub_in_pkace_128_word_%=:\n\t" + "L_sp_4096_sub_in_pkace_128_word:\n\t" "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" "LDM %[b]!, {r6, r7, r8, r9}\n\t" @@ -23101,9 +23130,9 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_4096_sub_in_pkace_128_word_%=\n\t" + "BNE L_sp_4096_sub_in_pkace_128_word\n\t" #else - "BNE.N L_sp_4096_sub_in_pkace_128_word_%=\n\t" + "BNE.N L_sp_4096_sub_in_pkace_128_word\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -23135,61 +23164,80 @@ static void 
sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "SUB sp, sp, #0x400\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "LDR r11, [%[b]]\n\t" + "UMULL r8, r6, lr, r11\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_4096_mul_128_outer_%=:\n\t" + "L_sp_4096_mul_128_outer:\n\t" "SUBS r3, r5, #0x1fc\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_4096_mul_128_inner_%=:\n\t" + "L_sp_4096_mul_128_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[b], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" + "LDR lr, [%[a], r4]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x200\n\t" + "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_4096_mul_128_inner_done_%=\n\t" + "BGT L_sp_4096_mul_128_inner_done\n\t" #else - "BEQ.N L_sp_4096_mul_128_inner_done_%=\n\t" + "BGT.N L_sp_4096_mul_128_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_4096_mul_128_inner_%=\n\t" + "BLT L_sp_4096_mul_128_inner\n\t" #else - "BLE.N L_sp_4096_mul_128_inner_%=\n\t" + "BLT.N L_sp_4096_mul_128_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_4096_mul_128_inner_done_%=:\n\t" + "L_sp_4096_mul_128_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x3f8\n\t" + "CMP r5, #0x3f4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_4096_mul_128_outer_%=\n\t" + "BLE 
L_sp_4096_mul_128_outer\n\t" #else - "BLE.N L_sp_4096_mul_128_outer_%=\n\t" + "BLE.N L_sp_4096_mul_128_outer\n\t" #endif + "LDR lr, [%[a], #508]\n\t" + "LDR r11, [%[b], #508]\n\t" + "UMLAL r6, r7, lr, r11\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_4096_mul_128_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_4096_mul_128_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_4096_mul_128_store_%=\n\t" + "BGT L_sp_4096_mul_128_store\n\t" #else - "BGT.N L_sp_4096_mul_128_store_%=\n\t" + "BGT.N L_sp_4096_mul_128_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -23215,24 +23263,20 @@ static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "SUB sp, sp, #0x400\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "UMULL r8, r6, lr, lr\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_4096_sqr_128_outer_%=:\n\t" + "L_sp_4096_sqr_128_outer:\n\t" "SUBS r3, r5, #0x1fc\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_4096_sqr_128_inner_%=:\n\t" - "CMP r4, r3\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_4096_sqr_128_op_sqr_%=\n\t" -#else - "BEQ.N L_sp_4096_sqr_128_op_sqr_%=\n\t" -#endif + "L_sp_4096_sqr_128_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[a], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" @@ -23242,59 +23286,51 @@ static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" - "bal L_sp_4096_sqr_128_op_done_%=\n\t" - "\n" - "L_sp_4096_sqr_128_op_sqr_%=:\n\t" - "LDR lr, [%[a], r3]\n\t" - "UMULL r9, r10, lr, 
lr\n\t" - "ADDS r6, r6, r9\n\t" - "ADCS r7, r7, r10\n\t" - "ADC r8, r8, #0x0\n\t" - "\n" - "L_sp_4096_sqr_128_op_done_%=:\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x200\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_4096_sqr_128_inner_done_%=\n\t" -#else - "BEQ.N L_sp_4096_sqr_128_inner_done_%=\n\t" -#endif "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_4096_sqr_128_inner_done_%=\n\t" + "BGT L_sp_4096_sqr_128_inner_done\n\t" #else - "BGT.N L_sp_4096_sqr_128_inner_done_%=\n\t" + "BGT.N L_sp_4096_sqr_128_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_4096_sqr_128_inner_%=\n\t" + "BLT L_sp_4096_sqr_128_inner\n\t" #else - "BLE.N L_sp_4096_sqr_128_inner_%=\n\t" + "BLT.N L_sp_4096_sqr_128_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "UMULL r9, r10, lr, lr\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_4096_sqr_128_inner_done_%=:\n\t" + "L_sp_4096_sqr_128_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x3f8\n\t" + "CMP r5, #0x3f4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_4096_sqr_128_outer_%=\n\t" + "BLE L_sp_4096_sqr_128_outer\n\t" #else - "BLE.N L_sp_4096_sqr_128_outer_%=\n\t" + "BLE.N L_sp_4096_sqr_128_outer\n\t" #endif + "LDR lr, [%[a], #508]\n\t" + "UMLAL r6, r7, lr, lr\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_4096_sqr_128_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_4096_sqr_128_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || 
defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_4096_sqr_128_store_%=\n\t" + "BGT L_sp_4096_sqr_128_store\n\t" #else - "BGT.N L_sp_4096_sqr_128_store_%=\n\t" + "BGT.N L_sp_4096_sqr_128_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a) : @@ -23351,7 +23387,7 @@ static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) "MOV r5, #0x0\n\t" "MOV r9, #0x4\n\t" "\n" - "L_sp_4096_mul_d_128_word_%=:\n\t" + "L_sp_4096_mul_d_128_word:\n\t" /* A[i] * B */ "LDR r8, [%[a], r9]\n\t" "UMULL r6, r7, %[b], r8\n\t" @@ -23365,9 +23401,9 @@ static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) "ADD r9, r9, #0x4\n\t" "CMP r9, #0x200\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_4096_mul_d_128_word_%=\n\t" + "BLT L_sp_4096_mul_d_128_word\n\t" #else - "BLT.N L_sp_4096_mul_d_128_word_%=\n\t" + "BLT.N L_sp_4096_mul_d_128_word\n\t" #endif "STR r3, [%[r], #512]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -24086,7 +24122,7 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_di "MOV r4, #0x0\n\t" "MOV r5, #0x0\n\t" "\n" - "L_sp_4096_cond_sub_128_words_%=:\n\t" + "L_sp_4096_cond_sub_128_words:\n\t" "SUBS r4, r8, r4\n\t" "LDR r6, [%[a], r5]\n\t" "LDR r7, [%[b], r5]\n\t" @@ -24097,9 +24133,9 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_di "ADD r5, r5, #0x4\n\t" "CMP r5, #0x200\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_4096_cond_sub_128_words_%=\n\t" + "BLT L_sp_4096_cond_sub_128_words\n\t" #else - "BLT.N L_sp_4096_cond_sub_128_words_%=\n\t" + "BLT.N L_sp_4096_cond_sub_128_words\n\t" #endif "MOV %[r], r4\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -24618,7 +24654,7 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_4096_mont_reduce_128_word_%=:\n\t" + 
"L_sp_4096_mont_reduce_128_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -25649,9 +25685,9 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r11, #0x200\n\t" #ifdef __GNUC__ - "BLT L_sp_4096_mont_reduce_128_word_%=\n\t" + "BLT L_sp_4096_mont_reduce_128_word\n\t" #else - "BLT.W L_sp_4096_mont_reduce_128_word_%=\n\t" + "BLT.W L_sp_4096_mont_reduce_128_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -25690,7 +25726,7 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_4096_mont_reduce_128_word_%=:\n\t" + "L_sp_4096_mont_reduce_128_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ -25698,7 +25734,7 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_4096_mont_reduce_128_mul_%=:\n\t" + "L_sp_4096_mont_reduce_128_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -25741,9 +25777,9 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD r12, r12, #0x4\n\t" "CMP r12, #0x200\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_4096_mont_reduce_128_mul_%=\n\t" + "BLT L_sp_4096_mont_reduce_128_mul\n\t" #else - "BLT.N L_sp_4096_mont_reduce_128_mul_%=\n\t" + "BLT.N L_sp_4096_mont_reduce_128_mul\n\t" #endif "LDR r10, [%[a], #512]\n\t" "ADDS r4, r4, r3\n\t" @@ -25757,9 +25793,9 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r9, #0x200\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_4096_mont_reduce_128_word_%=\n\t" + "BLT L_sp_4096_mont_reduce_128_word\n\t" #else - "BLT.N L_sp_4096_mont_reduce_128_word_%=\n\t" + "BLT.N 
L_sp_4096_mont_reduce_128_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -25801,7 +25837,7 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" - "L_sp_4096_mont_reduce_128_word_%=:\n\t" + "L_sp_4096_mont_reduce_128_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], r6\n\t" /* a[i+0] += m[0] * mu */ @@ -26449,9 +26485,9 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0x200\n\t" #ifdef __GNUC__ - "BLT L_sp_4096_mont_reduce_128_word_%=\n\t" + "BLT L_sp_4096_mont_reduce_128_word\n\t" #else - "BLT.W L_sp_4096_mont_reduce_128_word_%=\n\t" + "BLT.W L_sp_4096_mont_reduce_128_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -26493,7 +26529,7 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) /* ca = 0 */ "MOV r3, #0x0\n\t" "\n" - "L_sp_4096_mont_reduce_128_word_%=:\n\t" + "L_sp_4096_mont_reduce_128_word:\n\t" /* mu = a[i] * mp */ "LDR r10, [%[a]]\n\t" "MUL r8, %[mp], r10\n\t" @@ -26501,7 +26537,7 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "MOV r12, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_4096_mont_reduce_128_mul_%=:\n\t" + "L_sp_4096_mont_reduce_128_mul:\n\t" /* a[i+j+0] += m[j+0] * mu */ "LDR r7, [%[m], r12]\n\t" "LDR r10, [%[a], r12]\n\t" @@ -26532,9 +26568,9 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD r12, r12, #0x4\n\t" "CMP r12, #0x200\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_4096_mont_reduce_128_mul_%=\n\t" + "BLT L_sp_4096_mont_reduce_128_mul\n\t" #else - "BLT.N L_sp_4096_mont_reduce_128_mul_%=\n\t" + "BLT.N L_sp_4096_mont_reduce_128_mul\n\t" #endif "LDR r10, [%[a], #512]\n\t" "ADDS r4, r4, r3\n\t" @@ -26548,9 +26584,9 @@ static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], 
#0x4\n\t" "CMP r9, #0x200\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_4096_mont_reduce_128_word_%=\n\t" + "BLT L_sp_4096_mont_reduce_128_word\n\t" #else - "BLT.N L_sp_4096_mont_reduce_128_word_%=\n\t" + "BLT.N L_sp_4096_mont_reduce_128_word\n\t" #endif /* Loop Done */ "MOV %[mp], r3\n\t" @@ -26616,7 +26652,7 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* "MOV r11, #0x0\n\t" "ADD r12, %[a], #0x200\n\t" "\n" - "L_sp_4096_sub_128_word_%=:\n\t" + "L_sp_4096_sub_128_word:\n\t" "RSBS r11, r11, #0x0\n\t" "LDM %[a]!, {r3, r4, r5, r6}\n\t" "LDM %[b]!, {r7, r8, r9, r10}\n\t" @@ -26628,9 +26664,9 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* "SBC r11, r3, r3\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_4096_sub_128_word_%=\n\t" + "BNE L_sp_4096_sub_128_word\n\t" #else - "BNE.N L_sp_4096_sub_128_word_%=\n\t" + "BNE.N L_sp_4096_sub_128_word\n\t" #endif "MOV %[r], r11\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -26904,9 +26940,9 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -26969,9 +27005,9 @@ static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) * Note that this is an approximate div. It may give an answer 1 larger. 
*/ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -26995,7 +27031,7 @@ static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) /* Next 30 bits */ "MOV r4, #0x1d\n\t" "\n" - "L_div_4096_word_128_bit_%=:\n\t" + "L_div_4096_word_128_bit:\n\t" "LSLS r6, r6, #1\n\t" "ADC r7, r7, r7\n\t" "SUBS r8, r5, r7\n\t" @@ -27005,7 +27041,7 @@ static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) "AND r8, r8, r5\n\t" "SUBS r7, r7, r8\n\t" "SUBS r4, r4, #0x1\n\t" - "bpl L_div_4096_word_128_bit_%=\n\t" + "bpl L_div_4096_word_128_bit\n\t" "ADD r3, r3, r3\n\t" "ADD r3, r3, #0x1\n\t" "UMULL r6, r7, r3, %[div]\n\t" @@ -27160,7 +27196,7 @@ static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x1fc\n\t" "\n" - "L_sp_4096_cmp_128_words_%=:\n\t" + "L_sp_4096_cmp_128_words:\n\t" "LDR r4, [%[a], r6]\n\t" "LDR r5, [%[b], r6]\n\t" "AND r4, r4, r3\n\t" @@ -27173,7 +27209,7 @@ static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) "IT ne\n\t" "movne r3, r7\n\t" "SUBS r6, r6, #0x4\n\t" - "bcs L_sp_4096_cmp_128_words_%=\n\t" + "bcs L_sp_4096_cmp_128_words\n\t" "EOR r2, r2, r3\n\t" #else "LDR r4, [%[a], #508]\n\t" @@ -29140,7 +29176,7 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_dig "MOV r8, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_4096_cond_add_64_words_%=:\n\t" + "L_sp_4096_cond_add_64_words:\n\t" "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" @@ -29151,9 +29187,9 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r, const 
sp_digit* a, const sp_dig "ADD r4, r4, #0x4\n\t" "CMP r4, #0x100\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_4096_cond_add_64_words_%=\n\t" + "BLT L_sp_4096_cond_add_64_words\n\t" #else - "BLT.N L_sp_4096_cond_add_64_words_%=\n\t" + "BLT.N L_sp_4096_cond_add_64_words\n\t" #endif "MOV %[r], r5\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -30825,61 +30861,80 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "LDR r11, [%[b]]\n\t" + "UMULL r8, r6, lr, r11\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_256_mul_8_outer_%=:\n\t" + "L_sp_256_mul_8_outer:\n\t" "SUBS r3, r5, #0x1c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_256_mul_8_inner_%=:\n\t" + "L_sp_256_mul_8_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[b], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" + "LDR lr, [%[a], r4]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x20\n\t" + "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_mul_8_inner_done_%=\n\t" + "BGT L_sp_256_mul_8_inner_done\n\t" #else - "BEQ.N L_sp_256_mul_8_inner_done_%=\n\t" + "BGT.N L_sp_256_mul_8_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_256_mul_8_inner_%=\n\t" + "BLT L_sp_256_mul_8_inner\n\t" #else - "BLE.N L_sp_256_mul_8_inner_%=\n\t" + "BLT.N L_sp_256_mul_8_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" 
+ "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_256_mul_8_inner_done_%=:\n\t" + "L_sp_256_mul_8_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x38\n\t" + "CMP r5, #0x34\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_256_mul_8_outer_%=\n\t" + "BLE L_sp_256_mul_8_outer\n\t" #else - "BLE.N L_sp_256_mul_8_outer_%=\n\t" + "BLE.N L_sp_256_mul_8_outer\n\t" #endif + "LDR lr, [%[a], #28]\n\t" + "LDR r11, [%[b], #28]\n\t" + "UMLAL r6, r7, lr, r11\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_256_mul_8_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_256_mul_8_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_256_mul_8_store_%=\n\t" + "BGT L_sp_256_mul_8_store\n\t" #else - "BGT.N L_sp_256_mul_8_store_%=\n\t" + "BGT.N L_sp_256_mul_8_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -31411,24 +31466,20 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "UMULL r8, r6, lr, lr\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_256_sqr_8_outer_%=:\n\t" + "L_sp_256_sqr_8_outer:\n\t" "SUBS r3, r5, #0x1c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_256_sqr_8_inner_%=:\n\t" - "CMP r4, r3\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_sqr_8_op_sqr_%=\n\t" -#else - "BEQ.N L_sp_256_sqr_8_op_sqr_%=\n\t" -#endif + "L_sp_256_sqr_8_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR 
r11, [%[a], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" @@ -31438,59 +31489,51 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" - "bal L_sp_256_sqr_8_op_done_%=\n\t" - "\n" - "L_sp_256_sqr_8_op_sqr_%=:\n\t" - "LDR lr, [%[a], r3]\n\t" - "UMULL r9, r10, lr, lr\n\t" - "ADDS r6, r6, r9\n\t" - "ADCS r7, r7, r10\n\t" - "ADC r8, r8, #0x0\n\t" - "\n" - "L_sp_256_sqr_8_op_done_%=:\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x20\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_sqr_8_inner_done_%=\n\t" -#else - "BEQ.N L_sp_256_sqr_8_inner_done_%=\n\t" -#endif "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_256_sqr_8_inner_done_%=\n\t" + "BGT L_sp_256_sqr_8_inner_done\n\t" #else - "BGT.N L_sp_256_sqr_8_inner_done_%=\n\t" + "BGT.N L_sp_256_sqr_8_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_256_sqr_8_inner_%=\n\t" + "BLT L_sp_256_sqr_8_inner\n\t" #else - "BLE.N L_sp_256_sqr_8_inner_%=\n\t" + "BLT.N L_sp_256_sqr_8_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "UMULL r9, r10, lr, lr\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_256_sqr_8_inner_done_%=:\n\t" + "L_sp_256_sqr_8_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x38\n\t" + "CMP r5, #0x34\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_256_sqr_8_outer_%=\n\t" + "BLE L_sp_256_sqr_8_outer\n\t" #else - "BLE.N L_sp_256_sqr_8_outer_%=\n\t" + "BLE.N L_sp_256_sqr_8_outer\n\t" #endif + "LDR lr, [%[a], #28]\n\t" + "UMLAL r6, r7, lr, lr\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_256_sqr_8_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, 
r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_256_sqr_8_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_256_sqr_8_store_%=\n\t" + "BGT L_sp_256_sqr_8_store\n\t" #else - "BGT.N L_sp_256_sqr_8_store_%=\n\t" + "BGT.N L_sp_256_sqr_8_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a) : @@ -31896,7 +31939,7 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x20\n\t" "\n" - "L_sp_256_add_8_word_%=:\n\t" + "L_sp_256_add_8_word:\n\t" "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" @@ -31909,9 +31952,9 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) "ADC r3, r4, #0x0\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_256_add_8_word_%=\n\t" + "BNE L_sp_256_add_8_word\n\t" #else - "BNE.N L_sp_256_add_8_word_%=\n\t" + "BNE.N L_sp_256_add_8_word\n\t" #endif "MOV %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -33818,8 +33861,8 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, * m Modulus (prime). * mp Montgomery multiplier. 
*/ -static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_8(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_8(r, a, m, mp); for (; n > 1; n--) { @@ -33931,7 +33974,7 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x1c\n\t" "\n" - "L_sp_256_cmp_8_words_%=:\n\t" + "L_sp_256_cmp_8_words:\n\t" "LDR r4, [%[a], r6]\n\t" "LDR r5, [%[b], r6]\n\t" "AND r4, r4, r3\n\t" @@ -33944,7 +33987,7 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) "IT ne\n\t" "movne r3, r7\n\t" "SUBS r6, r6, #0x4\n\t" - "bcs L_sp_256_cmp_8_words_%=\n\t" + "bcs L_sp_256_cmp_8_words\n\t" "EOR r2, r2, r3\n\t" #else "LDR r4, [%[a], #28]\n\t" @@ -34078,7 +34121,7 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit "MOV r4, #0x0\n\t" "MOV r5, #0x0\n\t" "\n" - "L_sp_256_cond_sub_8_words_%=:\n\t" + "L_sp_256_cond_sub_8_words:\n\t" "SUBS r4, r8, r4\n\t" "LDR r6, [%[a], r5]\n\t" "LDR r7, [%[b], r5]\n\t" @@ -34089,9 +34132,9 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit "ADD r5, r5, #0x4\n\t" "CMP r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_256_cond_sub_8_words_%=\n\t" + "BLT L_sp_256_cond_sub_8_words\n\t" #else - "BLT.N L_sp_256_cond_sub_8_words_%=\n\t" + "BLT.N L_sp_256_cond_sub_8_words\n\t" #endif "MOV %[r], r4\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -34192,7 +34235,7 @@ static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_256_mont_reduce_8_word_%=:\n\t" + "L_sp_256_mont_reduce_8_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -34263,9 +34306,9 @@ static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) 
"ADD %[a], %[a], #0x4\n\t" "CMP r11, #0x20\n\t" #ifdef __GNUC__ - "BLT L_sp_256_mont_reduce_8_word_%=\n\t" + "BLT L_sp_256_mont_reduce_8_word\n\t" #else - "BLT.W L_sp_256_mont_reduce_8_word_%=\n\t" + "BLT.W L_sp_256_mont_reduce_8_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -34307,7 +34350,7 @@ static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" - "L_sp_256_mont_reduce_8_word_%=:\n\t" + "L_sp_256_mont_reduce_8_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], r6\n\t" /* a[i+0] += m[0] * mu */ @@ -34355,9 +34398,9 @@ static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0x20\n\t" #ifdef __GNUC__ - "BLT L_sp_256_mont_reduce_8_word_%=\n\t" + "BLT L_sp_256_mont_reduce_8_word\n\t" #else - "BLT.W L_sp_256_mont_reduce_8_word_%=\n\t" + "BLT.W L_sp_256_mont_reduce_8_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -34566,7 +34609,7 @@ static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_256_mont_reduce_order_8_word_%=:\n\t" + "L_sp_256_mont_reduce_order_8_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -34637,9 +34680,9 @@ static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit "ADD %[a], %[a], #0x4\n\t" "CMP r11, #0x20\n\t" #ifdef __GNUC__ - "BLT L_sp_256_mont_reduce_order_8_word_%=\n\t" + "BLT L_sp_256_mont_reduce_order_8_word\n\t" #else - "BLT.W L_sp_256_mont_reduce_order_8_word_%=\n\t" + "BLT.W L_sp_256_mont_reduce_order_8_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -34681,7 +34724,7 @@ static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" - "L_sp_256_mont_reduce_order_8_word_%=:\n\t" + "L_sp_256_mont_reduce_order_8_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], 
r6\n\t" /* a[i+0] += m[0] * mu */ @@ -34729,9 +34772,9 @@ static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0x20\n\t" #ifdef __GNUC__ - "BLT L_sp_256_mont_reduce_order_8_word_%=\n\t" + "BLT L_sp_256_mont_reduce_order_8_word\n\t" #else - "BLT.W L_sp_256_mont_reduce_order_8_word_%=\n\t" + "BLT.W L_sp_256_mont_reduce_order_8_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -39060,7 +39103,7 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) "MOV r10, #0x0\n\t" "ADD r11, %[a], #0x20\n\t" "\n" - "L_sp_256_sub_in_pkace_8_word_%=:\n\t" + "L_sp_256_sub_in_pkace_8_word:\n\t" "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" "LDM %[b]!, {r6, r7, r8, r9}\n\t" @@ -39072,9 +39115,9 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_256_sub_in_pkace_8_word_%=\n\t" + "BNE L_sp_256_sub_in_pkace_8_word\n\t" #else - "BNE.N L_sp_256_sub_in_pkace_8_word_%=\n\t" + "BNE.N L_sp_256_sub_in_pkace_8_word\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -39153,7 +39196,7 @@ static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) "MOV r5, #0x0\n\t" "MOV r9, #0x4\n\t" "\n" - "L_sp_256_mul_d_8_word_%=:\n\t" + "L_sp_256_mul_d_8_word:\n\t" /* A[i] * B */ "LDR r8, [%[a], r9]\n\t" "UMULL r6, r7, %[b], r8\n\t" @@ -39167,9 +39210,9 @@ static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) "ADD r9, r9, #0x4\n\t" "CMP r9, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_256_mul_d_8_word_%=\n\t" + "BLT L_sp_256_mul_d_8_word\n\t" #else - "BLT.N L_sp_256_mul_d_8_word_%=\n\t" + "BLT.N L_sp_256_mul_d_8_word\n\t" #endif "STR r3, [%[r], #32]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -39256,9 +39299,9 @@ static void sp_256_mul_d_8(sp_digit* r, const 
sp_digit* a, sp_digit b) * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -39321,9 +39364,9 @@ static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -39347,7 +39390,7 @@ static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) /* Next 30 bits */ "MOV r4, #0x1d\n\t" "\n" - "L_div_256_word_8_bit_%=:\n\t" + "L_div_256_word_8_bit:\n\t" "LSLS r6, r6, #1\n\t" "ADC r7, r7, r7\n\t" "SUBS r8, r5, r7\n\t" @@ -39357,7 +39400,7 @@ static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) "AND r8, r8, r5\n\t" "SUBS r7, r7, r8\n\t" "SUBS r4, r4, #0x1\n\t" - "bpl L_div_256_word_8_bit_%=\n\t" + "bpl L_div_256_word_8_bit\n\t" "ADD r3, r3, r3\n\t" "ADD r3, r3, #0x1\n\t" "UMULL r6, r7, r3, %[div]\n\t" @@ -40051,7 +40094,7 @@ static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) "MOV r11, #0x0\n\t" "ADD r12, %[a], #0x20\n\t" "\n" - "L_sp_256_sub_8_word_%=:\n\t" + "L_sp_256_sub_8_word:\n\t" "RSBS r11, r11, #0x0\n\t" "LDM %[a]!, {r3, r4, r5, r6}\n\t" 
"LDM %[b]!, {r7, r8, r9, r10}\n\t" @@ -40063,9 +40106,9 @@ static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) "SBC r11, r3, r3\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_256_sub_8_word_%=\n\t" + "BNE L_sp_256_sub_8_word\n\t" #else - "BNE.N L_sp_256_sub_8_word_%=\n\t" + "BNE.N L_sp_256_sub_8_word\n\t" #endif "MOV %[r], r11\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -40185,9 +40228,9 @@ static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "LDM %[a]!, {r4}\n\t" "ANDS r3, r4, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_div2_mod_8_even_%=\n\t" + "BEQ L_sp_256_div2_mod_8_even\n\t" #else - "BEQ.N L_sp_256_div2_mod_8_even_%=\n\t" + "BEQ.N L_sp_256_div2_mod_8_even\n\t" #endif "LDM %[a]!, {r5, r6, r7}\n\t" "LDM %[m]!, {r8, r9, r10, r11}\n\t" @@ -40203,13 +40246,17 @@ static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "ADCS r6, r6, r10\n\t" "ADCS r7, r7, r11\n\t" "ADC r3, r12, r12\n\t" - "B L_sp_256_div2_mod_8_div2_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_256_div2_mod_8_div2\n\t" +#else + "B.N L_sp_256_div2_mod_8_div2\n\t" +#endif "\n" - "L_sp_256_div2_mod_8_even_%=:\n\t" + "L_sp_256_div2_mod_8_even:\n\t" "LDRD r4, r5, [%[a], #12]\n\t" "LDRD r6, r7, [%[a], #20]\n\t" "\n" - "L_sp_256_div2_mod_8_div2_%=:\n\t" + "L_sp_256_div2_mod_8_div2:\n\t" "LSR r8, r4, #1\n\t" "AND r4, r4, #0x1\n\t" "LSR r9, r5, #1\n\t" @@ -40252,100 +40299,128 @@ static int sp_256_num_bits_8(const sp_digit* a) "LDR r1, [%[a], #28]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_num_bits_8_7_%=\n\t" + "BEQ L_sp_256_num_bits_8_7\n\t" #else - "BEQ.N L_sp_256_num_bits_8_7_%=\n\t" + "BEQ.N L_sp_256_num_bits_8_7\n\t" #endif "MOV r2, #0x100\n\t" "CLZ r4, r1\n\t" "SUB r4, 
r2, r4\n\t" - "B L_sp_256_num_bits_8_9_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_256_num_bits_8_9\n\t" +#else + "B.N L_sp_256_num_bits_8_9\n\t" +#endif "\n" - "L_sp_256_num_bits_8_7_%=:\n\t" + "L_sp_256_num_bits_8_7:\n\t" "LDR r1, [%[a], #24]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_num_bits_8_6_%=\n\t" + "BEQ L_sp_256_num_bits_8_6\n\t" #else - "BEQ.N L_sp_256_num_bits_8_6_%=\n\t" + "BEQ.N L_sp_256_num_bits_8_6\n\t" #endif "MOV r2, #0xe0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_256_num_bits_8_9_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_256_num_bits_8_9\n\t" +#else + "B.N L_sp_256_num_bits_8_9\n\t" +#endif "\n" - "L_sp_256_num_bits_8_6_%=:\n\t" + "L_sp_256_num_bits_8_6:\n\t" "LDR r1, [%[a], #20]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_num_bits_8_5_%=\n\t" + "BEQ L_sp_256_num_bits_8_5\n\t" #else - "BEQ.N L_sp_256_num_bits_8_5_%=\n\t" + "BEQ.N L_sp_256_num_bits_8_5\n\t" #endif "MOV r2, #0xc0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_256_num_bits_8_9_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_256_num_bits_8_9\n\t" +#else + "B.N L_sp_256_num_bits_8_9\n\t" +#endif "\n" - "L_sp_256_num_bits_8_5_%=:\n\t" + "L_sp_256_num_bits_8_5:\n\t" "LDR r1, [%[a], #16]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_num_bits_8_4_%=\n\t" + "BEQ L_sp_256_num_bits_8_4\n\t" #else - "BEQ.N L_sp_256_num_bits_8_4_%=\n\t" + "BEQ.N L_sp_256_num_bits_8_4\n\t" #endif "MOV r2, #0xa0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_256_num_bits_8_9_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_256_num_bits_8_9\n\t" +#else + "B.N 
L_sp_256_num_bits_8_9\n\t" +#endif "\n" - "L_sp_256_num_bits_8_4_%=:\n\t" + "L_sp_256_num_bits_8_4:\n\t" "LDR r1, [%[a], #12]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_num_bits_8_3_%=\n\t" + "BEQ L_sp_256_num_bits_8_3\n\t" #else - "BEQ.N L_sp_256_num_bits_8_3_%=\n\t" + "BEQ.N L_sp_256_num_bits_8_3\n\t" #endif "MOV r2, #0x80\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_256_num_bits_8_9_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_256_num_bits_8_9\n\t" +#else + "B.N L_sp_256_num_bits_8_9\n\t" +#endif "\n" - "L_sp_256_num_bits_8_3_%=:\n\t" + "L_sp_256_num_bits_8_3:\n\t" "LDR r1, [%[a], #8]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_num_bits_8_2_%=\n\t" + "BEQ L_sp_256_num_bits_8_2\n\t" #else - "BEQ.N L_sp_256_num_bits_8_2_%=\n\t" + "BEQ.N L_sp_256_num_bits_8_2\n\t" #endif "MOV r2, #0x60\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_256_num_bits_8_9_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_256_num_bits_8_9\n\t" +#else + "B.N L_sp_256_num_bits_8_9\n\t" +#endif "\n" - "L_sp_256_num_bits_8_2_%=:\n\t" + "L_sp_256_num_bits_8_2:\n\t" "LDR r1, [%[a], #4]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_256_num_bits_8_1_%=\n\t" + "BEQ L_sp_256_num_bits_8_1\n\t" #else - "BEQ.N L_sp_256_num_bits_8_1_%=\n\t" + "BEQ.N L_sp_256_num_bits_8_1\n\t" #endif "MOV r2, #0x40\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_256_num_bits_8_9_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_256_num_bits_8_9\n\t" +#else + "B.N L_sp_256_num_bits_8_9\n\t" +#endif "\n" - "L_sp_256_num_bits_8_1_%=:\n\t" + "L_sp_256_num_bits_8_1:\n\t" "LDR r1, [%[a]]\n\t" "MOV r2, #0x20\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" "\n" - 
"L_sp_256_num_bits_8_9_%=:\n\t" + "L_sp_256_num_bits_8_9:\n\t" "MOV %[a], r4\n\t" : [a] "+r" (a) : @@ -40798,7 +40873,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -40854,7 +40929,7 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -41460,61 +41535,80 @@ static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "SUB sp, sp, #0x60\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "LDR r11, [%[b]]\n\t" + "UMULL r8, r6, lr, r11\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_384_mul_12_outer_%=:\n\t" + "L_sp_384_mul_12_outer:\n\t" "SUBS r3, r5, #0x2c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_384_mul_12_inner_%=:\n\t" + "L_sp_384_mul_12_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[b], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" + "LDR lr, [%[a], r4]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x30\n\t" + "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_mul_12_inner_done_%=\n\t" + "BGT L_sp_384_mul_12_inner_done\n\t" #else - "BEQ.N L_sp_384_mul_12_inner_done_%=\n\t" + "BGT.N L_sp_384_mul_12_inner_done\n\t" #endif - "CMP r3, 
r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_384_mul_12_inner_%=\n\t" + "BLT L_sp_384_mul_12_inner\n\t" #else - "BLE.N L_sp_384_mul_12_inner_%=\n\t" + "BLT.N L_sp_384_mul_12_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_384_mul_12_inner_done_%=:\n\t" + "L_sp_384_mul_12_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x58\n\t" + "CMP r5, #0x54\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_384_mul_12_outer_%=\n\t" + "BLE L_sp_384_mul_12_outer\n\t" #else - "BLE.N L_sp_384_mul_12_outer_%=\n\t" + "BLE.N L_sp_384_mul_12_outer\n\t" #endif + "LDR lr, [%[a], #44]\n\t" + "LDR r11, [%[b], #44]\n\t" + "UMLAL r6, r7, lr, r11\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_384_mul_12_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_384_mul_12_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_384_mul_12_store_%=\n\t" + "BGT L_sp_384_mul_12_store\n\t" #else - "BGT.N L_sp_384_mul_12_store_%=\n\t" + "BGT.N L_sp_384_mul_12_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -42570,24 +42664,20 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "SUB sp, sp, #0x60\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "UMULL r8, r6, lr, lr\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_384_sqr_12_outer_%=:\n\t" + "L_sp_384_sqr_12_outer:\n\t" "SUBS r3, r5, 
#0x2c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_384_sqr_12_inner_%=:\n\t" - "CMP r4, r3\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_sqr_12_op_sqr_%=\n\t" -#else - "BEQ.N L_sp_384_sqr_12_op_sqr_%=\n\t" -#endif + "L_sp_384_sqr_12_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[a], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" @@ -42597,59 +42687,51 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" - "bal L_sp_384_sqr_12_op_done_%=\n\t" - "\n" - "L_sp_384_sqr_12_op_sqr_%=:\n\t" - "LDR lr, [%[a], r3]\n\t" - "UMULL r9, r10, lr, lr\n\t" - "ADDS r6, r6, r9\n\t" - "ADCS r7, r7, r10\n\t" - "ADC r8, r8, #0x0\n\t" - "\n" - "L_sp_384_sqr_12_op_done_%=:\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x30\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_sqr_12_inner_done_%=\n\t" -#else - "BEQ.N L_sp_384_sqr_12_inner_done_%=\n\t" -#endif "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_384_sqr_12_inner_done_%=\n\t" + "BGT L_sp_384_sqr_12_inner_done\n\t" #else - "BGT.N L_sp_384_sqr_12_inner_done_%=\n\t" + "BGT.N L_sp_384_sqr_12_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_384_sqr_12_inner_%=\n\t" + "BLT L_sp_384_sqr_12_inner\n\t" #else - "BLE.N L_sp_384_sqr_12_inner_%=\n\t" + "BLT.N L_sp_384_sqr_12_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "UMULL r9, r10, lr, lr\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_384_sqr_12_inner_done_%=:\n\t" + "L_sp_384_sqr_12_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x58\n\t" + "CMP r5, #0x54\n\t" #if defined(__GNUC__) || 
defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_384_sqr_12_outer_%=\n\t" + "BLE L_sp_384_sqr_12_outer\n\t" #else - "BLE.N L_sp_384_sqr_12_outer_%=\n\t" + "BLE.N L_sp_384_sqr_12_outer\n\t" #endif + "LDR lr, [%[a], #44]\n\t" + "UMLAL r6, r7, lr, lr\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_384_sqr_12_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_384_sqr_12_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_384_sqr_12_store_%=\n\t" + "BGT L_sp_384_sqr_12_store\n\t" #else - "BGT.N L_sp_384_sqr_12_store_%=\n\t" + "BGT.N L_sp_384_sqr_12_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a) : @@ -43382,7 +43464,7 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x30\n\t" "\n" - "L_sp_384_add_12_word_%=:\n\t" + "L_sp_384_add_12_word:\n\t" "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" @@ -43395,9 +43477,9 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) "ADC r3, r4, #0x0\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_384_add_12_word_%=\n\t" + "BNE L_sp_384_add_12_word\n\t" #else - "BNE.N L_sp_384_add_12_word_%=\n\t" + "BNE.N L_sp_384_add_12_word\n\t" #endif "MOV %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -43782,7 +43864,7 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digi "MOV r4, #0x0\n\t" "MOV r5, #0x0\n\t" "\n" - "L_sp_384_cond_sub_12_words_%=:\n\t" + "L_sp_384_cond_sub_12_words:\n\t" "SUBS r4, r8, r4\n\t" "LDR r6, [%[a], r5]\n\t" "LDR r7, [%[b], r5]\n\t" @@ -43793,9 +43875,9 @@ static 
sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digi "ADD r5, r5, #0x4\n\t" "CMP r5, #0x30\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_384_cond_sub_12_words_%=\n\t" + "BLT L_sp_384_cond_sub_12_words\n\t" #else - "BLT.N L_sp_384_cond_sub_12_words_%=\n\t" + "BLT.N L_sp_384_cond_sub_12_words\n\t" #endif "MOV %[r], r4\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -43909,7 +43991,7 @@ static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_384_mont_reduce_12_word_%=:\n\t" + "L_sp_384_mont_reduce_12_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -44012,9 +44094,9 @@ static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r11, #0x30\n\t" #ifdef __GNUC__ - "BLT L_sp_384_mont_reduce_12_word_%=\n\t" + "BLT L_sp_384_mont_reduce_12_word\n\t" #else - "BLT.W L_sp_384_mont_reduce_12_word_%=\n\t" + "BLT.W L_sp_384_mont_reduce_12_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -44056,7 +44138,7 @@ static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" - "L_sp_384_mont_reduce_12_word_%=:\n\t" + "L_sp_384_mont_reduce_12_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], r6\n\t" /* a[i+0] += m[0] * mu */ @@ -44124,9 +44206,9 @@ static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0x30\n\t" #ifdef __GNUC__ - "BLT L_sp_384_mont_reduce_12_word_%=\n\t" + "BLT L_sp_384_mont_reduce_12_word\n\t" #else - "BLT.W L_sp_384_mont_reduce_12_word_%=\n\t" + "BLT.W L_sp_384_mont_reduce_12_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -44182,8 +44264,8 @@ SP_NOINLINE static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. 
*/ -static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_12(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_12(r, a, m, mp); for (; n > 1; n--) { @@ -44311,7 +44393,7 @@ static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x2c\n\t" "\n" - "L_sp_384_cmp_12_words_%=:\n\t" + "L_sp_384_cmp_12_words:\n\t" "LDR r4, [%[a], r6]\n\t" "LDR r5, [%[b], r6]\n\t" "AND r4, r4, r3\n\t" @@ -44324,7 +44406,7 @@ static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) "IT ne\n\t" "movne r3, r7\n\t" "SUBS r6, r6, #0x4\n\t" - "bcs L_sp_384_cmp_12_words_%=\n\t" + "bcs L_sp_384_cmp_12_words\n\t" "EOR r2, r2, r3\n\t" #else "LDR r4, [%[a], #44]\n\t" @@ -44614,7 +44696,7 @@ static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) "MOV r11, #0x0\n\t" "ADD r12, %[a], #0x30\n\t" "\n" - "L_sp_384_sub_12_word_%=:\n\t" + "L_sp_384_sub_12_word:\n\t" "RSBS r11, r11, #0x0\n\t" "LDM %[a]!, {r3, r4, r5, r6}\n\t" "LDM %[b]!, {r7, r8, r9, r10}\n\t" @@ -44626,9 +44708,9 @@ static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) "SBC r11, r3, r3\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_384_sub_12_word_%=\n\t" + "BNE L_sp_384_sub_12_word\n\t" #else - "BNE.N L_sp_384_sub_12_word_%=\n\t" + "BNE.N L_sp_384_sub_12_word\n\t" #endif "MOV %[r], r11\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -44715,7 +44797,7 @@ static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digi "MOV r8, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_384_cond_add_12_words_%=:\n\t" + "L_sp_384_cond_add_12_words:\n\t" "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" @@ -44726,9 +44808,9 @@ static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digi 
"ADD r4, r4, #0x4\n\t" "CMP r4, #0x30\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_384_cond_add_12_words_%=\n\t" + "BLT L_sp_384_cond_add_12_words\n\t" #else - "BLT.N L_sp_384_cond_add_12_words_%=\n\t" + "BLT.N L_sp_384_cond_add_12_words\n\t" #endif "MOV %[r], r5\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -48912,7 +48994,7 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) "MOV r10, #0x0\n\t" "ADD r11, %[a], #0x30\n\t" "\n" - "L_sp_384_sub_in_pkace_12_word_%=:\n\t" + "L_sp_384_sub_in_pkace_12_word:\n\t" "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" "LDM %[b]!, {r6, r7, r8, r9}\n\t" @@ -48924,9 +49006,9 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_384_sub_in_pkace_12_word_%=\n\t" + "BNE L_sp_384_sub_in_pkace_12_word\n\t" #else - "BNE.N L_sp_384_sub_in_pkace_12_word_%=\n\t" + "BNE.N L_sp_384_sub_in_pkace_12_word\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -49012,7 +49094,7 @@ static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) "MOV r5, #0x0\n\t" "MOV r9, #0x4\n\t" "\n" - "L_sp_384_mul_d_12_word_%=:\n\t" + "L_sp_384_mul_d_12_word:\n\t" /* A[i] * B */ "LDR r8, [%[a], r9]\n\t" "UMULL r6, r7, %[b], r8\n\t" @@ -49026,9 +49108,9 @@ static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) "ADD r9, r9, #0x4\n\t" "CMP r9, #0x30\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_384_mul_d_12_word_%=\n\t" + "BLT L_sp_384_mul_d_12_word\n\t" #else - "BLT.N L_sp_384_mul_d_12_word_%=\n\t" + "BLT.N L_sp_384_mul_d_12_word\n\t" #endif "STR r3, [%[r], #48]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -49135,9 +49217,9 @@ static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) * Note that this is an 
approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -49200,9 +49282,9 @@ static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -49226,7 +49308,7 @@ static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) /* Next 30 bits */ "MOV r4, #0x1d\n\t" "\n" - "L_div_384_word_12_bit_%=:\n\t" + "L_div_384_word_12_bit:\n\t" "LSLS r6, r6, #1\n\t" "ADC r7, r7, r7\n\t" "SUBS r8, r5, r7\n\t" @@ -49236,7 +49318,7 @@ static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) "AND r8, r8, r5\n\t" "SUBS r7, r7, r8\n\t" "SUBS r4, r4, #0x1\n\t" - "bpl L_div_384_word_12_bit_%=\n\t" + "bpl L_div_384_word_12_bit\n\t" "ADD r3, r3, r3\n\t" "ADD r3, r3, #0x1\n\t" "UMULL r6, r7, r3, %[div]\n\t" @@ -49900,9 +49982,9 @@ static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m "LDM %[a]!, {r4}\n\t" "ANDS r3, r4, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_div2_mod_12_even_%=\n\t" + "BEQ L_sp_384_div2_mod_12_even\n\t" 
#else - "BEQ.N L_sp_384_div2_mod_12_even_%=\n\t" + "BEQ.N L_sp_384_div2_mod_12_even\n\t" #endif "MOV r12, #0x0\n\t" "LDM %[a]!, {r5, r6, r7}\n\t" @@ -49927,9 +50009,13 @@ static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m "ADCS r7, r7, r11\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" "ADC r3, r12, r12\n\t" - "B L_sp_384_div2_mod_12_div2_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_div2_mod_12_div2\n\t" +#else + "B.N L_sp_384_div2_mod_12_div2\n\t" +#endif "\n" - "L_sp_384_div2_mod_12_even_%=:\n\t" + "L_sp_384_div2_mod_12_even:\n\t" "LDM %[a]!, {r5, r6, r7}\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" @@ -49937,7 +50023,7 @@ static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m "LDM %[a]!, {r4, r5, r6, r7}\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" "\n" - "L_sp_384_div2_mod_12_div2_%=:\n\t" + "L_sp_384_div2_mod_12_div2:\n\t" "SUB %[r], %[r], #0x30\n\t" "LDRD r8, r9, [%[r]]\n\t" "LSR r8, r8, #1\n\t" @@ -50006,152 +50092,196 @@ static int sp_384_num_bits_12(const sp_digit* a) "LDR r1, [%[a], #44]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_11_%=\n\t" + "BEQ L_sp_384_num_bits_12_11\n\t" #else - "BEQ.N L_sp_384_num_bits_12_11_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_11\n\t" #endif "MOV r2, #0x180\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_11_%=:\n\t" + "L_sp_384_num_bits_12_11:\n\t" "LDR r1, [%[a], #40]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_10_%=\n\t" + "BEQ L_sp_384_num_bits_12_10\n\t" #else - "BEQ.N L_sp_384_num_bits_12_10_%=\n\t" + "BEQ.N 
L_sp_384_num_bits_12_10\n\t" #endif "MOV r2, #0x160\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_10_%=:\n\t" + "L_sp_384_num_bits_12_10:\n\t" "LDR r1, [%[a], #36]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_9_%=\n\t" + "BEQ L_sp_384_num_bits_12_9\n\t" #else - "BEQ.N L_sp_384_num_bits_12_9_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_9\n\t" #endif "MOV r2, #0x140\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_9_%=:\n\t" + "L_sp_384_num_bits_12_9:\n\t" "LDR r1, [%[a], #32]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_8_%=\n\t" + "BEQ L_sp_384_num_bits_12_8\n\t" #else - "BEQ.N L_sp_384_num_bits_12_8_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_8\n\t" #endif "MOV r2, #0x120\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_8_%=:\n\t" + "L_sp_384_num_bits_12_8:\n\t" "LDR r1, [%[a], #28]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_7_%=\n\t" + "BEQ L_sp_384_num_bits_12_7\n\t" #else - "BEQ.N L_sp_384_num_bits_12_7_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_7\n\t" #endif "MOV r2, #0x100\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if 
defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_7_%=:\n\t" + "L_sp_384_num_bits_12_7:\n\t" "LDR r1, [%[a], #24]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_6_%=\n\t" + "BEQ L_sp_384_num_bits_12_6\n\t" #else - "BEQ.N L_sp_384_num_bits_12_6_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_6\n\t" #endif "MOV r2, #0xe0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_6_%=:\n\t" + "L_sp_384_num_bits_12_6:\n\t" "LDR r1, [%[a], #20]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_5_%=\n\t" + "BEQ L_sp_384_num_bits_12_5\n\t" #else - "BEQ.N L_sp_384_num_bits_12_5_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_5\n\t" #endif "MOV r2, #0xc0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_5_%=:\n\t" + "L_sp_384_num_bits_12_5:\n\t" "LDR r1, [%[a], #16]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_4_%=\n\t" + "BEQ L_sp_384_num_bits_12_4\n\t" #else - "BEQ.N L_sp_384_num_bits_12_4_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_4\n\t" #endif "MOV r2, #0xa0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" 
+#endif "\n" - "L_sp_384_num_bits_12_4_%=:\n\t" + "L_sp_384_num_bits_12_4:\n\t" "LDR r1, [%[a], #12]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_3_%=\n\t" + "BEQ L_sp_384_num_bits_12_3\n\t" #else - "BEQ.N L_sp_384_num_bits_12_3_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_3\n\t" #endif "MOV r2, #0x80\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_3_%=:\n\t" + "L_sp_384_num_bits_12_3:\n\t" "LDR r1, [%[a], #8]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_2_%=\n\t" + "BEQ L_sp_384_num_bits_12_2\n\t" #else - "BEQ.N L_sp_384_num_bits_12_2_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_2\n\t" #endif "MOV r2, #0x60\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_2_%=:\n\t" + "L_sp_384_num_bits_12_2:\n\t" "LDR r1, [%[a], #4]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_384_num_bits_12_1_%=\n\t" + "BEQ L_sp_384_num_bits_12_1\n\t" #else - "BEQ.N L_sp_384_num_bits_12_1_%=\n\t" + "BEQ.N L_sp_384_num_bits_12_1\n\t" #endif "MOV r2, #0x40\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_384_num_bits_12_13_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_384_num_bits_12_13\n\t" +#else + "B.N L_sp_384_num_bits_12_13\n\t" +#endif "\n" - "L_sp_384_num_bits_12_1_%=:\n\t" + "L_sp_384_num_bits_12_1:\n\t" "LDR r1, [%[a]]\n\t" "MOV r2, #0x20\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, 
r4\n\t" "\n" - "L_sp_384_num_bits_12_13_%=:\n\t" + "L_sp_384_num_bits_12_13:\n\t" "MOV %[a], r4\n\t" : [a] "+r" (a) : @@ -50608,7 +50738,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -50664,7 +50794,7 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -51312,64 +51442,83 @@ static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "LDR r11, [%[b]]\n\t" + "UMULL r8, r6, lr, r11\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_521_mul_17_outer_%=:\n\t" + "L_sp_521_mul_17_outer:\n\t" "SUBS r3, r5, #0x40\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_521_mul_17_inner_%=:\n\t" + "L_sp_521_mul_17_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[b], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" + "LDR lr, [%[a], r4]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x44\n\t" + "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_mul_17_inner_done_%=\n\t" + "BGT L_sp_521_mul_17_inner_done\n\t" #else - "BEQ.N L_sp_521_mul_17_inner_done_%=\n\t" + "BGT.N L_sp_521_mul_17_inner_done\n\t" 
#endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_521_mul_17_inner_%=\n\t" + "BLT L_sp_521_mul_17_inner\n\t" #else - "BLE.N L_sp_521_mul_17_inner_%=\n\t" + "BLT.N L_sp_521_mul_17_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_521_mul_17_inner_done_%=:\n\t" + "L_sp_521_mul_17_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0x80\n\t" + "CMP r5, #0x7c\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_521_mul_17_outer_%=\n\t" + "BLE L_sp_521_mul_17_outer\n\t" #else - "BLE.N L_sp_521_mul_17_outer_%=\n\t" + "BLE.N L_sp_521_mul_17_outer\n\t" #endif + "LDR lr, [%[a], #64]\n\t" + "LDR r11, [%[b], #64]\n\t" + "UMLAL r6, r7, lr, r11\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "LDM sp!, {r6, r7}\n\t" "STM %[r]!, {r6, r7}\n\t" "SUB r5, r5, #0x8\n\t" "\n" - "L_sp_521_mul_17_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_521_mul_17_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_521_mul_17_store_%=\n\t" + "BGT L_sp_521_mul_17_store\n\t" #else - "BGT.N L_sp_521_mul_17_store_%=\n\t" + "BGT.N L_sp_521_mul_17_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -53439,24 +53588,20 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "UMULL r8, r6, lr, lr\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0x4\n\t" 
"\n" - "L_sp_521_sqr_17_outer_%=:\n\t" + "L_sp_521_sqr_17_outer:\n\t" "SUBS r3, r5, #0x40\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_521_sqr_17_inner_%=:\n\t" - "CMP r4, r3\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_sqr_17_op_sqr_%=\n\t" -#else - "BEQ.N L_sp_521_sqr_17_op_sqr_%=\n\t" -#endif + "L_sp_521_sqr_17_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[a], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" @@ -53466,62 +53611,54 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" - "bal L_sp_521_sqr_17_op_done_%=\n\t" - "\n" - "L_sp_521_sqr_17_op_sqr_%=:\n\t" - "LDR lr, [%[a], r3]\n\t" - "UMULL r9, r10, lr, lr\n\t" - "ADDS r6, r6, r9\n\t" - "ADCS r7, r7, r10\n\t" - "ADC r8, r8, #0x0\n\t" - "\n" - "L_sp_521_sqr_17_op_done_%=:\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x44\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_sqr_17_inner_done_%=\n\t" -#else - "BEQ.N L_sp_521_sqr_17_inner_done_%=\n\t" -#endif "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_521_sqr_17_inner_done_%=\n\t" + "BGT L_sp_521_sqr_17_inner_done\n\t" #else - "BGT.N L_sp_521_sqr_17_inner_done_%=\n\t" + "BGT.N L_sp_521_sqr_17_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_521_sqr_17_inner_%=\n\t" + "BLT L_sp_521_sqr_17_inner\n\t" #else - "BLE.N L_sp_521_sqr_17_inner_%=\n\t" + "BLT.N L_sp_521_sqr_17_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "UMULL r9, r10, lr, lr\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_521_sqr_17_inner_done_%=:\n\t" + "L_sp_521_sqr_17_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, 
#0x4\n\t" - "CMP r5, #0x80\n\t" + "CMP r5, #0x7c\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_521_sqr_17_outer_%=\n\t" + "BLE L_sp_521_sqr_17_outer\n\t" #else - "BLE.N L_sp_521_sqr_17_outer_%=\n\t" + "BLE.N L_sp_521_sqr_17_outer\n\t" #endif + "LDR lr, [%[a], #64]\n\t" + "UMLAL r6, r7, lr, lr\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "LDM sp!, {r6, r7}\n\t" "STM %[r]!, {r6, r7}\n\t" "SUB r5, r5, #0x8\n\t" "\n" - "L_sp_521_sqr_17_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_521_sqr_17_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_521_sqr_17_store_%=\n\t" + "BGT L_sp_521_sqr_17_store\n\t" #else - "BGT.N L_sp_521_sqr_17_store_%=\n\t" + "BGT.N L_sp_521_sqr_17_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a) : @@ -54838,7 +54975,7 @@ static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x40\n\t" "\n" - "L_sp_521_add_17_word_%=:\n\t" + "L_sp_521_add_17_word:\n\t" "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" @@ -54851,9 +54988,9 @@ static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) "ADC r3, r4, #0x0\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_521_add_17_word_%=\n\t" + "BNE L_sp_521_add_17_word\n\t" #else - "BNE.N L_sp_521_add_17_word_%=\n\t" + "BNE.N L_sp_521_add_17_word\n\t" #endif "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a], {r4}\n\t" @@ -55171,7 +55308,7 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digi "MOV r4, #0x0\n\t" "MOV r5, #0x0\n\t" "\n" - "L_sp_521_cond_sub_17_words_%=:\n\t" 
+ "L_sp_521_cond_sub_17_words:\n\t" "SUBS r4, r8, r4\n\t" "LDR r6, [%[a], r5]\n\t" "LDR r7, [%[b], r5]\n\t" @@ -55182,9 +55319,9 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digi "ADD r5, r5, #0x4\n\t" "CMP r5, #0x44\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_521_cond_sub_17_words_%=\n\t" + "BLT L_sp_521_cond_sub_17_words\n\t" #else - "BLT.N L_sp_521_cond_sub_17_words_%=\n\t" + "BLT.N L_sp_521_cond_sub_17_words\n\t" #endif "MOV %[r], r4\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -55451,19 +55588,19 @@ static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_521_mont_reduce_order_17_word_%=:\n\t" + "L_sp_521_mont_reduce_order_17_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" "CMP r11, #0x40\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_521_mont_reduce_order_17_nomask_%=\n\t" + "BNE L_sp_521_mont_reduce_order_17_nomask\n\t" #else - "BNE.N L_sp_521_mont_reduce_order_17_nomask_%=\n\t" + "BNE.N L_sp_521_mont_reduce_order_17_nomask\n\t" #endif "MOV r9, #0x1ff\n\t" "AND r10, r10, r9\n\t" "\n" - "L_sp_521_mont_reduce_order_17_nomask_%=:\n\t" + "L_sp_521_mont_reduce_order_17_nomask:\n\t" /* a[i+0] += m[0] * mu */ "MOV r7, #0x0\n\t" "UMLAL r4, r7, r10, lr\n\t" @@ -55605,9 +55742,9 @@ static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit "ADD %[a], %[a], #0x4\n\t" "CMP r11, #0x44\n\t" #ifdef __GNUC__ - "BLT L_sp_521_mont_reduce_order_17_word_%=\n\t" + "BLT L_sp_521_mont_reduce_order_17_word\n\t" #else - "BLT.W L_sp_521_mont_reduce_order_17_word_%=\n\t" + "BLT.W L_sp_521_mont_reduce_order_17_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -55719,19 +55856,19 @@ static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" 
- "L_sp_521_mont_reduce_order_17_word_%=:\n\t" + "L_sp_521_mont_reduce_order_17_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], r6\n\t" "CMP r4, #0x40\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_521_mont_reduce_order_17_nomask_%=\n\t" + "BNE L_sp_521_mont_reduce_order_17_nomask\n\t" #else - "BNE.N L_sp_521_mont_reduce_order_17_nomask_%=\n\t" + "BNE.N L_sp_521_mont_reduce_order_17_nomask\n\t" #endif "MOV r12, #0x1ff\n\t" "AND lr, lr, r12\n\t" "\n" - "L_sp_521_mont_reduce_order_17_nomask_%=:\n\t" + "L_sp_521_mont_reduce_order_17_nomask:\n\t" /* a[i+0] += m[0] * mu */ "LDR r12, [%[m]]\n\t" "MOV r3, #0x0\n\t" @@ -55823,9 +55960,9 @@ static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0x44\n\t" #ifdef __GNUC__ - "BLT L_sp_521_mont_reduce_order_17_word_%=\n\t" + "BLT L_sp_521_mont_reduce_order_17_word\n\t" #else - "BLT.W L_sp_521_mont_reduce_order_17_word_%=\n\t" + "BLT.W L_sp_521_mont_reduce_order_17_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -55951,8 +56088,8 @@ SP_NOINLINE static void sp_521_mont_sqr_17(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. 
*/ -static void sp_521_mont_sqr_n_17(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_521_mont_sqr_n_17(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_521_mont_sqr_17(r, a, m, mp); for (; n > 1; n--) { @@ -56077,7 +56214,7 @@ static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x40\n\t" "\n" - "L_sp_521_cmp_17_words_%=:\n\t" + "L_sp_521_cmp_17_words:\n\t" "LDR r4, [%[a], r6]\n\t" "LDR r5, [%[b], r6]\n\t" "AND r4, r4, r3\n\t" @@ -56090,7 +56227,7 @@ static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) "IT ne\n\t" "movne r3, r7\n\t" "SUBS r6, r6, #0x4\n\t" - "bcs L_sp_521_cmp_17_words_%=\n\t" + "bcs L_sp_521_cmp_17_words\n\t" "EOR r2, r2, r3\n\t" #else "LDR r4, [%[a], #64]\n\t" @@ -61870,7 +62007,7 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) "MOV r10, #0x0\n\t" "ADD r11, %[a], #0x40\n\t" "\n" - "L_sp_521_sub_in_pkace_17_word_%=:\n\t" + "L_sp_521_sub_in_pkace_17_word:\n\t" "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" "LDM %[b]!, {r6, r7, r8, r9}\n\t" @@ -61882,9 +62019,9 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_521_sub_in_pkace_17_word_%=\n\t" + "BNE L_sp_521_sub_in_pkace_17_word\n\t" #else - "BNE.N L_sp_521_sub_in_pkace_17_word_%=\n\t" + "BNE.N L_sp_521_sub_in_pkace_17_word\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2}\n\t" @@ -61986,7 +62123,7 @@ static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) "MOV r5, #0x0\n\t" "MOV r9, #0x4\n\t" "\n" - "L_sp_521_mul_d_17_word_%=:\n\t" + "L_sp_521_mul_d_17_word:\n\t" /* A[i] * B */ "LDR r8, [%[a], r9]\n\t" "UMULL r6, r7, %[b], r8\n\t" @@ -62000,9 +62137,9 @@ static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) "ADD r9, r9, 
#0x4\n\t" "CMP r9, #0x44\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_521_mul_d_17_word_%=\n\t" + "BLT L_sp_521_mul_d_17_word\n\t" #else - "BLT.N L_sp_521_mul_d_17_word_%=\n\t" + "BLT.N L_sp_521_mul_d_17_word\n\t" #endif "STR r3, [%[r], #68]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -62134,9 +62271,9 @@ static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -62199,9 +62336,9 @@ static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) * Note that this is an approximate div. It may give an answer 1 larger. 
*/ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -62225,7 +62362,7 @@ static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) /* Next 30 bits */ "MOV r4, #0x1d\n\t" "\n" - "L_div_521_word_17_bit_%=:\n\t" + "L_div_521_word_17_bit:\n\t" "LSLS r6, r6, #1\n\t" "ADC r7, r7, r7\n\t" "SUBS r8, r5, r7\n\t" @@ -62235,7 +62372,7 @@ static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) "AND r8, r8, r5\n\t" "SUBS r7, r7, r8\n\t" "SUBS r4, r4, #0x1\n\t" - "bpl L_div_521_word_17_bit_%=\n\t" + "bpl L_div_521_word_17_bit\n\t" "ADD r3, r3, r3\n\t" "ADD r3, r3, #0x1\n\t" "UMULL r6, r7, r3, %[div]\n\t" @@ -62930,7 +63067,7 @@ static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) "MOV r11, #0x0\n\t" "ADD r12, %[a], #0x40\n\t" "\n" - "L_sp_521_sub_17_word_%=:\n\t" + "L_sp_521_sub_17_word:\n\t" "RSBS r11, r11, #0x0\n\t" "LDM %[a]!, {r3, r4, r5, r6}\n\t" "LDM %[b]!, {r7, r8, r9, r10}\n\t" @@ -62942,9 +63079,9 @@ static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) "SBC r11, r3, r3\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_521_sub_17_word_%=\n\t" + "BNE L_sp_521_sub_17_word\n\t" #else - "BNE.N L_sp_521_sub_17_word_%=\n\t" + "BNE.N L_sp_521_sub_17_word\n\t" #endif "RSBS r11, r11, #0x0\n\t" "LDM %[a]!, {r3}\n\t" @@ -63042,9 +63179,9 @@ static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m "LDM %[a]!, {r4}\n\t" "ANDS r3, r4, #0x1\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - 
"BEQ L_sp_521_div2_mod_17_even_%=\n\t" + "BEQ L_sp_521_div2_mod_17_even\n\t" #else - "BEQ.N L_sp_521_div2_mod_17_even_%=\n\t" + "BEQ.N L_sp_521_div2_mod_17_even\n\t" #endif "MOV r12, #0x0\n\t" "LDM %[a]!, {r5, r6, r7}\n\t" @@ -63080,9 +63217,13 @@ static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m "ADCS r4, r4, r8\n\t" "STM %[r]!, {r4}\n\t" "ADC r3, r12, r12\n\t" - "B L_sp_521_div2_mod_17_div2_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_div2_mod_17_div2\n\t" +#else + "B.N L_sp_521_div2_mod_17_div2\n\t" +#endif "\n" - "L_sp_521_div2_mod_17_even_%=:\n\t" + "L_sp_521_div2_mod_17_even:\n\t" "LDM %[a]!, {r5, r6, r7}\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" @@ -63094,7 +63235,7 @@ static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m "LDM %[a]!, {r4}\n\t" "STM %[r]!, {r4}\n\t" "\n" - "L_sp_521_div2_mod_17_div2_%=:\n\t" + "L_sp_521_div2_mod_17_div2:\n\t" "SUB %[r], %[r], #0x44\n\t" "LDRD r8, r9, [%[r]]\n\t" "LSR r8, r8, #1\n\t" @@ -63183,217 +63324,281 @@ static int sp_521_num_bits_17(const sp_digit* a) "LDR r1, [%[a], #64]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_16_%=\n\t" + "BEQ L_sp_521_num_bits_17_16\n\t" #else - "BEQ.N L_sp_521_num_bits_17_16_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_16\n\t" #endif "MOV r2, #0x220\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_16_%=:\n\t" + "L_sp_521_num_bits_17_16:\n\t" "LDR r1, [%[a], #60]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_15_%=\n\t" + "BEQ L_sp_521_num_bits_17_15\n\t" #else - "BEQ.N 
L_sp_521_num_bits_17_15_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_15\n\t" #endif "MOV r2, #0x200\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_15_%=:\n\t" + "L_sp_521_num_bits_17_15:\n\t" "LDR r1, [%[a], #56]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_14_%=\n\t" + "BEQ L_sp_521_num_bits_17_14\n\t" #else - "BEQ.N L_sp_521_num_bits_17_14_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_14\n\t" #endif "MOV r2, #0x1e0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_14_%=:\n\t" + "L_sp_521_num_bits_17_14:\n\t" "LDR r1, [%[a], #52]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_13_%=\n\t" + "BEQ L_sp_521_num_bits_17_13\n\t" #else - "BEQ.N L_sp_521_num_bits_17_13_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_13\n\t" #endif "MOV r2, #0x1c0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_13_%=:\n\t" + "L_sp_521_num_bits_17_13:\n\t" "LDR r1, [%[a], #48]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_12_%=\n\t" + "BEQ L_sp_521_num_bits_17_12\n\t" #else - "BEQ.N L_sp_521_num_bits_17_12_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_12\n\t" #endif "MOV r2, #0x1a0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, 
r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_12_%=:\n\t" + "L_sp_521_num_bits_17_12:\n\t" "LDR r1, [%[a], #44]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_11_%=\n\t" + "BEQ L_sp_521_num_bits_17_11\n\t" #else - "BEQ.N L_sp_521_num_bits_17_11_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_11\n\t" #endif "MOV r2, #0x180\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_11_%=:\n\t" + "L_sp_521_num_bits_17_11:\n\t" "LDR r1, [%[a], #40]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_10_%=\n\t" + "BEQ L_sp_521_num_bits_17_10\n\t" #else - "BEQ.N L_sp_521_num_bits_17_10_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_10\n\t" #endif "MOV r2, #0x160\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_10_%=:\n\t" + "L_sp_521_num_bits_17_10:\n\t" "LDR r1, [%[a], #36]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_9_%=\n\t" + "BEQ L_sp_521_num_bits_17_9\n\t" #else - "BEQ.N L_sp_521_num_bits_17_9_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_9\n\t" #endif "MOV r2, #0x140\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B 
L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_9_%=:\n\t" + "L_sp_521_num_bits_17_9:\n\t" "LDR r1, [%[a], #32]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_8_%=\n\t" + "BEQ L_sp_521_num_bits_17_8\n\t" #else - "BEQ.N L_sp_521_num_bits_17_8_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_8\n\t" #endif "MOV r2, #0x120\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_8_%=:\n\t" + "L_sp_521_num_bits_17_8:\n\t" "LDR r1, [%[a], #28]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_7_%=\n\t" + "BEQ L_sp_521_num_bits_17_7\n\t" #else - "BEQ.N L_sp_521_num_bits_17_7_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_7\n\t" #endif "MOV r2, #0x100\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_7_%=:\n\t" + "L_sp_521_num_bits_17_7:\n\t" "LDR r1, [%[a], #24]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_6_%=\n\t" + "BEQ L_sp_521_num_bits_17_6\n\t" #else - "BEQ.N L_sp_521_num_bits_17_6_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_6\n\t" #endif "MOV r2, #0xe0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_6_%=:\n\t" + "L_sp_521_num_bits_17_6:\n\t" 
"LDR r1, [%[a], #20]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_5_%=\n\t" + "BEQ L_sp_521_num_bits_17_5\n\t" #else - "BEQ.N L_sp_521_num_bits_17_5_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_5\n\t" #endif "MOV r2, #0xc0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_5_%=:\n\t" + "L_sp_521_num_bits_17_5:\n\t" "LDR r1, [%[a], #16]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_4_%=\n\t" + "BEQ L_sp_521_num_bits_17_4\n\t" #else - "BEQ.N L_sp_521_num_bits_17_4_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_4\n\t" #endif "MOV r2, #0xa0\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_4_%=:\n\t" + "L_sp_521_num_bits_17_4:\n\t" "LDR r1, [%[a], #12]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_3_%=\n\t" + "BEQ L_sp_521_num_bits_17_3\n\t" #else - "BEQ.N L_sp_521_num_bits_17_3_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_3\n\t" #endif "MOV r2, #0x80\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_3_%=:\n\t" + "L_sp_521_num_bits_17_3:\n\t" "LDR r1, [%[a], #8]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ 
L_sp_521_num_bits_17_2_%=\n\t" + "BEQ L_sp_521_num_bits_17_2\n\t" #else - "BEQ.N L_sp_521_num_bits_17_2_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_2\n\t" #endif "MOV r2, #0x60\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_2_%=:\n\t" + "L_sp_521_num_bits_17_2:\n\t" "LDR r1, [%[a], #4]\n\t" "CMP r1, #0x0\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_521_num_bits_17_1_%=\n\t" + "BEQ L_sp_521_num_bits_17_1\n\t" #else - "BEQ.N L_sp_521_num_bits_17_1_%=\n\t" + "BEQ.N L_sp_521_num_bits_17_1\n\t" #endif "MOV r2, #0x40\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" - "B L_sp_521_num_bits_17_18_%=\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "B L_sp_521_num_bits_17_18\n\t" +#else + "B.N L_sp_521_num_bits_17_18\n\t" +#endif "\n" - "L_sp_521_num_bits_17_1_%=:\n\t" + "L_sp_521_num_bits_17_1:\n\t" "LDR r1, [%[a]]\n\t" "MOV r2, #0x20\n\t" "CLZ r4, r1\n\t" "SUB r4, r2, r4\n\t" "\n" - "L_sp_521_num_bits_17_18_%=:\n\t" + "L_sp_521_num_bits_17_18:\n\t" "MOV %[a], r4\n\t" : [a] "+r" (a) : @@ -63862,7 +64067,7 @@ int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -63918,7 +64123,7 @@ static int sp_521_ecc_is_point_17(const sp_point_521* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
@@ -67780,61 +67985,80 @@ static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "LDR r11, [%[b]]\n\t" + "UMULL r8, r6, lr, r11\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_1024_mul_32_outer_%=:\n\t" + "L_sp_1024_mul_32_outer:\n\t" "SUBS r3, r5, #0x7c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_1024_mul_32_inner_%=:\n\t" + "L_sp_1024_mul_32_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[b], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" + "LDR lr, [%[a], r4]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x80\n\t" + "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_1024_mul_32_inner_done_%=\n\t" + "BGT L_sp_1024_mul_32_inner_done\n\t" #else - "BEQ.N L_sp_1024_mul_32_inner_done_%=\n\t" + "BGT.N L_sp_1024_mul_32_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_1024_mul_32_inner_%=\n\t" + "BLT L_sp_1024_mul_32_inner\n\t" #else - "BLE.N L_sp_1024_mul_32_inner_%=\n\t" + "BLT.N L_sp_1024_mul_32_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "LDR r11, [%[b], r3]\n\t" + "UMULL r9, r10, lr, r11\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_1024_mul_32_inner_done_%=:\n\t" + "L_sp_1024_mul_32_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0xf8\n\t" + "CMP r5, #0xf4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE 
L_sp_1024_mul_32_outer_%=\n\t" + "BLE L_sp_1024_mul_32_outer\n\t" #else - "BLE.N L_sp_1024_mul_32_outer_%=\n\t" + "BLE.N L_sp_1024_mul_32_outer\n\t" #endif + "LDR lr, [%[a], #124]\n\t" + "LDR r11, [%[b], #124]\n\t" + "UMLAL r6, r7, lr, r11\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_1024_mul_32_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_1024_mul_32_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_1024_mul_32_store_%=\n\t" + "BGT L_sp_1024_mul_32_store\n\t" #else - "BGT.N L_sp_1024_mul_32_store_%=\n\t" + "BGT.N L_sp_1024_mul_32_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -67860,24 +68084,20 @@ static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" - "MOV r6, #0x0\n\t" + "LDR lr, [%[a]]\n\t" + "UMULL r8, r6, lr, lr\n\t" + "STR r8, [sp]\n\t" "MOV r7, #0x0\n\t" "MOV r8, #0x0\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0x4\n\t" "\n" - "L_sp_1024_sqr_32_outer_%=:\n\t" + "L_sp_1024_sqr_32_outer:\n\t" "SUBS r3, r5, #0x7c\n\t" "IT cc\n\t" - "movcc r3, #0\n\t" + "MOVCC r3, #0x0\n\t" "SUB r4, r5, r3\n\t" "\n" - "L_sp_1024_sqr_32_inner_%=:\n\t" - "CMP r4, r3\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_1024_sqr_32_op_sqr_%=\n\t" -#else - "BEQ.N L_sp_1024_sqr_32_op_sqr_%=\n\t" -#endif + "L_sp_1024_sqr_32_inner:\n\t" "LDR lr, [%[a], r3]\n\t" "LDR r11, [%[a], r4]\n\t" "UMULL r9, r10, lr, r11\n\t" @@ -67887,59 +68107,51 @@ static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) "ADDS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" "ADC r8, r8, #0x0\n\t" - "bal L_sp_1024_sqr_32_op_done_%=\n\t" - "\n" - "L_sp_1024_sqr_32_op_sqr_%=:\n\t" - "LDR lr, [%[a], r3]\n\t" - 
"UMULL r9, r10, lr, lr\n\t" - "ADDS r6, r6, r9\n\t" - "ADCS r7, r7, r10\n\t" - "ADC r8, r8, #0x0\n\t" - "\n" - "L_sp_1024_sqr_32_op_done_%=:\n\t" "ADD r3, r3, #0x4\n\t" "SUB r4, r4, #0x4\n\t" - "CMP r3, #0x80\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BEQ L_sp_1024_sqr_32_inner_done_%=\n\t" -#else - "BEQ.N L_sp_1024_sqr_32_inner_done_%=\n\t" -#endif "CMP r3, r4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_1024_sqr_32_inner_done_%=\n\t" + "BGT L_sp_1024_sqr_32_inner_done\n\t" #else - "BGT.N L_sp_1024_sqr_32_inner_done_%=\n\t" + "BGT.N L_sp_1024_sqr_32_inner_done\n\t" #endif - "CMP r3, r5\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_1024_sqr_32_inner_%=\n\t" + "BLT L_sp_1024_sqr_32_inner\n\t" #else - "BLE.N L_sp_1024_sqr_32_inner_%=\n\t" + "BLT.N L_sp_1024_sqr_32_inner\n\t" #endif + "LDR lr, [%[a], r3]\n\t" + "UMULL r9, r10, lr, lr\n\t" + "ADDS r6, r6, r9\n\t" + "ADCS r7, r7, r10\n\t" + "ADC r8, r8, #0x0\n\t" "\n" - "L_sp_1024_sqr_32_inner_done_%=:\n\t" + "L_sp_1024_sqr_32_inner_done:\n\t" "STR r6, [sp, r5]\n\t" "MOV r6, r7\n\t" "MOV r7, r8\n\t" "MOV r8, #0x0\n\t" "ADD r5, r5, #0x4\n\t" - "CMP r5, #0xf8\n\t" + "CMP r5, #0xf4\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLE L_sp_1024_sqr_32_outer_%=\n\t" + "BLE L_sp_1024_sqr_32_outer\n\t" #else - "BLE.N L_sp_1024_sqr_32_outer_%=\n\t" + "BLE.N L_sp_1024_sqr_32_outer\n\t" #endif + "LDR lr, [%[a], #124]\n\t" + "UMLAL r6, r7, lr, lr\n\t" "STR r6, [sp, r5]\n\t" + "ADD r5, r5, #0x4\n\t" + "STR r7, [sp, r5]\n\t" "\n" - "L_sp_1024_sqr_32_store_%=:\n\t" - "LDM sp!, {r6, r7, r8, r9}\n\t" - "STM %[r]!, {r6, r7, r8, r9}\n\t" - "SUBS r5, r5, #0x10\n\t" + "L_sp_1024_sqr_32_store:\n\t" + "LDM sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "STM %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "SUBS r5, r5, #0x20\n\t" #if defined(__GNUC__) || 
defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BGT L_sp_1024_sqr_32_store_%=\n\t" + "BGT L_sp_1024_sqr_32_store\n\t" #else - "BGT.N L_sp_1024_sqr_32_store_%=\n\t" + "BGT.N L_sp_1024_sqr_32_store\n\t" #endif : [r] "+r" (r), [a] "+r" (a) : @@ -68054,7 +68266,7 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) "MOV r10, #0x0\n\t" "ADD r11, %[a], #0x80\n\t" "\n" - "L_sp_1024_sub_in_pkace_32_word_%=:\n\t" + "L_sp_1024_sub_in_pkace_32_word:\n\t" "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" "LDM %[b]!, {r6, r7, r8, r9}\n\t" @@ -68066,9 +68278,9 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_1024_sub_in_pkace_32_word_%=\n\t" + "BNE L_sp_1024_sub_in_pkace_32_word\n\t" #else - "BNE.N L_sp_1024_sub_in_pkace_32_word_%=\n\t" + "BNE.N L_sp_1024_sub_in_pkace_32_word\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -68106,7 +68318,7 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig "MOV r4, #0x0\n\t" "MOV r5, #0x0\n\t" "\n" - "L_sp_1024_cond_sub_32_words_%=:\n\t" + "L_sp_1024_cond_sub_32_words:\n\t" "SUBS r4, r8, r4\n\t" "LDR r6, [%[a], r5]\n\t" "LDR r7, [%[b], r5]\n\t" @@ -68117,9 +68329,9 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig "ADD r5, r5, #0x4\n\t" "CMP r5, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_1024_cond_sub_32_words_%=\n\t" + "BLT L_sp_1024_cond_sub_32_words\n\t" #else - "BLT.N L_sp_1024_cond_sub_32_words_%=\n\t" + "BLT.N L_sp_1024_cond_sub_32_words\n\t" #endif "MOV %[r], r4\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -68297,7 +68509,7 @@ static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x80\n\t" "\n" - 
"L_sp_1024_add_32_word_%=:\n\t" + "L_sp_1024_add_32_word:\n\t" "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" @@ -68310,9 +68522,9 @@ static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b "ADC r3, r4, #0x0\n\t" "CMP %[a], r12\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_1024_add_32_word_%=\n\t" + "BNE L_sp_1024_add_32_word\n\t" #else - "BNE.N L_sp_1024_add_32_word_%=\n\t" + "BNE.N L_sp_1024_add_32_word\n\t" #endif "MOV %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -68351,7 +68563,7 @@ static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) "MOV r5, #0x0\n\t" "MOV r9, #0x4\n\t" "\n" - "L_sp_1024_mul_d_32_word_%=:\n\t" + "L_sp_1024_mul_d_32_word:\n\t" /* A[i] * B */ "LDR r8, [%[a], r9]\n\t" "UMULL r6, r7, %[b], r8\n\t" @@ -68365,9 +68577,9 @@ static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) "ADD r9, r9, #0x4\n\t" "CMP r9, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_1024_mul_d_32_word_%=\n\t" + "BLT L_sp_1024_mul_d_32_word\n\t" #else - "BLT.N L_sp_1024_mul_d_32_word_%=\n\t" + "BLT.N L_sp_1024_mul_d_32_word\n\t" #endif "STR r3, [%[r], #128]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -68574,9 +68786,9 @@ static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) * Note that this is an approximate div. It may give an answer 1 larger. 
*/ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -68639,9 +68851,9 @@ static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -68665,7 +68877,7 @@ static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) /* Next 30 bits */ "MOV r4, #0x1d\n\t" "\n" - "L_div_1024_word_32_bit_%=:\n\t" + "L_div_1024_word_32_bit:\n\t" "LSLS r6, r6, #1\n\t" "ADC r7, r7, r7\n\t" "SUBS r8, r5, r7\n\t" @@ -68675,7 +68887,7 @@ static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) "AND r8, r8, r5\n\t" "SUBS r7, r7, r8\n\t" "SUBS r4, r4, #0x1\n\t" - "bpl L_div_1024_word_32_bit_%=\n\t" + "bpl L_div_1024_word_32_bit\n\t" "ADD r3, r3, r3\n\t" "ADD r3, r3, #0x1\n\t" "UMULL r6, r7, r3, %[div]\n\t" @@ -68757,7 +68969,7 @@ static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x7c\n\t" "\n" - "L_sp_1024_cmp_32_words_%=:\n\t" + "L_sp_1024_cmp_32_words:\n\t" "LDR r4, [%[a], r6]\n\t" "LDR r5, [%[b], r6]\n\t" "AND r4, r4, r3\n\t" @@ -68770,7 +68982,7 @@ static sp_int32 sp_1024_cmp_32(const 
sp_digit* a, const sp_digit* b) "IT ne\n\t" "movne r3, r7\n\t" "SUBS r6, r6, #0x4\n\t" - "bcs L_sp_1024_cmp_32_words_%=\n\t" + "bcs L_sp_1024_cmp_32_words\n\t" "EOR r2, r2, r3\n\t" #else "LDR r4, [%[a], #124]\n\t" @@ -69490,7 +69702,7 @@ static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r4, [%[a]]\n\t" "LDR r5, [%[a], #4]\n\t" "\n" - "L_sp_1024_mont_reduce_32_word_%=:\n\t" + "L_sp_1024_mont_reduce_32_word:\n\t" /* mu = a[i] * mp */ "MUL r10, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -69753,9 +69965,9 @@ static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r11, #0x80\n\t" #ifdef __GNUC__ - "BLT L_sp_1024_mont_reduce_32_word_%=\n\t" + "BLT L_sp_1024_mont_reduce_32_word\n\t" #else - "BLT.W L_sp_1024_mont_reduce_32_word_%=\n\t" + "BLT.W L_sp_1024_mont_reduce_32_word\n\t" #endif /* Loop Done */ "STR r4, [%[a]]\n\t" @@ -69802,7 +70014,7 @@ static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "LDR r9, [%[a], #12]\n\t" "LDR r10, [%[a], #16]\n\t" "\n" - "L_sp_1024_mont_reduce_32_word_%=:\n\t" + "L_sp_1024_mont_reduce_32_word:\n\t" /* mu = a[i] * mp */ "MUL lr, %[mp], r6\n\t" /* a[i+0] += m[0] * mu */ @@ -69970,9 +70182,9 @@ static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) "ADD %[a], %[a], #0x4\n\t" "CMP r4, #0x80\n\t" #ifdef __GNUC__ - "BLT L_sp_1024_mont_reduce_32_word_%=\n\t" + "BLT L_sp_1024_mont_reduce_32_word\n\t" #else - "BLT.W L_sp_1024_mont_reduce_32_word_%=\n\t" + "BLT.W L_sp_1024_mont_reduce_32_word\n\t" #endif /* Loop Done */ "STR r6, [%[a]]\n\t" @@ -70987,7 +71199,7 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig "MOV r8, #0x0\n\t" "MOV r4, #0x0\n\t" "\n" - "L_sp_1024_cond_add_32_words_%=:\n\t" + "L_sp_1024_cond_add_32_words:\n\t" "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" @@ -70998,9 +71210,9 @@ static sp_digit 
sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig "ADD r4, r4, #0x4\n\t" "CMP r4, #0x80\n\t" #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BLT L_sp_1024_cond_add_32_words_%=\n\t" + "BLT L_sp_1024_cond_add_32_words\n\t" #else - "BLT.N L_sp_1024_cond_add_32_words_%=\n\t" + "BLT.N L_sp_1024_cond_add_32_words\n\t" #endif "MOV %[r], r5\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -80285,7 +80497,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) } } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -80345,7 +80557,7 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 6dd4e6a8bc..06c01ab005 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -6346,7 +6346,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo, if (r > SP_HALF_MAX) { r = SP_HALF_MAX; } - /* Shift up result for trial division calucation. */ + /* Shift up result for trial division calculation. */ r <<= SP_HALF_SIZE; /* Calculate trial value. */ trial = r * (sp_int_word)d; @@ -12416,7 +12416,7 @@ static int _sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r, int bit = sp_is_bit_set(e, (unsigned int)i); /* 6.2. j += bit - * Update count of consequitive 1 bits. + * Update count of consecutive 1 bits. */ j += bit; /* 6.3. 
s += 1 @@ -13107,7 +13107,7 @@ static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits, DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1); /* Window bits based on number of pre-calculations versus number of loop - * calculcations. + * calculations. * Exponents for RSA and DH will result in 6-bit windows. */ if (bits > 450) { diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index 916a32fbff..990a999cbb 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -55,6 +55,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -8408,8 +8409,8 @@ extern void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const sp_digit* m, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_n_4(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_4(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_4(r, a, m, mp); for (; n > 1; n--) { @@ -9608,8 +9609,8 @@ extern void sp_256_mont_sqr_avx2_4(sp_digit* r, const sp_digit* a, const sp_digi * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_n_avx2_4(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_avx2_4(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_avx2_4(r, a, m, mp); for (; n > 1; n--) { @@ -26391,7 +26392,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. 
@@ -26447,7 +26448,7 @@ static int sp_256_ecc_is_point_4(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -27534,8 +27535,8 @@ SP_NOINLINE static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_n_6(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_6(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_6(r, a, m, mp); for (; n > 1; n--) { @@ -28768,8 +28769,8 @@ SP_NOINLINE static void sp_384_mont_sqr_avx2_6(sp_digit* r, const sp_digit* a, * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_n_avx2_6(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_avx2_6(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_avx2_6(r, a, m, mp); for (; n > 1; n--) { @@ -51338,7 +51339,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -51394,7 +51395,7 @@ static int sp_384_ecc_is_point_6(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -52415,8 +52416,8 @@ extern void sp_521_mont_sqr_9(sp_digit* r, const sp_digit* a, const sp_digit* m, * m Modulus (prime). * mp Montgomery multiplier. 
*/ -static void sp_521_mont_sqr_n_9(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_521_mont_sqr_n_9(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_521_mont_sqr_9(r, a, m, mp); for (; n > 1; n--) { @@ -53641,8 +53642,8 @@ extern void sp_521_mont_sqr_avx2_9(sp_digit* r, const sp_digit* a, const sp_digi * m Modulus (prime). * mp Montgomery multiplier. */ -static void sp_521_mont_sqr_n_avx2_9(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_521_mont_sqr_n_avx2_9(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_521_mont_sqr_avx2_9(r, a, m, mp); for (; n > 1; n--) { @@ -92476,7 +92477,7 @@ int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -92532,7 +92533,7 @@ static int sp_521_ecc_is_point_9(const sp_point_521* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -105407,7 +105408,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) } } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -105467,7 +105468,7 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
diff --git a/wolfcrypt/user-crypto/src/rsa.c b/wolfcrypt/user-crypto/src/rsa.c index 0c65ad098a..66357372fb 100644 --- a/wolfcrypt/user-crypto/src/rsa.c +++ b/wolfcrypt/user-crypto/src/rsa.c @@ -2042,7 +2042,7 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) { IppStatus ret; int scratchSz; - int i; /* for trys on calling make key */ + int i; /* for tries on calling make key */ int ctxSz; IppsBigNumState* pSrcPublicExp = NULL; @@ -2178,7 +2178,7 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) goto makeKeyEnd; } - /* call IPP to generate keys, if inseficent entropy error call again */ + /* call IPP to generate keys, if insufficient entropy error call again */ ret = ippStsInsufficientEntropy; while (ret == ippStsInsufficientEntropy) { ret = ippsRSA_GenerateKeys(pSrcPublicExp, key->n, key->e, diff --git a/wolfssl/internal.h b/wolfssl/internal.h index c4805ee7aa..aa7e89a70a 100644 --- a/wolfssl/internal.h +++ b/wolfssl/internal.h @@ -1764,7 +1764,7 @@ enum Misc { #ifndef ECDHE_SIZE /* allow this to be overridden at compile-time */ ECDHE_SIZE = 32, /* ECDHE server size defaults to 256 bit */ #endif - MAX_EXPORT_ECC_SZ = 256, /* Export ANS X9.62 max future size */ + MAX_EXPORT_ECC_SZ = 256, /* Export ANSI X9.62 max future size */ MAX_CURVE_NAME_SZ = 16, /* Maximum size of curve name string */ NEW_SA_MAJOR = 8, /* Most significant byte used with new sig algos */ @@ -3576,7 +3576,7 @@ struct WOLFSSL_CTX { byte sendVerify:2; /* for client side (can not be single bit) */ byte haveRSA:1; /* RSA available */ byte haveECC:1; /* ECC available */ - byte haveDH:1; /* server DH parms set by user */ + byte haveDH:1; /* server DH params set by user */ byte haveECDSAsig:1; /* server cert signed w/ ECDSA */ byte haveFalconSig:1; /* server cert signed w/ Falcon */ byte haveDilithiumSig:1;/* server cert signed w/ Dilithium */ @@ -4630,7 +4630,7 @@ struct Options { word16 usingCompression:1; /* are we using compression */ word16 haveRSA:1; /* RSA available 
*/ word16 haveECC:1; /* ECC available */ - word16 haveDH:1; /* server DH parms set by user */ + word16 haveDH:1; /* server DH params set by user */ word16 haveECDSAsig:1; /* server ECDSA signed cert */ word16 haveStaticECC:1; /* static server ECC private key */ word16 haveFalconSig:1; /* server Falcon signed cert */ diff --git a/wolfssl/wolfcrypt/ext_lms.h b/wolfssl/wolfcrypt/ext_lms.h index 8b8a8f7288..ccdfdcb30e 100644 --- a/wolfssl/wolfcrypt/ext_lms.h +++ b/wolfssl/wolfcrypt/ext_lms.h @@ -48,8 +48,8 @@ typedef struct hss_extra_info hss_extra_info; struct LmsKey { unsigned levels; /* Number of tree levels. */ - param_set_t lm_type[MAX_HSS_LEVELS]; /* Height parm per level. */ - param_set_t lm_ots_type[MAX_HSS_LEVELS]; /* Winternitz parm per level. */ + param_set_t lm_type[MAX_HSS_LEVELS]; /* Height param per level. */ + param_set_t lm_ots_type[MAX_HSS_LEVELS]; /* Winternitz param per level. */ unsigned char pub[HSS_MAX_PUBLIC_KEY_LEN]; #ifndef WOLFSSL_LMS_VERIFY_ONLY hss_working_key * working_key; diff --git a/wolfssl/wolfcrypt/lms.h b/wolfssl/wolfcrypt/lms.h index d3ab07571d..483f349c1e 100644 --- a/wolfssl/wolfcrypt/lms.h +++ b/wolfssl/wolfcrypt/lms.h @@ -94,8 +94,8 @@ enum wc_LmsParm { /* enum wc_LmsState is to help track the state of an LMS/HSS Key. */ enum wc_LmsState { WC_LMS_STATE_FREED, /* Key has been freed from memory. */ - WC_LMS_STATE_INITED, /* Key has been inited, ready to set parms.*/ - WC_LMS_STATE_PARMSET, /* Parms are set, ready to MakeKey or Reload. */ + WC_LMS_STATE_INITED, /* Key has been inited, ready to set params.*/ + WC_LMS_STATE_PARMSET, /* Params are set, ready to MakeKey or Reload. */ WC_LMS_STATE_OK, /* Able to sign signatures and verify. */ WC_LMS_STATE_VERIFYONLY, /* A public only LmsKey. */ WC_LMS_STATE_BAD, /* Can't guarantee key's state. 
*/ diff --git a/wolfssl/wolfcrypt/xmss.h b/wolfssl/wolfcrypt/xmss.h index 7cd8f27ffa..70f26c484c 100644 --- a/wolfssl/wolfcrypt/xmss.h +++ b/wolfssl/wolfcrypt/xmss.h @@ -104,8 +104,8 @@ enum wc_XmssRc { /* enum wc_XmssState is to help track the state of an XMSS Key. */ enum wc_XmssState { WC_XMSS_STATE_FREED, /* Key has been freed from memory. */ - WC_XMSS_STATE_INITED, /* Key has been inited, ready to set parms.*/ - WC_XMSS_STATE_PARMSET, /* Parms are set, ready to MakeKey or Reload. */ + WC_XMSS_STATE_INITED, /* Key has been inited, ready to set params.*/ + WC_XMSS_STATE_PARMSET, /* Params are set, ready to MakeKey or Reload. */ WC_XMSS_STATE_OK, /* Able to sign signatures and verify. */ WC_XMSS_STATE_VERIFYONLY, /* A public only XmssKey. */ WC_XMSS_STATE_BAD, /* Can't guarantee key's state. */