From 954507eb53effbd08feec8d7abe214b65b616694 Mon Sep 17 00:00:00 2001 From: Decho Kocharin Date: Wed, 8 Nov 2023 09:25:48 +0700 Subject: [PATCH] Add support for Veruscoin (PBaaS) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Cédric CRISPIN --- examples/veruscoin_pool.json | 144 ++ .../Equihash/Custom/Veruscoin/VeruscoinJob.cs | 650 +++++++++ .../Equihash/DaemonRequests/SendCurrency.cs | 12 + .../GetBlockTemplateResponse.cs | 13 + .../Blockchain/Equihash/EquihashConstants.cs | 22 + .../Equihash/EquihashExtraNonceProvider.cs | 7 + .../Blockchain/Equihash/EquihashJob.cs | 4 +- .../Blockchain/Equihash/EquihashJobManager.cs | 85 +- .../Blockchain/Equihash/EquihashPool.cs | 60 +- .../Configuration/ClusterConfigExtensions.cs | 10 +- src/Miningcore/Native/Verushash.cs | 60 + src/Miningcore/build-libs-linux.sh | 1 + src/Miningcore/coins.json | 43 +- src/Native/libverushash/Makefile | 17 + src/Native/libverushash/crypto/common.h | 114 ++ src/Native/libverushash/crypto/haraka.c | 665 +++++++++ src/Native/libverushash/crypto/haraka.h | 128 ++ .../libverushash/crypto/haraka_portable.c | 428 ++++++ .../libverushash/crypto/haraka_portable.h | 84 ++ src/Native/libverushash/crypto/ripemd160.cpp | 291 ++++ src/Native/libverushash/crypto/ripemd160.h | 28 + src/Native/libverushash/crypto/sha256.cpp | 199 +++ src/Native/libverushash/crypto/sha256.h | 32 + src/Native/libverushash/crypto/tinyformat.h | 1013 ++++++++++++++ src/Native/libverushash/crypto/uint256.cpp | 146 ++ src/Native/libverushash/crypto/uint256.h | 176 +++ .../libverushash/crypto/utilstrencodings.cpp | 500 +++++++ .../libverushash/crypto/utilstrencodings.h | 98 ++ .../libverushash/crypto/verus_clhash.cpp | 1068 +++++++++++++++ src/Native/libverushash/crypto/verus_clhash.h | 304 +++++ .../crypto/verus_clhash_portable.cpp | 1199 +++++++++++++++++ src/Native/libverushash/crypto/verus_hash.cpp | 175 +++ src/Native/libverushash/crypto/verus_hash.h | 235 ++++ 
src/Native/libverushash/exports.cpp | 49 + src/Native/libverushash/libverushash.sln | 31 + src/Native/libverushash/libverushash.vcxproj | 203 +++ src/Native/libverushash/sodium.h | 69 + src/Native/libverushash/verushashverify.cpp | 125 ++ src/Native/libverushash/verushashverify.h | 20 + 39 files changed, 8475 insertions(+), 33 deletions(-) create mode 100644 examples/veruscoin_pool.json create mode 100644 src/Miningcore/Blockchain/Equihash/Custom/Veruscoin/VeruscoinJob.cs create mode 100644 src/Miningcore/Blockchain/Equihash/DaemonRequests/SendCurrency.cs create mode 100644 src/Miningcore/Native/Verushash.cs create mode 100644 src/Native/libverushash/Makefile create mode 100644 src/Native/libverushash/crypto/common.h create mode 100644 src/Native/libverushash/crypto/haraka.c create mode 100644 src/Native/libverushash/crypto/haraka.h create mode 100644 src/Native/libverushash/crypto/haraka_portable.c create mode 100644 src/Native/libverushash/crypto/haraka_portable.h create mode 100644 src/Native/libverushash/crypto/ripemd160.cpp create mode 100644 src/Native/libverushash/crypto/ripemd160.h create mode 100644 src/Native/libverushash/crypto/sha256.cpp create mode 100644 src/Native/libverushash/crypto/sha256.h create mode 100644 src/Native/libverushash/crypto/tinyformat.h create mode 100644 src/Native/libverushash/crypto/uint256.cpp create mode 100644 src/Native/libverushash/crypto/uint256.h create mode 100644 src/Native/libverushash/crypto/utilstrencodings.cpp create mode 100644 src/Native/libverushash/crypto/utilstrencodings.h create mode 100644 src/Native/libverushash/crypto/verus_clhash.cpp create mode 100644 src/Native/libverushash/crypto/verus_clhash.h create mode 100644 src/Native/libverushash/crypto/verus_clhash_portable.cpp create mode 100644 src/Native/libverushash/crypto/verus_hash.cpp create mode 100644 src/Native/libverushash/crypto/verus_hash.h create mode 100644 src/Native/libverushash/exports.cpp create mode 100644 
src/Native/libverushash/libverushash.sln create mode 100644 src/Native/libverushash/libverushash.vcxproj create mode 100644 src/Native/libverushash/sodium.h create mode 100644 src/Native/libverushash/verushashverify.cpp create mode 100644 src/Native/libverushash/verushashverify.h diff --git a/examples/veruscoin_pool.json b/examples/veruscoin_pool.json new file mode 100644 index 000000000..295a2e142 --- /dev/null +++ b/examples/veruscoin_pool.json @@ -0,0 +1,144 @@ +{ + "logging": { + "level": "info", + "enableConsoleLog": true, + "enableConsoleColors": true, + "logFile": "", + "apiLogFile": "", + "logBaseDirectory": "", + "perPoolLogFile": false + }, + "banning": { + "manager": "Integrated", + "banOnJunkReceive": true, + "banOnInvalidShares": false + }, + "notifications": { + "enabled": false, + "email": { + "host": "smtp.example.com", + "port": 587, + "user": "user", + "password": "password", + "fromAddress": "info@yourpool.org", + "fromName": "support" + }, + "admin": { + "enabled": false, + "emailAddress": "user@example.com", + "notifyBlockFound": true + } + }, + "persistence": { + "postgres": { + "host": "127.0.0.1", + "port": 5432, + "user": "miningcore", + "password": "password", + "database": "miningcore" + } + }, + "paymentProcessing": { + "enabled": true, + "interval": 600, + "shareRecoveryFile": "recovered-shares.txt" + }, + "api": { + "enabled": true, + "listenAddress": "*", + "port": 4000, + "metricsIpWhitelist": [], + "rateLimiting": { + "disabled": true, + "rules": [ + { + "Endpoint": "*", + "Period": "1s", + "Limit": 5 + } + ], + "ipWhitelist": [ + "" + ] + } + }, + "pools": [{ + "id": "vrsc1", + "enabled": true, + "coin": "veruscoin", + "address": "RHux2fYMyxMG4W5F2Va436cANsR47JUvTE", + "GBTArgs": [{ + "capabilities": [ + "coinbasetxn", + "workid", + "coinbase/append" + ], + "rules": [ + "segwit" + ] + }], + "rewardRecipients": [ + { + "type": "op", + "address": "RHux2fYMyxMG4W5F2Va436cANsR47JUvTE", + "percentage": 1.5 + } + ], + 
"blockRefreshInterval": 0, + "jobRebroadcastTimeout": 0, + "clientConnectionTimeout": 2400, + "banning": { + "enabled": true, + "time": 600, + "invalidPercent": 50, + "checkThreshold": 50 + }, + "ports": { + "3092": { + "listenAddress": "0.0.0.0", + "difficulty": 256, + "varDiff": { + "minDiff": 256, + "maxDiff": 1048576000, + "targetTime": 15, + "retargetTime": 90, + "variancePercent": 30, + "maxDelta": 512 + } + }, + "3093": { + "listenAddress": "0.0.0.0", + "difficulty": 256, + "tls": true, + "tlsPfxFile": "", + "tlsPfxPassword": "password", + "varDiff": { + "minDiff": 256, + "maxDiff": 1048576000, + "targetTime": 15, + "retargetTime": 90, + "variancePercent": 30, + "maxDelta": 512 + } + } + }, + "daemons": [ + { + "host": "127.0.0.1", + "port": 7771, + "user": "user", + "password": "pass", + "zmqBlockNotifySocket": "tcp://127.0.0.1:7772", + "zmqBlockNotifyTopic": "hashblock" + } + ], + "paymentProcessing": { + "enabled": true, + "minimumPayment": 1, + "payoutScheme": "PPLNS", + "payoutSchemeConfig": { + "factor": 0.5 + } + } + }] +} \ No newline at end of file diff --git a/src/Miningcore/Blockchain/Equihash/Custom/Veruscoin/VeruscoinJob.cs b/src/Miningcore/Blockchain/Equihash/Custom/Veruscoin/VeruscoinJob.cs new file mode 100644 index 000000000..5a5f39be0 --- /dev/null +++ b/src/Miningcore/Blockchain/Equihash/Custom/Veruscoin/VeruscoinJob.cs @@ -0,0 +1,650 @@ +using System.Globalization; +using Miningcore.Blockchain.Bitcoin; +using Miningcore.Blockchain.Equihash.DaemonResponses; +using Miningcore.Configuration; +using Miningcore.Contracts; +using Miningcore.Crypto.Hashing.Equihash; +using Miningcore.Extensions; +using Miningcore.Native; +using Miningcore.Stratum; +using Miningcore.Time; +using Miningcore.Util; +using NBitcoin; +using NBitcoin.DataEncoders; +using NBitcoin.Zcash; + +namespace Miningcore.Blockchain.Equihash.Custom.Veruscoin; + +public class VeruscoinJob : EquihashJob +{ + // PBaaS + public bool isPBaaSActive; + + protected uint coinbaseIndex = 
4294967295u; + protected uint coinbaseSequence = 4294967295u; + // protected string poolHex = "56525343"; + private static uint txInputCount = 1u; + private static uint txLockTime; + private static uint txExpiryHeight = 0u; + private static long txBalance = 0; + private static uint txVShieldedSpend = 0u; + private static uint txVShieldedOutput = 0u; + private static uint txJoinSplits = 0u; + + protected override Transaction CreateOutputTransaction() + { + var txNetwork = Network.GetNetwork(networkParams.CoinbaseTxNetwork); + var tx = Transaction.Create(txNetwork); + + // set versions + tx.Version = txVersion; + + /* if(isOverwinterActive) + { + overwinterField.SetValue(tx, true); + versionGroupField.SetValue(tx, txVersionGroupId); + } */ + + // calculate outputs + if(networkParams.PayFundingStream) + { + rewardToPool = new Money(Math.Round(blockReward * (1m - (networkParams.PercentFoundersReward) / 100m)) + rewardFees, MoneyUnit.Satoshi); + tx.Outputs.Add(rewardToPool, poolAddressDestination); + + foreach(FundingStream fundingstream in BlockTemplate.Subsidy.FundingStreams) + { + var amount = new Money(Math.Round(fundingstream.ValueZat / 1m), MoneyUnit.Satoshi); + var destination = FoundersAddressToScriptDestination(fundingstream.Address); + tx.Outputs.Add(amount, destination); + } + } + else if(networkParams.vOuts) + { + rewardToPool = new Money(Math.Round(blockReward * (1m - (networkParams.vPercentFoundersReward) / 100m)) + rewardFees, MoneyUnit.Satoshi); + tx.Outputs.Add(rewardToPool, poolAddressDestination); + var destination = FoundersAddressToScriptDestination(networkParams.vTreasuryRewardAddress); + var amount = new Money(Math.Round(blockReward * (networkParams.vPercentTreasuryReward / 100m)), MoneyUnit.Satoshi); + tx.Outputs.Add(amount, destination); + destination = FoundersAddressToScriptDestination(networkParams.vSecureNodesRewardAddress); + amount = new Money(Math.Round(blockReward * (networkParams.percentSecureNodesReward / 100m)), MoneyUnit.Satoshi); + 
tx.Outputs.Add(amount, destination); + destination = FoundersAddressToScriptDestination(networkParams.vSuperNodesRewardAddress); + amount = new Money(Math.Round(blockReward * (networkParams.percentSuperNodesReward / 100m)), MoneyUnit.Satoshi); + tx.Outputs.Add(amount, destination); + } + else if(networkParams.PayFoundersReward && + (networkParams.LastFoundersRewardBlockHeight >= BlockTemplate.Height || + networkParams.TreasuryRewardStartBlockHeight > 0)) + { + // founders or treasury reward? + if(networkParams.TreasuryRewardStartBlockHeight > 0 && + BlockTemplate.Height >= networkParams.TreasuryRewardStartBlockHeight) + { + // pool reward (t-addr) + rewardToPool = new Money(Math.Round(blockReward * (1m - (networkParams.PercentTreasuryReward) / 100m)) + rewardFees, MoneyUnit.Satoshi); + tx.Outputs.Add(rewardToPool, poolAddressDestination); + + // treasury reward (t-addr) + var destination = FoundersAddressToScriptDestination(GetVeruscoinTreasuryRewardAddress()); + var amount = new Money(Math.Round(blockReward * (networkParams.PercentTreasuryReward / 100m)), MoneyUnit.Satoshi); + tx.Outputs.Add(amount, destination); + } + + else + { + // pool reward (t-addr) + rewardToPool = new Money(Math.Round(blockReward * (1m - (networkParams.PercentFoundersReward) / 100m)) + rewardFees, MoneyUnit.Satoshi); + tx.Outputs.Add(rewardToPool, poolAddressDestination); + + // founders reward (t-addr) + var destination = FoundersAddressToScriptDestination(GetFoundersRewardAddress()); + var amount = new Money(Math.Round(blockReward * (networkParams.PercentFoundersReward / 100m)), MoneyUnit.Satoshi); + tx.Outputs.Add(amount, destination); + } + } + + else + { + // no founders reward + // pool reward (t-addr) + rewardToPool = new Money(blockReward + rewardFees, MoneyUnit.Satoshi); + tx.Outputs.Add(rewardToPool, poolAddressDestination); + } + + tx.Inputs.Add(TxIn.CreateCoinbase((int) BlockTemplate.Height)); + + return tx; + } + + private string GetVeruscoinTreasuryRewardAddress() + { + var 
index = (int) Math.Floor((BlockTemplate.Height - networkParams.TreasuryRewardStartBlockHeight) / + networkParams.TreasuryRewardAddressChangeInterval % networkParams.TreasuryRewardAddresses.Length); + + var address = networkParams.TreasuryRewardAddresses[index]; + return address; + } + + protected override void BuildCoinbase() + { + // output transaction + txOut = CreateOutputTransaction(); + + // when PBaaS activates we must use the coinbasetxn from daemon to get proper fee pool calculations in coinbase + var solutionVersion = BlockTemplate.Solution.Substring(0, 8); + var reversedSolutionVersion = uint.Parse(solutionVersion.HexToReverseByteArray().ToHexString(), NumberStyles.HexNumber); + isPBaaSActive = (reversedSolutionVersion > 6); + + if(!isPBaaSActive) + { + var script = TxIn.CreateCoinbase((int) BlockTemplate.Height).ScriptSig; + + /* var blockHeight = (int) BlockTemplate.Height; + var blockHeightSerial = blockHeight.ToString(); + if (blockHeightSerial.Length % 2 != 0) + blockHeightSerial = "0" + blockHeightSerial; + + int shiftedHeight = blockHeight << 1; + int height = (int) Math.Ceiling((double) shiftedHeight.ToString().Length / 8); + int lengthDiff = blockHeightSerial.Length / 2 - height; + for (int i = 0; i < lengthDiff; i++) { + blockHeightSerial += "00"; + } + + var length = "0" + height.ToString(); + + var lengthBytes = (Span) length.HexToByteArray(); + var blockHeightSerialBytes = (Span) blockHeightSerial.HexToReverseByteArray(); + var opBytes = (Span) new byte[] { 0x00 }; + var poolHexBytes = (Span) poolHex.HexToByteArray(); + + // concat length, blockHeightSerial, OP_0 and poolHex + Span serializedBlockHeightBytes = stackalloc byte[lengthBytes.Length + blockHeightSerialBytes.Length + opBytes.Length + poolHexBytes.Length]; + lengthBytes.CopyTo(serializedBlockHeightBytes); + var offset = lengthBytes.Length; + blockHeightSerialBytes.CopyTo(serializedBlockHeightBytes[offset..]); + offset += blockHeightSerialBytes.Length; + 
opBytes.CopyTo(serializedBlockHeightBytes[offset..]); + offset += poolHexBytes.Length; + poolHexBytes.CopyTo(serializedBlockHeightBytes[offset..]); */ + + using(var stream = new MemoryStream()) + { + var bs = new ZcashStream(stream, true); + + bs.Version = txVersion; + bs.Overwintered = isOverwinterActive; + + /* if(isOverwinterActive) + { + uint mask = (isOverwinterActive ? 1u : 0u ); + uint shiftedMask = mask << 31; + uint versionWithOverwinter = txVersion | shiftedMask; + + // version + bs.ReadWrite(ref versionWithOverwinter); + } + else + { + // version + bs.ReadWrite(ref txVersion); + } + + if(isOverwinterActive || isSaplingActive) + { + bs.ReadWrite(ref txVersionGroupId); + } */ + + // serialize (simulated) input transaction + bs.ReadWriteAsVarInt(ref txInputCount); + bs.ReadWrite(ref sha256Empty); + bs.ReadWrite(ref coinbaseIndex); + // bs.ReadWrite(ref serializedBlockHeightBytes); + bs.ReadWrite(ref script); + bs.ReadWrite(ref coinbaseSequence); + + // serialize output transaction + var txOutBytes = SerializeOutputTransaction(txOut); + bs.ReadWrite(ref txOutBytes); + + // misc + bs.ReadWrite(ref txLockTime); + + if(isOverwinterActive || isSaplingActive) + { + bs.ReadWrite(ref txExpiryHeight); + } + + if(isSaplingActive) + { + bs.ReadWrite(ref txBalance); + bs.ReadWriteAsVarInt(ref txVShieldedSpend); + bs.ReadWriteAsVarInt(ref txVShieldedOutput); + } + + if(isOverwinterActive || isSaplingActive) + { + bs.ReadWriteAsVarInt(ref txJoinSplits); + } + + // done + coinbaseInitial = stream.ToArray(); + coinbaseInitialHash = new byte[32]; + sha256D.Digest(coinbaseInitial, coinbaseInitialHash); + } + } + else + { + coinbaseInitial = BlockTemplate.CoinbaseTx.Data.HexToByteArray(); + coinbaseInitialHash = BlockTemplate.CoinbaseTx.Hash.HexToReverseByteArray(); + } + } + + private byte[] SerializeOutputTransaction(Transaction tx) + { + var withDefaultWitnessCommitment = !string.IsNullOrEmpty(BlockTemplate.DefaultWitnessCommitment); + + var outputCount = (uint) 
tx.Outputs.Count; + if(withDefaultWitnessCommitment) + outputCount++; + + using(var stream = new MemoryStream()) + { + var bs = new BitcoinStream(stream, true); + + // write output count + bs.ReadWriteAsVarInt(ref outputCount); + + long amount; + byte[] raw; + uint rawLength; + + // serialize outputs + foreach(var output in tx.Outputs) + { + amount = output.Value.Satoshi; + var outScript = output.ScriptPubKey; + raw = outScript.ToBytes(true); + rawLength = (uint) raw.Length; + + bs.ReadWrite(ref amount); + bs.ReadWriteAsVarInt(ref rawLength); + bs.ReadWrite(ref raw); + } + + // serialize witness (segwit) + if(withDefaultWitnessCommitment) + { + amount = 0; + raw = BlockTemplate.DefaultWitnessCommitment.HexToByteArray(); + rawLength = (uint) raw.Length; + + bs.ReadWrite(ref amount); + bs.ReadWriteAsVarInt(ref rawLength); + bs.ReadWrite(ref raw); + } + + return stream.ToArray(); + } + } + + private byte[] BuildVeruscoinRawTransactionBuffer() + { + using(var stream = new MemoryStream()) + { + foreach(var tx in BlockTemplate.Transactions) + { + var txRaw = tx.Data.HexToByteArray(); + stream.Write(txRaw); + } + + return stream.ToArray(); + } + } + + private byte[] SerializeVeruscoinBlock(Span header, Span coinbase, Span solution) + { + var transactionCount = (uint) BlockTemplate.Transactions.Length + 1; // +1 for prepended coinbase tx + var rawTransactionBuffer = BuildVeruscoinRawTransactionBuffer(); + + using(var stream = new MemoryStream()) + { + var bs = new BitcoinStream(stream, true); + + bs.ReadWrite(ref header); + bs.ReadWrite(ref solution); + + /* var txCount = transactionCount.ToString(); + if (Math.Abs(txCount.Length % 2) == 1) + txCount = "0" + txCount; + + if (transactionCount <= 0x7f) + { + var simpleVarIntBytes = (Span) txCount.HexToByteArray(); + + bs.ReadWrite(ref simpleVarIntBytes); + } + else if (transactionCount <= 0x7fff) + { + if (txCount.Length == 2) + txCount = "00" + txCount; + + var complexHeader = (Span) new byte[] { 0xFD }; + var 
complexVarIntBytes = (Span) txCount.HexToReverseByteArray(); + + // concat header and varInt + Span complexHeaderVarIntBytes = stackalloc byte[complexHeader.Length + complexVarIntBytes.Length]; + complexHeader.CopyTo(complexHeaderVarIntBytes); + complexVarIntBytes.CopyTo(complexHeaderVarIntBytes[complexHeader.Length..]); + + bs.ReadWrite(ref complexHeaderVarIntBytes); + } */ + + bs.ReadWriteAsVarInt(ref transactionCount); + bs.ReadWrite(ref coinbase); + bs.ReadWrite(ref rawTransactionBuffer); + + return stream.ToArray(); + } + } + + private (Share Share, string BlockHex) ProcessVersucoinShareInternal(StratumConnection worker, string nonce, + uint nTime, string solution) + { + var context = worker.ContextAs(); + var solutionBytes = (Span) solution.HexToByteArray(); + + // serialize block-header + var headerBytes = SerializeHeader(nTime, nonce); + + // concat header and solution + Span headerSolutionBytes = stackalloc byte[headerBytes.Length + solutionBytes.Length]; + headerBytes.CopyTo(headerSolutionBytes); + + solutionBytes.CopyTo(headerSolutionBytes[headerBytes.Length..]); + + // hash block-header + Span headerHash = stackalloc byte[32]; + + Verushash headerHasherVerus = new Verushash(); + + if (BlockTemplate.Version > 4 && !string.IsNullOrEmpty(BlockTemplate.Solution)) + { + // make sure verus solution version matches expected version + if (solution.Substring(VeruscoinConstants.SolutionSlice, 2) != BlockTemplate.Solution.Substring(0, 2)) + throw new StratumException(StratumError.Other, $"invalid solution - expected solution header: {BlockTemplate.Solution.Substring(0, 2)}"); + + if (solution.Substring(VeruscoinConstants.SolutionSlice, 2) == "03") + headerHasherVerus.Digest(headerSolutionBytes, headerHash, VeruscoinConstants.HashVersion2b1); + else + headerHasherVerus.Digest(headerSolutionBytes, headerHash, VeruscoinConstants.HashVersion2b2); + } + else if (BlockTemplate.Version > 4) + headerHasherVerus.Digest(headerSolutionBytes, headerHash, 
VeruscoinConstants.HashVersion2b); + else + headerHasherVerus.Digest(headerSolutionBytes, headerHash); + + var headerValue = new uint256(headerHash); + + // calc share-diff + var shareDiff = (double) new BigRational(networkParams.Diff1BValue, headerHash.ToBigInteger()); + var stratumDifficulty = context.Difficulty; + var ratio = shareDiff / stratumDifficulty; + + // check if the share meets the much harder block difficulty (block candidate) + var isBlockCandidate = headerValue <= blockTargetValue; + + // test if share meets at least workers current difficulty + if(!isBlockCandidate && ratio < 0.99) + { + // check if share matched the previous difficulty from before a vardiff retarget + if(context.VarDiff?.LastUpdate != null && context.PreviousDifficulty.HasValue) + { + ratio = shareDiff / context.PreviousDifficulty.Value; + + if(ratio < 0.99) + throw new StratumException(StratumError.LowDifficultyShare, $"low difficulty share ({shareDiff})"); + + // use previous difficulty + stratumDifficulty = context.PreviousDifficulty.Value; + } + + else + throw new StratumException(StratumError.LowDifficultyShare, $"low difficulty share ({shareDiff})"); + } + + var result = new Share + { + BlockHeight = BlockTemplate.Height, + NetworkDifficulty = Difficulty, + Difficulty = stratumDifficulty, + }; + + if(isBlockCandidate) + { + var headerHashReversed = headerHash.ToNewReverseArray(); + + result.IsBlockCandidate = true; + result.BlockReward = rewardToPool.ToDecimal(MoneyUnit.BTC); + result.BlockHash = headerHashReversed.ToHexString(); + var blockBytes = SerializeVeruscoinBlock(headerBytes, coinbaseInitial, solutionBytes); + var blockHex = blockBytes.ToHexString(); + + return (result, blockHex); + } + + return (result, null); + } + + private bool RegisterVersucoinSubmit(string nonce, string solution) + { + var key = nonce + solution; + + return submissions.TryAdd(key, true); + } + + #region API-Surface + + public override void Init(EquihashBlockTemplate blockTemplate, string 
jobId, + PoolConfig poolConfig, ClusterConfig clusterConfig, IMasterClock clock, + IDestination poolAddressDestination, Network network, + EquihashSolver solver) + { + Contract.RequiresNonNull(blockTemplate); + Contract.RequiresNonNull(poolConfig); + Contract.RequiresNonNull(clusterConfig); + Contract.RequiresNonNull(clock); + Contract.RequiresNonNull(poolAddressDestination); + Contract.RequiresNonNull(solver); + Contract.Requires(!string.IsNullOrEmpty(jobId)); + + this.clock = clock; + this.poolAddressDestination = poolAddressDestination; + coin = poolConfig.Template.As(); + networkParams = coin.GetNetwork(network.ChainName); + this.network = network; + BlockTemplate = blockTemplate; + JobId = jobId; + Difficulty = (double) new BigRational(networkParams.Diff1BValue, BlockTemplate.Target.HexToReverseByteArray().AsSpan().ToBigInteger()); + + // ZCash Sapling & Overwinter support + isSaplingActive = networkParams.SaplingActivationHeight.HasValue && + networkParams.SaplingTxVersion.HasValue && + networkParams.SaplingTxVersionGroupId.HasValue && + networkParams.SaplingActivationHeight.Value > 0 && + blockTemplate.Height >= networkParams.SaplingActivationHeight.Value; + + isOverwinterActive = isSaplingActive || + networkParams.OverwinterTxVersion.HasValue && + networkParams.OverwinterTxVersionGroupId.HasValue && + networkParams.OverwinterActivationHeight.HasValue && + networkParams.OverwinterActivationHeight.Value > 0 && + blockTemplate.Height >= networkParams.OverwinterActivationHeight.Value; + + if(isSaplingActive) + { + txVersion = networkParams.SaplingTxVersion.Value; + txVersionGroupId = networkParams.SaplingTxVersionGroupId.Value; + } + + else if(isOverwinterActive) + { + txVersion = networkParams.OverwinterTxVersion.Value; + txVersionGroupId = networkParams.OverwinterTxVersionGroupId.Value; + } + + // Misc + isPBaaSActive = false; + this.solver = solver; + + // pbaas minimal merged mining target + if(!string.IsNullOrEmpty(BlockTemplate.MergedBits)) + { + var 
tmpMergedBits = new Target(BlockTemplate.MergedBits.HexToByteArray()); + blockTargetValue = tmpMergedBits.ToUInt256(); + } + else if(!string.IsNullOrEmpty(BlockTemplate.MergeMineBits)) + { + var tmpMergeMineBits = new Target(BlockTemplate.MergeMineBits.HexToByteArray()); + blockTargetValue = tmpMergeMineBits.ToUInt256(); + } + else if(!string.IsNullOrEmpty(BlockTemplate.Target)) + blockTargetValue = new uint256(BlockTemplate.Target); + else + { + var tmpBits = new Target(BlockTemplate.Bits.HexToByteArray()); + blockTargetValue = tmpBits.ToUInt256(); + } + + previousBlockHashReversedHex = BlockTemplate.PreviousBlockhash + .HexToByteArray() + .ReverseInPlace() + .ToHexString(); + + if(blockTemplate.Subsidy != null) + blockReward = blockTemplate.Subsidy.Miner * BitcoinConstants.SatoshisPerBitcoin; + else + blockReward = BlockTemplate.CoinbaseValue; + + if(networkParams?.PayFundingStream == true) + { + decimal fundingstreamTotal = 0; + fundingstreamTotal = blockTemplate.Subsidy.FundingStreams.Sum(x => x.Value); + blockReward = (blockTemplate.Subsidy.Miner + fundingstreamTotal) * BitcoinConstants.SatoshisPerBitcoin; + } + else if(networkParams?.vOuts == true) + { + blockReward = (decimal) ((blockTemplate.Subsidy.Miner + blockTemplate.Subsidy.Community + blockTemplate.Subsidy.Securenodes + blockTemplate.Subsidy.Supernodes) * BitcoinConstants.SatoshisPerBitcoin); + } + else if(networkParams?.PayFoundersReward == true) + { + var founders = blockTemplate.Subsidy.Founders ?? 
blockTemplate.Subsidy.Community; + + if(!founders.HasValue) + throw new Exception("Error, founders reward missing for block template"); + + blockReward = (blockTemplate.Subsidy.Miner + founders.Value) * BitcoinConstants.SatoshisPerBitcoin; + } + + rewardFees = blockTemplate.Transactions.Sum(x => x.Fee); + + BuildCoinbase(); + + // build tx hashes + var txHashes = new List { new(coinbaseInitialHash) }; + txHashes.AddRange(BlockTemplate.Transactions.Select(tx => new uint256(tx.Hash.HexToReverseByteArray()))); + + // build merkle root + merkleRoot = MerkleNode.GetRoot(txHashes).Hash.ToBytes().ReverseInPlace(); + merkleRootReversed = merkleRoot.ReverseInPlace(); + merkleRootReversedHex = merkleRootReversed.ToHexString(); + + // misc + var hashReserved = isSaplingActive && !string.IsNullOrEmpty(blockTemplate.FinalSaplingRootHash) ? + blockTemplate.FinalSaplingRootHash.HexToReverseByteArray().ToHexString() : + sha256Empty.ToHexString(); + + string solutionIn = null; + // VerusHash V2.1 activation + if (!string.IsNullOrEmpty(blockTemplate.Solution)) + { + char[] charsToTrim = {'0'}; + solutionIn = blockTemplate.Solution.TrimEnd(charsToTrim); + + if ((solutionIn.Length % 2) == 1) + solutionIn += "0"; + } + + jobParams = new object[] + { + JobId, + BlockTemplate.Version.ReverseByteOrder().ToStringHex8(), + previousBlockHashReversedHex, + merkleRootReversedHex, + hashReserved, + BlockTemplate.CurTime.ReverseByteOrder().ToStringHex8(), + BlockTemplate.Bits.HexToReverseByteArray().ToHexString(), + true, + solutionIn + }; + } + + public override (Share Share, string BlockHex) ProcessShare(StratumConnection worker, string extraNonce2, string nTime, string solution) + { + Contract.RequiresNonNull(worker); + Contract.Requires(!string.IsNullOrEmpty(extraNonce2)); + Contract.Requires(!string.IsNullOrEmpty(nTime)); + Contract.Requires(!string.IsNullOrEmpty(solution)); + + var context = worker.ContextAs(); + + // validate nTime + if(nTime.Length != 8) + throw new 
StratumException(StratumError.Other, "incorrect size of ntime"); + + var nTimeInt = uint.Parse(nTime.HexToReverseByteArray().ToHexString(), NumberStyles.HexNumber); + // if(nTimeInt < BlockTemplate.CurTime || nTimeInt > ((DateTimeOffset) clock.Now).ToUnixTimeSeconds() + 7200) + // throw new StratumException(StratumError.Other, "ntime out of range"); + + if(nTimeInt != BlockTemplate.CurTime) + throw new StratumException(StratumError.Other, "ntime out of range"); + + var nonce = context.ExtraNonce1 + extraNonce2; + + // validate nonce + if(nonce.Length != 64) + throw new StratumException(StratumError.Other, "incorrect size of extraNonce2"); + + // validate solution + if(solution.Length != (networkParams.SolutionSize + networkParams.SolutionPreambleSize) * 2) + throw new StratumException(StratumError.Other, "incorrect size of solution"); + + // dupe check + if(!RegisterVersucoinSubmit(nonce, solution)) + throw new StratumException(StratumError.DuplicateShare, "duplicate share"); + + // when pbaas activates use block header nonce from daemon, pool/miner can no longer manipulate + if(isPBaaSActive) + { + if(string.IsNullOrEmpty(BlockTemplate.Nonce)) + throw new StratumException(StratumError.Other, "block header nonce not provided by daemon"); + else + nonce = BlockTemplate.Nonce.HexToReverseByteArray().ToHexString(); + + // verify pool nonce presence in solution + var solutionExtraData = solution.Substring(solution.Length - 30); + if(solutionExtraData.IndexOf(context.ExtraNonce1) < 0) + throw new StratumException(StratumError.Other, "invalid solution, pool nonce missing"); + } + + return ProcessVersucoinShareInternal(worker, nonce, nTimeInt, solution); + } + + public override object GetJobParams(bool isNew) + { + jobParams[^2] = isNew; + return jobParams; + } + + #endregion // API-Surface +} \ No newline at end of file diff --git a/src/Miningcore/Blockchain/Equihash/DaemonRequests/SendCurrency.cs b/src/Miningcore/Blockchain/Equihash/DaemonRequests/SendCurrency.cs new 
file mode 100644 index 000000000..3d7e6507d --- /dev/null +++ b/src/Miningcore/Blockchain/Equihash/DaemonRequests/SendCurrency.cs @@ -0,0 +1,12 @@ +using Newtonsoft.Json; + +namespace Miningcore.Blockchain.Equihash.DaemonRequests; + +public class SendCurrencyOutputs +{ + [JsonProperty("currency", NullValueHandling = NullValueHandling.Ignore)] + public string Currency { get; set; } + + public decimal Amount { get; set; } + public string Address { get; set; } +} \ No newline at end of file diff --git a/src/Miningcore/Blockchain/Equihash/DaemonResponses/GetBlockTemplateResponse.cs b/src/Miningcore/Blockchain/Equihash/DaemonResponses/GetBlockTemplateResponse.cs index dc889a1ed..fa16c888c 100644 --- a/src/Miningcore/Blockchain/Equihash/DaemonResponses/GetBlockTemplateResponse.cs +++ b/src/Miningcore/Blockchain/Equihash/DaemonResponses/GetBlockTemplateResponse.cs @@ -31,4 +31,17 @@ public class EquihashBlockTemplate : Bitcoin.DaemonResponses.BlockTemplate [JsonProperty("finalsaplingroothash")] public string FinalSaplingRootHash { get; set; } + + // Veruscoin + [JsonProperty("merged_bits")] + public string MergedBits { get; set; } = null; + + [JsonProperty("mergeminebits")] + public string MergeMineBits { get; set; } = null; + + [JsonProperty("solution")] + public string Solution { get; set; } = null; + + [JsonProperty("nonce")] + public string Nonce { get; set; } = null; } diff --git a/src/Miningcore/Blockchain/Equihash/EquihashConstants.cs b/src/Miningcore/Blockchain/Equihash/EquihashConstants.cs index 1d3673360..eb7493b7d 100644 --- a/src/Miningcore/Blockchain/Equihash/EquihashConstants.cs +++ b/src/Miningcore/Blockchain/Equihash/EquihashConstants.cs @@ -10,6 +10,15 @@ public class EquihashConstants System.Numerics.BigInteger.Parse("0007ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", NumberStyles.HexNumber); } +public class VeruscoinConstants +{ + public const int SolutionSlice = 6; + public const string HashVersion2b2 = "2b2"; + public const string 
HashVersion2b1 = "2b1"; + public const string HashVersion2b = "2b"; + public const string HashVersion2 = "2"; +} + public enum ZOperationStatus { Queued, @@ -26,6 +35,14 @@ public static class EquihashCommands public const string ZGetListAddresses = "z_listaddresses"; public const string ZValidateAddress = "z_validateaddress"; public const string ZShieldCoinbase = "z_shieldcoinbase"; + + /// + /// Some projects like Veruscoin do not require shielding before being able to spend coins. + /// They can also send coins from a t-address to t-addresses and z-addresses. + /// Returns an operation-id. You use the operationid value with z_getoperationstatus and + /// z_getoperationresult to obtain the result of sending funds, which if successful, will be a txid. + /// + public const string SendCurrency = "sendcurrency"; /// /// Returns an operationid. You use the operationid value with z_getoperationstatus and @@ -36,3 +53,8 @@ public static class EquihashCommands public const string ZGetOperationStatus = "z_getoperationstatus"; public const string ZGetOperationResult = "z_getoperationresult"; } + +public static class VeruscoinCommands +{ + public const string SubmitMergedBlock = "submitmergedblock"; +} diff --git a/src/Miningcore/Blockchain/Equihash/EquihashExtraNonceProvider.cs b/src/Miningcore/Blockchain/Equihash/EquihashExtraNonceProvider.cs index 922506770..882f59b7e 100644 --- a/src/Miningcore/Blockchain/Equihash/EquihashExtraNonceProvider.cs +++ b/src/Miningcore/Blockchain/Equihash/EquihashExtraNonceProvider.cs @@ -6,3 +6,10 @@ public EquihashExtraNonceProvider(string poolId, byte? clusterInstanceId) : base { } } + +public class VeruscoinExtraNonceProvider : ExtraNonceProviderBase +{ + public VeruscoinExtraNonceProvider(string poolId, byte? 
clusterInstanceId) : base(poolId, 4, clusterInstanceId) + { + } +} diff --git a/src/Miningcore/Blockchain/Equihash/EquihashJob.cs b/src/Miningcore/Blockchain/Equihash/EquihashJob.cs index 9d2655e81..cb1753e3e 100644 --- a/src/Miningcore/Blockchain/Equihash/EquihashJob.cs +++ b/src/Miningcore/Blockchain/Equihash/EquihashJob.cs @@ -434,7 +434,7 @@ public virtual void Init(EquihashBlockTemplate blockTemplate, string jobId, public string JobId { get; protected set; } - public (Share Share, string BlockHex) ProcessShare(StratumConnection worker, string extraNonce2, string nTime, string solution) + public virtual (Share Share, string BlockHex) ProcessShare(StratumConnection worker, string extraNonce2, string nTime, string solution) { Contract.RequiresNonNull(worker); Contract.Requires(!string.IsNullOrEmpty(extraNonce2)); @@ -468,7 +468,7 @@ public virtual void Init(EquihashBlockTemplate blockTemplate, string jobId, return ProcessShareInternal(worker, nonce, nTimeInt, solution); } - public object GetJobParams(bool isNew) + public virtual object GetJobParams(bool isNew) { jobParams[^1] = isNew; return jobParams; diff --git a/src/Miningcore/Blockchain/Equihash/EquihashJobManager.cs b/src/Miningcore/Blockchain/Equihash/EquihashJobManager.cs index 2e4279e5e..e521b0bd8 100644 --- a/src/Miningcore/Blockchain/Equihash/EquihashJobManager.cs +++ b/src/Miningcore/Blockchain/Equihash/EquihashJobManager.cs @@ -1,8 +1,10 @@ +using System.Globalization; using Autofac; using Miningcore.Blockchain.Bitcoin; using Miningcore.Blockchain.Bitcoin.DaemonResponses; using Miningcore.Blockchain.Equihash.Custom.BitcoinGold; using Miningcore.Blockchain.Equihash.Custom.Minexcoin; +using Miningcore.Blockchain.Equihash.Custom.Veruscoin; using Miningcore.Blockchain.Equihash.DaemonResponses; using Miningcore.Configuration; using Miningcore.Contracts; @@ -10,6 +12,7 @@ using Miningcore.Extensions; using Miningcore.JsonRpc; using Miningcore.Messaging; +using Miningcore.Notifications.Messages; using 
Miningcore.Rpc; using Miningcore.Stratum; using Miningcore.Time; @@ -46,7 +49,7 @@ protected override void PostChainIdentifyConfigure() private async Task> GetBlockTemplateAsync(CancellationToken ct) { var subsidyResponse = await rpc.ExecuteAsync(logger, BitcoinCommands.GetBlockSubsidy, ct); - + var result = await rpc.ExecuteAsync(logger, BitcoinCommands.GetBlockTemplate, ct, extraPoolConfig?.GBTArgs ?? (object) GetBlockTemplateParams()); @@ -54,7 +57,7 @@ private async Task> GetBlockTemplateAsync(Can result.Response.Subsidy = subsidyResponse.Response; else if(subsidyResponse.Error?.Code != (int) BitcoinRPCErrorCode.RPC_METHOD_NOT_FOUND) result = new RpcResponse(null, new JsonRpcError(-1, $"{BitcoinCommands.GetBlockSubsidy} failed", null)); - + return result; } @@ -85,6 +88,9 @@ private EquihashJob CreateJob() case "MNX": return new MinexcoinJob(); + + case "VRSC": + return new VeruscoinJob(); } return new EquihashJob(); @@ -196,16 +202,21 @@ public override async Task ValidateAddressAsync(string address, Cancellati { if(string.IsNullOrEmpty(address)) return false; - + // handle t-addr if(await base.ValidateAddressAsync(address, ct)) return true; + + if(!coin.UseBitcoinPayoutHandler) + { + // handle z-addr + var result = await rpc.ExecuteAsync(logger, + EquihashCommands.ZValidateAddress, ct, new[] { address }); - // handle z-addr - var result = await rpc.ExecuteAsync(logger, - EquihashCommands.ZValidateAddress, ct, new[] { address }); - - return result.Response is {IsValid: true}; + return result.Response is {IsValid: true}; + } + + return false; } public object[] GetSubscriberData(StratumConnection worker) @@ -267,8 +278,25 @@ public async ValueTask SubmitShareAsync(StratumConnection worker, if(share.IsBlockCandidate) { logger.Info(() => $"Submitting block {share.BlockHeight} [{share.BlockHash}]"); - - var acceptResponse = await SubmitBlockAsync(share, blockHex, ct); + + SubmitResult acceptResponse; + + switch(coin.Symbol) + { + case "VRSC": + // when PBaaS 
/// <summary>
/// Submits a Veruscoin block candidate to the daemon and, in the same RPC batch, asks the
/// daemon for the block by hash to confirm that it was really accepted.
/// </summary>
/// <param name="share">Share that produced the candidate; supplies block hash, height and source.</param>
/// <param name="blockHex">Hex-encoded serialized block.</param>
/// <param name="isPBaaSActive">
/// True to submit with <c>submitmergedblock</c>, false to submit with <c>submitblock</c>.
/// </param>
/// <param name="ct">Cancellation token for the RPC batch.</param>
/// <returns>
/// A result flagged as accepted only when the follow-up block lookup returns the same hash;
/// on success it also carries the first transaction id of the block.
/// </returns>
protected async Task<SubmitResult> SubmitVeruscoinBlockAsync(Share share, string blockHex, bool isPBaaSActive, CancellationToken ct)
{
    var requestCommand = isPBaaSActive ? VeruscoinCommands.SubmitMergedBlock : BitcoinCommands.SubmitBlock;

    var batch = new []
    {
        new RpcRequest(requestCommand, new[] { blockHex }),
        new RpcRequest(BitcoinCommands.GetBlock, new[] { share.BlockHash })
    };

    var results = await rpc.ExecuteBatchAsync(logger, ct, batch);

    // did submission succeed?
    var submitResult = results[0];
    var submitError = submitResult.Error?.Message ??
        submitResult.Error?.Code.ToString(CultureInfo.InvariantCulture) ??
        submitResult.Response?.ToString();

    // submitblock: any returned text means failure.
    // submitmergedblock: only a reply containing "accepted" counts as success. A reply with
    // neither error nor response leaves submitError null, which must be treated as a failure
    // instead of dereferencing null.
    var submissionFailed = isPBaaSActive
        ? submitError?.Contains("accepted") != true
        : !string.IsNullOrEmpty(submitError);

    if(submissionFailed)
    {
        logger.Warn(() => $"Block {share.BlockHeight} submission failed with: {submitError}");
        messageBus.SendMessage(new AdminNotification("Block submission failed", $"Pool {poolConfig.Id} {(!string.IsNullOrEmpty(share.Source) ? $"[{share.Source.ToUpper()}] " : string.Empty)}failed to submit block {share.BlockHeight}: {submitError}"));
        return new SubmitResult(false, null);
    }

    // was it accepted?
    var acceptResult = results[1];
    var block = acceptResult.Response?.ToObject<Bitcoin.DaemonResponses.Block>();
    var accepted = acceptResult.Error == null && block?.Hash == share.BlockHash;

    if(!accepted)
    {
        logger.Warn(() => $"Block {share.BlockHeight} submission failed for pool {poolConfig.Id} because block was not found after submission");
        messageBus.SendMessage(new AdminNotification($"[{share.PoolId.ToUpper()}]-[{share.Source}] Block submission failed", $"[{share.PoolId.ToUpper()}]-[{share.Source}] Block {share.BlockHeight} submission failed for pool {poolConfig.Id} because block was not found after submission"));
    }

    // Transactions is guarded as well so a block reply without a tx list cannot throw here.
    return new SubmitResult(accepted, block?.Transactions?.FirstOrDefault());
}
EquihashExtraNonceProvider(poolConfig.Id, clusterConfig.InstanceId))); + manager = createEquihashExtraNonceProvider(); manager.Configure(poolConfig, clusterConfig); @@ -109,6 +121,7 @@ protected async Task OnSubscribeAsync(StratumConnection connection, Timestamped< throw new StratumException(StratumError.MinusOne, "missing request id"); var requestParams = request.ParamsAs(); + context.UserAgent = requestParams.FirstOrDefault()?.Trim(); var data = new object[] { @@ -117,11 +130,21 @@ protected async Task OnSubscribeAsync(StratumConnection connection, Timestamped< .Concat(manager.GetSubscriberData(connection)) .ToArray(); - await connection.RespondAsync(data, request.Id); + // Nicehash's stupid validator insists on "error" property present + // in successful responses which is a violation of the JSON-RPC spec + // [We miss you Oliver <3 We miss you so much <3 Respect the goddamn standards Nicehash :(] + var response = new JsonRpcResponse(data, request.Id); + + if(context.IsNicehash) + { + response.Extra = new Dictionary(); + response.Extra["error"] = null; + } + + await connection.RespondAsync(response); // setup worker context context.IsSubscribed = true; - context.UserAgent = requestParams.FirstOrDefault()?.Trim(); } protected async Task OnAuthorizeAsync(StratumConnection connection, Timestamped tsRequest, CancellationToken ct) @@ -149,8 +172,19 @@ protected async Task OnAuthorizeAsync(StratumConnection connection, Timestamped< if(context.IsAuthorized) { + // Nicehash's stupid validator insists on "error" property present + // in successful responses which is a violation of the JSON-RPC spec + // [We miss you Oliver <3 We miss you so much <3 Respect the goddamn standards Nicehash :(] + var response = new JsonRpcResponse(context.IsAuthorized, request.Id); + + if(context.IsNicehash) + { + response.Extra = new Dictionary(); + response.Extra["error"] = null; + } + // respond - await connection.RespondAsync(context.IsAuthorized, request.Id); + await 
/// <summary>
/// Name of the proof-of-work algorithm reported for this coin.
/// </summary>
/// <returns>"Verushash" when the coin symbol is VRSC, otherwise "Equihash".</returns>
public override string GetAlgorithmName()
{
    if(Symbol == "VRSC")
        return "Verushash";

    // TODO: return variant
    return "Equihash";
}
/// <summary>
/// P/Invoke bindings for the native libverushash exports, plus a dispatcher that picks the
/// export matching a requested hash version.
/// </summary>
public unsafe class Verushash
{
    [DllImport("libverushash", EntryPoint = "verushash_export", CallingConvention = CallingConvention.Cdecl)]
    public static extern void verushash(byte* input, byte* output, int input_length);

    [DllImport("libverushash", EntryPoint = "verushash2_export", CallingConvention = CallingConvention.Cdecl)]
    public static extern void verushash2(byte* input, byte* output, int input_length);

    [DllImport("libverushash", EntryPoint = "verushash2b_export", CallingConvention = CallingConvention.Cdecl)]
    public static extern void verushash2b(byte* input, byte* output, int input_length);

    [DllImport("libverushash", EntryPoint = "verushash2b1_export", CallingConvention = CallingConvention.Cdecl)]
    public static extern void verushash2b1(byte* input, byte* output, int input_length);

    [DllImport("libverushash", EntryPoint = "verushash2b2_export", CallingConvention = CallingConvention.Cdecl)]
    public static extern void verushash2b2(byte* input, byte* output, int input_length);

    /// <summary>
    /// Hashes <paramref name="data"/> into <paramref name="result"/> with the native routine
    /// selected by <paramref name="version"/>.
    /// </summary>
    /// <param name="data">Bytes to hash.</param>
    /// <param name="result">Destination buffer; must hold at least 32 bytes.</param>
    /// <param name="version">
    /// One of the <c>VeruscoinConstants.HashVersion*</c> strings; any other value, including
    /// null, selects the plain <c>verushash</c> export.
    /// </param>
    /// <param name="extra">Not read by this method.</param>
    public void Digest(ReadOnlySpan<byte> data, Span<byte> result, string version = null, params object[] extra)
    {
        Contract.Requires<ArgumentException>(result.Length >= 32);

        // Pin both spans for the duration of the native call.
        fixed (byte* source = data)
        fixed (byte* destination = result)
        {
            var length = data.Length;

            if(version == VeruscoinConstants.HashVersion2b2)
                verushash2b2(source, destination, length);
            else if(version == VeruscoinConstants.HashVersion2b1)
                verushash2b1(source, destination, length);
            else if(version == VeruscoinConstants.HashVersion2b)
                verushash2b(source, destination, length);
            else if(version == VeruscoinConstants.HashVersion2)
                verushash2(source, destination, length);
            else
                verushash(source, destination, length);
        }
    }
}
a/src/Miningcore/build-libs-linux.sh +++ b/src/Miningcore/build-libs-linux.sh @@ -33,6 +33,7 @@ export HAVE_FEATURE="$HAVE_AES $HAVE_SSE2 $HAVE_SSE3 $HAVE_SSSE3 $HAVE_AVX $HAVE (cd ../Native/libcryptonight && make -j clean && make -j) && mv ../Native/libcryptonight/libcryptonight.so "$OutDir" (cd ../Native/libkawpow && make -j clean && make -j) && mv ../Native/libkawpow/libkawpow.so "$OutDir" (cd ../Native/libevrprogpow && make -j clean && make -j) && mv ../Native/libevrprogpow/libevrprogpow.so "$OutDir" +(cd ../Native/libverushash && make clean && make) && mv ../Native/libverushash/libverushash.so "$OutDir" ((cd /tmp && rm -rf RandomX && git clone https://github.com/tevador/RandomX && cd RandomX && git checkout tags/v1.1.10 && mkdir build && cd build && cmake -DARCH=native .. && make -j) && (cd ../Native/librandomx && cp /tmp/RandomX/build/librandomx.a . && make -j clean && make -j) && mv ../Native/librandomx/librandomx.so "$OutDir") diff --git a/src/Miningcore/coins.json b/src/Miningcore/coins.json index 0ba4b79fa..72fff6ade 100644 --- a/src/Miningcore/coins.json +++ b/src/Miningcore/coins.json @@ -4392,11 +4392,12 @@ "name": "Veruscoin", "symbol": "VRSC", "family": "equihash", - "website": "", - "market": "", - "twitter": "", - "telegram": "", - "discord": "", + "website": "https://verus.io/", + "market": "https://coinmarketcap.com/currencies/veruscoin/", + "github": "https://github.com/veruscoin/veruscoin", + "twitter": "https://twitter.com/veruscoin", + "telegram": "https://t.me/veruscommunity", + "discord": "https://discord.com/invite/VRKMP2S", "networks": { "main": { "diff1": "00000f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f", @@ -4404,14 +4405,18 @@ "solutionPreambleSize": 3, "solver": { "hash": "equihash", - "args": [200, 9, "Verushash"] + "args": [ + 200, + 9, + "Verushash" + ] }, "coinbaseTxNetwork": "main", "payFoundersReward": false, "overwinterActivationHeight": 1, "overwinterTxVersion": 3, "overwinterTxVersionGroupId": 63210096, - 
"saplingActivationHeight": 1, + "saplingActivationHeight": 227520, "saplingTxVersion": 4, "saplingTxVersionGroupId": 2301567109 }, @@ -4421,10 +4426,20 @@ "solutionPreambleSize": 3, "solver": { "hash": "equihash", - "args": [200, 9, "Verushash"] + "args": [ + 200, + 9, + "Verushash" + ] }, "coinbaseTxNetwork": "testnet", - "payFoundersReward": false + "payFoundersReward": false, + "overwinterActivationHeight": 1, + "overwinterTxVersion": 3, + "overwinterTxVersionGroupId": 63210096, + "saplingActivationHeight": 120, + "saplingTxVersion": 4, + "saplingTxVersionGroupId": 2301567109 }, "regtest": { "diff1": "0007ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", @@ -4432,15 +4447,19 @@ "solutionPreambleSize": 3, "solver": { "hash": "equihash", - "args": [200, 9, "Verushash"] + "args": [ + 48, + 5, + "Verushash" + ] }, "coinbaseTxNetwork": "regtest", "payFoundersReward": false } }, - "usesZCashAddressFormat": true, + "usesZCashAddressFormat": false, "useBitcoinPayoutHandler": false, - "explorerBlockLink": "https://explorer.veruscoin.io/block/$height$", + "explorerBlockLink": "https://explorer.veruscoin.io/block/$hash$", "explorerTxLink": "https://explorer.veruscoin.io/tx/{0}", "explorerAccountLink": "https://explorer.veruscoin.io/address/{0}" }, diff --git a/src/Native/libverushash/Makefile b/src/Native/libverushash/Makefile new file mode 100644 index 000000000..3ac876e32 --- /dev/null +++ b/src/Native/libverushash/Makefile @@ -0,0 +1,17 @@ +CFLAGS += -g -Wall -c -fPIC -O2 -Wno-pointer-sign -Wno-char-subscripts -Wno-unused-variable -Wno-unused-function -Wno-strict-aliasing -Wno-discarded-qualifiers -Wno-unused-const-variable -msse4 -msse4.1 -msse4.2 -mssse3 -mavx -mpclmul -maes +CXXFLAGS += -g -Wall -fPIC -fpermissive -O2 -faligned-new -Wno-char-subscripts -Wno-unused-variable -Wno-unused-function -Wno-strict-aliasing -Wno-sign-compare -std=c++11 -msse4 -msse4.1 -msse4.2 -mssse3 -mavx -mpclmul -maes +LDFLAGS = -shared +LDLIBS = -lsodium +TARGET = 
libverushash.so + +OBJECTS = crypto/haraka.o crypto/haraka_portable.o crypto/ripemd160.o crypto/sha256.o crypto/uint256.o crypto/utilstrencodings.o crypto/verus_hash.o crypto/verus_clhash.o crypto/verus_clhash_portable.o exports.o verushashverify.o + +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS) + +.PHONY: clean + +clean: + $(RM) $(TARGET) $(OBJECTS) diff --git a/src/Native/libverushash/crypto/common.h b/src/Native/libverushash/crypto/common.h new file mode 100644 index 000000000..c2fc1cd1d --- /dev/null +++ b/src/Native/libverushash/crypto/common.h @@ -0,0 +1,114 @@ +// Copyright (c) 2014 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_COMMON_H +#define BITCOIN_CRYPTO_COMMON_H + +#if defined(HAVE_CONFIG_H) +#include "bitcoin-config.h" +#endif + +#include +#include + +#include "../sodium.h" +#ifdef WIN32 +#include "compat/endian.h" +#else +#include +#endif + +//#if defined(NDEBUG) +//# error "Bitcoin cannot be compiled without assertions." 
/*
 * Endian-explicit load/store helpers.
 *
 * Each helper assembles or splits the value one byte at a time instead of casting the
 * byte pointer to a wider integer type. This keeps them valid for any pointer alignment,
 * avoids accessing unsigned char storage through an incompatible lvalue type, and makes
 * the result independent of the host byte order.
 */

/* Read a 16-bit little-endian value from ptr[0..1]. */
uint16_t static inline ReadLE16(const unsigned char* ptr)
{
    return (uint16_t)((uint16_t)ptr[0] | ((uint16_t)ptr[1] << 8));
}

/* Read a 32-bit little-endian value from ptr[0..3]. */
uint32_t static inline ReadLE32(const unsigned char* ptr)
{
    return (uint32_t)ptr[0] |
           ((uint32_t)ptr[1] << 8) |
           ((uint32_t)ptr[2] << 16) |
           ((uint32_t)ptr[3] << 24);
}

/* Read a 64-bit little-endian value from ptr[0..7]. */
uint64_t static inline ReadLE64(const unsigned char* ptr)
{
    uint64_t x = 0;
    int i;
    /* Most significant byte is the last one in memory. */
    for (i = 7; i >= 0; --i)
        x = (x << 8) | (uint64_t)ptr[i];
    return x;
}

/* Store x as 16-bit little-endian into ptr[0..1]. */
void static inline WriteLE16(unsigned char* ptr, uint16_t x)
{
    ptr[0] = (unsigned char)(x & 0xff);
    ptr[1] = (unsigned char)((x >> 8) & 0xff);
}

/* Store x as 32-bit little-endian into ptr[0..3]. */
void static inline WriteLE32(unsigned char* ptr, uint32_t x)
{
    ptr[0] = (unsigned char)(x & 0xff);
    ptr[1] = (unsigned char)((x >> 8) & 0xff);
    ptr[2] = (unsigned char)((x >> 16) & 0xff);
    ptr[3] = (unsigned char)((x >> 24) & 0xff);
}

/* Store x as 64-bit little-endian into ptr[0..7]. */
void static inline WriteLE64(unsigned char* ptr, uint64_t x)
{
    int i;
    for (i = 0; i < 8; ++i) {
        ptr[i] = (unsigned char)(x & 0xff);
        x >>= 8;
    }
}

/* Read a 32-bit big-endian value from ptr[0..3]. */
uint32_t static inline ReadBE32(const unsigned char* ptr)
{
    return ((uint32_t)ptr[0] << 24) |
           ((uint32_t)ptr[1] << 16) |
           ((uint32_t)ptr[2] << 8) |
           (uint32_t)ptr[3];
}

/* Read a 64-bit big-endian value from ptr[0..7]. */
uint64_t static inline ReadBE64(const unsigned char* ptr)
{
    uint64_t x = 0;
    int i;
    /* Most significant byte is the first one in memory. */
    for (i = 0; i < 8; ++i)
        x = (x << 8) | (uint64_t)ptr[i];
    return x;
}

/* Store x as 32-bit big-endian into ptr[0..3]. */
void static inline WriteBE32(unsigned char* ptr, uint32_t x)
{
    ptr[0] = (unsigned char)((x >> 24) & 0xff);
    ptr[1] = (unsigned char)((x >> 16) & 0xff);
    ptr[2] = (unsigned char)((x >> 8) & 0xff);
    ptr[3] = (unsigned char)(x & 0xff);
}

/* Store x as 64-bit big-endian into ptr[0..7]. */
void static inline WriteBE64(unsigned char* ptr, uint64_t x)
{
    int i;
    for (i = 7; i >= 0; --i) {
        ptr[i] = (unsigned char)(x & 0xff);
        x >>= 8;
    }
}
+ unsigned int s = 0; + for (size_t i = 0; i < 32; i++) { + s = sig[32 + i] + L[i] + (s >> 8); + sig[32 + i] = s & 0xff; + } + + assert(crypto_sign_verify_detached(sig, message, sizeof(message), pk) != 0); + + return 0; +}*/ + +#endif // BITCOIN_CRYPTO_COMMON_H \ No newline at end of file diff --git a/src/Native/libverushash/crypto/haraka.c b/src/Native/libverushash/crypto/haraka.c new file mode 100644 index 000000000..49fb4196b --- /dev/null +++ b/src/Native/libverushash/crypto/haraka.c @@ -0,0 +1,665 @@ +/* +The MIT License (MIT) + +Copyright (c) 2016 kste + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +Optimized Implementations for Haraka256 and Haraka512 +*/ + +#include +#include "haraka.h" + +u128 rc[40]; +u128 rc0[40] = {0}; + +void load_constants() { + rc[0] = _mm_set_epi32(0x0684704c,0xe620c00a,0xb2c5fef0,0x75817b9d); + rc[1] = _mm_set_epi32(0x8b66b4e1,0x88f3a06b,0x640f6ba4,0x2f08f717); + rc[2] = _mm_set_epi32(0x3402de2d,0x53f28498,0xcf029d60,0x9f029114); + rc[3] = _mm_set_epi32(0x0ed6eae6,0x2e7b4f08,0xbbf3bcaf,0xfd5b4f79); + rc[4] = _mm_set_epi32(0xcbcfb0cb,0x4872448b,0x79eecd1c,0xbe397044); + rc[5] = _mm_set_epi32(0x7eeacdee,0x6e9032b7,0x8d5335ed,0x2b8a057b); + rc[6] = _mm_set_epi32(0x67c28f43,0x5e2e7cd0,0xe2412761,0xda4fef1b); + rc[7] = _mm_set_epi32(0x2924d9b0,0xafcacc07,0x675ffde2,0x1fc70b3b); + rc[8] = _mm_set_epi32(0xab4d63f1,0xe6867fe9,0xecdb8fca,0xb9d465ee); + rc[9] = _mm_set_epi32(0x1c30bf84,0xd4b7cd64,0x5b2a404f,0xad037e33); + rc[10] = _mm_set_epi32(0xb2cc0bb9,0x941723bf,0x69028b2e,0x8df69800); + rc[11] = _mm_set_epi32(0xfa0478a6,0xde6f5572,0x4aaa9ec8,0x5c9d2d8a); + rc[12] = _mm_set_epi32(0xdfb49f2b,0x6b772a12,0x0efa4f2e,0x29129fd4); + rc[13] = _mm_set_epi32(0x1ea10344,0xf449a236,0x32d611ae,0xbb6a12ee); + rc[14] = _mm_set_epi32(0xaf044988,0x4b050084,0x5f9600c9,0x9ca8eca6); + rc[15] = _mm_set_epi32(0x21025ed8,0x9d199c4f,0x78a2c7e3,0x27e593ec); + rc[16] = _mm_set_epi32(0xbf3aaaf8,0xa759c9b7,0xb9282ecd,0x82d40173); + rc[17] = _mm_set_epi32(0x6260700d,0x6186b017,0x37f2efd9,0x10307d6b); + rc[18] = _mm_set_epi32(0x5aca45c2,0x21300443,0x81c29153,0xf6fc9ac6); + rc[19] = _mm_set_epi32(0x9223973c,0x226b68bb,0x2caf92e8,0x36d1943a); + rc[20] = _mm_set_epi32(0xd3bf9238,0x225886eb,0x6cbab958,0xe51071b4); + rc[21] = _mm_set_epi32(0xdb863ce5,0xaef0c677,0x933dfddd,0x24e1128d); + rc[22] = _mm_set_epi32(0xbb606268,0xffeba09c,0x83e48de3,0xcb2212b1); + rc[23] = _mm_set_epi32(0x734bd3dc,0xe2e4d19c,0x2db91a4e,0xc72bf77d); + rc[24] = _mm_set_epi32(0x43bb47c3,0x61301b43,0x4b1415c4,0x2cb3924e); + rc[25] = _mm_set_epi32(0xdba775a8,0xe707eff6,0x03b231dd,0x16eb6899); + 
/*
 * Self-test for haraka512_8x().
 *
 * Fills eight 64-byte input blocks with the same pattern (byte i holds i % 64), hashes
 * them with the constants installed by load_constants(), and compares the first 32 output
 * bytes against the reference vector. Prints an error message on mismatch; prints nothing
 * on success. All buffers are released on every path.
 */
void test_implementations() {
  /* Kept on the heap as in the original code.
     NOTE(review): the LOAD/STORE macros may require 16-byte aligned buffers - confirm in
     haraka.h before moving these to the stack. */
  unsigned char *in = (unsigned char *)calloc(64*8, sizeof(unsigned char));
  unsigned char *out512 = (unsigned char *)calloc(32*8, sizeof(unsigned char));

  unsigned char testvector512[32] = {0xbe, 0x7f, 0x72, 0x3b, 0x4e, 0x80, 0xa9, 0x98,
                                     0x13, 0xb2, 0x92, 0x28, 0x7f, 0x30, 0x6f, 0x62,
                                     0x5a, 0x6d, 0x57, 0x33, 0x1c, 0xae, 0x5f, 0x34,
                                     0xdd, 0x92, 0x77, 0xb0, 0x94, 0x5b, 0xe2, 0xaa};

  int i;

  if (in == NULL || out512 == NULL) {
    printf("Error: out of memory.\n");
    goto cleanup;
  }

  // Input for testvector
  for(i = 0; i < 512; i++) {
    in[i] = i % 64;
  }

  load_constants();
  haraka512_8x(out512, in);

  // Verify output (first 32-byte digest only)
  for(i = 0; i < 32; i++) {
    if (out512[i] != testvector512[i]) {
      printf("Error: testvector incorrect.\n");
      break;
    }
  }

cleanup:
  /* free(NULL) is a no-op, so this is safe after a failed allocation too. */
  free(in);
  free(out512);
}
MIX2(s[2][0], s[2][1]); + MIX2(s[3][0], s[3][1]); + + // Round 5 + AES2_4x(s[0], s[1], s[2], s[3], 16); + + MIX2(s[0][0], s[0][1]); + MIX2(s[1][0], s[1][1]); + MIX2(s[2][0], s[2][1]); + MIX2(s[3][0], s[3][1]); + + // Feed Forward + s[0][0] = _mm_xor_si128(s[0][0], LOAD(in)); + s[0][1] = _mm_xor_si128(s[0][1], LOAD(in + 16)); + s[1][0] = _mm_xor_si128(s[1][0], LOAD(in + 32)); + s[1][1] = _mm_xor_si128(s[1][1], LOAD(in + 48)); + s[2][0] = _mm_xor_si128(s[2][0], LOAD(in + 64)); + s[2][1] = _mm_xor_si128(s[2][1], LOAD(in + 80)); + s[3][0] = _mm_xor_si128(s[3][0], LOAD(in + 96)); + s[3][1] = _mm_xor_si128(s[3][1], LOAD(in + 112)); + + STORE(out, s[0][0]); + STORE(out + 16, s[0][1]); + STORE(out + 32, s[1][0]); + STORE(out + 48, s[1][1]); + STORE(out + 64, s[2][0]); + STORE(out + 80, s[2][1]); + STORE(out + 96, s[3][0]); + STORE(out + 112, s[3][1]); +} + +void haraka256_8x(unsigned char *out, const unsigned char *in) { + // This is faster on Skylake, the code below is faster on Haswell. + haraka256_4x(out, in); + haraka256_4x(out + 128, in + 128); + return; + // __m128i s[8][2], tmp; + // + // int i; + // + // s[0][0] = LOAD(in); + // s[0][1] = LOAD(in + 16); + // s[1][0] = LOAD(in + 32); + // s[1][1] = LOAD(in + 48); + // s[2][0] = LOAD(in + 64); + // s[2][1] = LOAD(in + 80); + // s[3][0] = LOAD(in + 96); + // s[3][1] = LOAD(in + 112); + // s[4][0] = LOAD(in + 128); + // s[4][1] = LOAD(in + 144); + // s[5][0] = LOAD(in + 160); + // s[5][1] = LOAD(in + 176); + // s[6][0] = LOAD(in + 192); + // s[6][1] = LOAD(in + 208); + // s[7][0] = LOAD(in + 224); + // s[7][1] = LOAD(in + 240); + // + // // Round 1 + // AES2_8x(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0); + // + // MIX2(s[0][0], s[0][1]); + // MIX2(s[1][0], s[1][1]); + // MIX2(s[2][0], s[2][1]); + // MIX2(s[3][0], s[3][1]); + // MIX2(s[4][0], s[4][1]); + // MIX2(s[5][0], s[5][1]); + // MIX2(s[6][0], s[6][1]); + // MIX2(s[7][0], s[7][1]); + // + // + // // Round 2 + // AES2_8x(s[0], s[1], s[2], s[3], s[4], s[5], 
s[6], s[7], 4); + // + // MIX2(s[0][0], s[0][1]); + // MIX2(s[1][0], s[1][1]); + // MIX2(s[2][0], s[2][1]); + // MIX2(s[3][0], s[3][1]); + // MIX2(s[4][0], s[4][1]); + // MIX2(s[5][0], s[5][1]); + // MIX2(s[6][0], s[6][1]); + // MIX2(s[7][0], s[7][1]); + // + // // Round 3 + // AES2_8x(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8); + // + // MIX2(s[0][0], s[0][1]); + // MIX2(s[1][0], s[1][1]); + // MIX2(s[2][0], s[2][1]); + // MIX2(s[3][0], s[3][1]); + // MIX2(s[4][0], s[4][1]); + // MIX2(s[5][0], s[5][1]); + // MIX2(s[6][0], s[6][1]); + // MIX2(s[7][0], s[7][1]); + // + // // Round 4 + // AES2_8x(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 12); + // + // MIX2(s[0][0], s[0][1]); + // MIX2(s[1][0], s[1][1]); + // MIX2(s[2][0], s[2][1]); + // MIX2(s[3][0], s[3][1]); + // MIX2(s[4][0], s[4][1]); + // MIX2(s[5][0], s[5][1]); + // MIX2(s[6][0], s[6][1]); + // MIX2(s[7][0], s[7][1]); + // + // // Round 5 + // AES2_8x(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16); + // + // MIX2(s[0][0], s[0][1]); + // MIX2(s[1][0], s[1][1]); + // MIX2(s[2][0], s[2][1]); + // MIX2(s[3][0], s[3][1]); + // MIX2(s[4][0], s[4][1]); + // MIX2(s[5][0], s[5][1]); + // MIX2(s[6][0], s[6][1]); + // MIX2(s[7][0], s[7][1]); + // + // // Feed Forward + // s[0][0] = _mm_xor_si128(s[0][0], LOAD(in)); + // s[0][1] = _mm_xor_si128(s[0][1], LOAD(in + 16)); + // s[1][0] = _mm_xor_si128(s[1][0], LOAD(in + 32)); + // s[1][1] = _mm_xor_si128(s[1][1], LOAD(in + 48)); + // s[2][0] = _mm_xor_si128(s[2][0], LOAD(in + 64)); + // s[2][1] = _mm_xor_si128(s[2][1], LOAD(in + 80)); + // s[3][0] = _mm_xor_si128(s[3][0], LOAD(in + 96)); + // s[3][1] = _mm_xor_si128(s[3][1], LOAD(in + 112)); + // s[4][0] = _mm_xor_si128(s[4][0], LOAD(in + 128)); + // s[4][1] = _mm_xor_si128(s[4][1], LOAD(in + 144)); + // s[5][0] = _mm_xor_si128(s[5][0], LOAD(in + 160)); + // s[5][1] = _mm_xor_si128(s[5][1], LOAD(in + 176)); + // s[6][0] = _mm_xor_si128(s[6][0], LOAD(in + 192)); + // s[6][1] = _mm_xor_si128(s[6][1], LOAD(in 
+ 208)); + // s[7][0] = _mm_xor_si128(s[7][0], LOAD(in + 224)); + // s[7][1] = _mm_xor_si128(s[7][1], LOAD(in + 240)); + // + // STORE(out, s[0][0]); + // STORE(out + 16, s[0][1]); + // STORE(out + 32, s[1][0]); + // STORE(out + 48, s[1][1]); + // STORE(out + 64, s[2][0]); + // STORE(out + 80, s[2][1]); + // STORE(out + 96, s[3][0]); + // STORE(out + 112, s[3][1]); + // STORE(out + 128, s[4][0]); + // STORE(out + 144, s[4][1]); + // STORE(out + 160, s[5][0]); + // STORE(out + 176, s[5][1]); + // STORE(out + 192, s[6][0]); + // STORE(out + 208, s[6][1]); + // STORE(out + 224, s[7][0]); + // STORE(out + 240, s[7][1]); +} + +void haraka512(unsigned char *out, const unsigned char *in) { + u128 s[4], tmp; + + s[0] = LOAD(in); + s[1] = LOAD(in + 16); + s[2] = LOAD(in + 32); + s[3] = LOAD(in + 48); + + AES4(s[0], s[1], s[2], s[3], 0); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 8); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 16); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 24); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 32); + MIX4(s[0], s[1], s[2], s[3]); + + s[0] = _mm_xor_si128(s[0], LOAD(in)); + s[1] = _mm_xor_si128(s[1], LOAD(in + 16)); + s[2] = _mm_xor_si128(s[2], LOAD(in + 32)); + s[3] = _mm_xor_si128(s[3], LOAD(in + 48)); + + TRUNCSTORE(out, s[0], s[1], s[2], s[3]); +} + +void haraka512_zero(unsigned char *out, const unsigned char *in) { + u128 s[4], tmp; + + s[0] = LOAD(in); + s[1] = LOAD(in + 16); + s[2] = LOAD(in + 32); + s[3] = LOAD(in + 48); + + AES4_zero(s[0], s[1], s[2], s[3], 0); + MIX4(s[0], s[1], s[2], s[3]); + + AES4_zero(s[0], s[1], s[2], s[3], 8); + MIX4(s[0], s[1], s[2], s[3]); + + AES4_zero(s[0], s[1], s[2], s[3], 16); + MIX4(s[0], s[1], s[2], s[3]); + + AES4_zero(s[0], s[1], s[2], s[3], 24); + MIX4(s[0], s[1], s[2], s[3]); + + AES4_zero(s[0], s[1], s[2], s[3], 32); + MIX4(s[0], s[1], s[2], s[3]); + + s[0] = _mm_xor_si128(s[0], LOAD(in)); + s[1] = 
_mm_xor_si128(s[1], LOAD(in + 16)); + s[2] = _mm_xor_si128(s[2], LOAD(in + 32)); + s[3] = _mm_xor_si128(s[3], LOAD(in + 48)); + + TRUNCSTORE(out, s[0], s[1], s[2], s[3]); +} + +void haraka512_keyed(unsigned char *out, const unsigned char *in, const u128 *rc) { + u128 s[4], tmp; + + s[0] = LOAD(in); + s[1] = LOAD(in + 16); + s[2] = LOAD(in + 32); + s[3] = LOAD(in + 48); + + AES4(s[0], s[1], s[2], s[3], 0); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 8); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 16); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 24); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 32); + MIX4(s[0], s[1], s[2], s[3]); + + s[0] = _mm_xor_si128(s[0], LOAD(in)); + s[1] = _mm_xor_si128(s[1], LOAD(in + 16)); + s[2] = _mm_xor_si128(s[2], LOAD(in + 32)); + s[3] = _mm_xor_si128(s[3], LOAD(in + 48)); + + TRUNCSTORE(out, s[0], s[1], s[2], s[3]); +} + +void haraka512_4x(unsigned char *out, const unsigned char *in) { + u128 s[4][4], tmp; + + s[0][0] = LOAD(in); + s[0][1] = LOAD(in + 16); + s[0][2] = LOAD(in + 32); + s[0][3] = LOAD(in + 48); + s[1][0] = LOAD(in + 64); + s[1][1] = LOAD(in + 80); + s[1][2] = LOAD(in + 96); + s[1][3] = LOAD(in + 112); + s[2][0] = LOAD(in + 128); + s[2][1] = LOAD(in + 144); + s[2][2] = LOAD(in + 160); + s[2][3] = LOAD(in + 176); + s[3][0] = LOAD(in + 192); + s[3][1] = LOAD(in + 208); + s[3][2] = LOAD(in + 224); + s[3][3] = LOAD(in + 240); + + AES4_4x(s[0], s[1], s[2], s[3], 0); + MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + + AES4_4x(s[0], s[1], s[2], s[3], 8); + MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + + AES4_4x(s[0], s[1], s[2], s[3], 16); + MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + 
MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + + AES4_4x(s[0], s[1], s[2], s[3], 24); + MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + + AES4_4x(s[0], s[1], s[2], s[3], 32); + MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + + + s[0][0] = _mm_xor_si128(s[0][0], LOAD(in)); + s[0][1] = _mm_xor_si128(s[0][1], LOAD(in + 16)); + s[0][2] = _mm_xor_si128(s[0][2], LOAD(in + 32)); + s[0][3] = _mm_xor_si128(s[0][3], LOAD(in + 48)); + s[1][0] = _mm_xor_si128(s[1][0], LOAD(in + 64)); + s[1][1] = _mm_xor_si128(s[1][1], LOAD(in + 80)); + s[1][2] = _mm_xor_si128(s[1][2], LOAD(in + 96)); + s[1][3] = _mm_xor_si128(s[1][3], LOAD(in + 112)); + s[2][0] = _mm_xor_si128(s[2][0], LOAD(in + 128)); + s[2][1] = _mm_xor_si128(s[2][1], LOAD(in + 144)); + s[2][2] = _mm_xor_si128(s[2][2], LOAD(in + 160)); + s[2][3] = _mm_xor_si128(s[2][3], LOAD(in + 176)); + s[3][0] = _mm_xor_si128(s[3][0], LOAD(in + 192)); + s[3][1] = _mm_xor_si128(s[3][1], LOAD(in + 208)); + s[3][2] = _mm_xor_si128(s[3][2], LOAD(in + 224)); + s[3][3] = _mm_xor_si128(s[3][3], LOAD(in + 240)); + + TRUNCSTORE(out, s[0][0], s[0][1], s[0][2], s[0][3]); + TRUNCSTORE(out + 32, s[1][0], s[1][1], s[1][2], s[1][3]); + TRUNCSTORE(out + 64, s[2][0], s[2][1], s[2][2], s[2][3]); + TRUNCSTORE(out + 96, s[3][0], s[3][1], s[3][2], s[3][3]); +} + +void haraka512_8x(unsigned char *out, const unsigned char *in) { + // This is faster on Skylake, the code below is faster on Haswell. 
+ haraka512_4x(out, in); + haraka512_4x(out + 128, in + 256); + + // u128 s[8][4], tmp; + // + // s[0][0] = LOAD(in); + // s[0][1] = LOAD(in + 16); + // s[0][2] = LOAD(in + 32); + // s[0][3] = LOAD(in + 48); + // s[1][0] = LOAD(in + 64); + // s[1][1] = LOAD(in + 80); + // s[1][2] = LOAD(in + 96); + // s[1][3] = LOAD(in + 112); + // s[2][0] = LOAD(in + 128); + // s[2][1] = LOAD(in + 144); + // s[2][2] = LOAD(in + 160); + // s[2][3] = LOAD(in + 176); + // s[3][0] = LOAD(in + 192); + // s[3][1] = LOAD(in + 208); + // s[3][2] = LOAD(in + 224); + // s[3][3] = LOAD(in + 240); + // s[4][0] = LOAD(in + 256); + // s[4][1] = LOAD(in + 272); + // s[4][2] = LOAD(in + 288); + // s[4][3] = LOAD(in + 304); + // s[5][0] = LOAD(in + 320); + // s[5][1] = LOAD(in + 336); + // s[5][2] = LOAD(in + 352); + // s[5][3] = LOAD(in + 368); + // s[6][0] = LOAD(in + 384); + // s[6][1] = LOAD(in + 400); + // s[6][2] = LOAD(in + 416); + // s[6][3] = LOAD(in + 432); + // s[7][0] = LOAD(in + 448); + // s[7][1] = LOAD(in + 464); + // s[7][2] = LOAD(in + 480); + // s[7][3] = LOAD(in + 496); + // + // AES4_8x(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0); + // MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + // MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + // MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + // MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + // MIX4(s[4][0], s[4][1], s[4][2], s[4][3]); + // MIX4(s[5][0], s[5][1], s[5][2], s[5][3]); + // MIX4(s[6][0], s[6][1], s[6][2], s[6][3]); + // MIX4(s[7][0], s[7][1], s[7][2], s[7][3]); + // + // AES4_8x(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8); + // MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + // MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + // MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + // MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + // MIX4(s[4][0], s[4][1], s[4][2], s[4][3]); + // MIX4(s[5][0], s[5][1], s[5][2], s[5][3]); + // MIX4(s[6][0], s[6][1], s[6][2], s[6][3]); + // MIX4(s[7][0], s[7][1], s[7][2], s[7][3]); + // + // AES4_8x(s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], s[7], 16); + // MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + // MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + // MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + // MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + // MIX4(s[4][0], s[4][1], s[4][2], s[4][3]); + // MIX4(s[5][0], s[5][1], s[5][2], s[5][3]); + // MIX4(s[6][0], s[6][1], s[6][2], s[6][3]); + // MIX4(s[7][0], s[7][1], s[7][2], s[7][3]); + // + // AES4_8x(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24); + // MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + // MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + // MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + // MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + // MIX4(s[4][0], s[4][1], s[4][2], s[4][3]); + // MIX4(s[5][0], s[5][1], s[5][2], s[5][3]); + // MIX4(s[6][0], s[6][1], s[6][2], s[6][3]); + // MIX4(s[7][0], s[7][1], s[7][2], s[7][3]); + // + // AES4_8x(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32); + // MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); + // MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); + // MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); + // MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); + // MIX4(s[4][0], s[4][1], s[4][2], s[4][3]); + // MIX4(s[5][0], s[5][1], s[5][2], s[5][3]); + // MIX4(s[6][0], s[6][1], s[6][2], s[6][3]); + // MIX4(s[7][0], s[7][1], s[7][2], s[7][3]); + // + // + // s[0][0] = _mm_xor_si128(s[0][0], LOAD(in)); + // s[0][1] = _mm_xor_si128(s[0][1], LOAD(in + 16)); + // s[0][2] = _mm_xor_si128(s[0][2], LOAD(in + 32)); + // s[0][3] = _mm_xor_si128(s[0][3], LOAD(in + 48)); + // s[1][0] = _mm_xor_si128(s[1][0], LOAD(in + 64)); + // s[1][1] = _mm_xor_si128(s[1][1], LOAD(in + 80)); + // s[1][2] = _mm_xor_si128(s[1][2], LOAD(in + 96)); + // s[1][3] = _mm_xor_si128(s[1][3], LOAD(in + 112)); + // s[2][0] = _mm_xor_si128(s[2][0], LOAD(in + 128)); + // s[2][1] = _mm_xor_si128(s[2][1], LOAD(in + 144)); + // s[2][2] = _mm_xor_si128(s[2][2], LOAD(in + 160)); + // s[2][3] = _mm_xor_si128(s[2][3], LOAD(in + 176)); + // s[3][0] = 
_mm_xor_si128(s[3][0], LOAD(in + 192)); + // s[3][1] = _mm_xor_si128(s[3][1], LOAD(in + 208)); + // s[3][2] = _mm_xor_si128(s[3][2], LOAD(in + 224)); + // s[3][3] = _mm_xor_si128(s[3][3], LOAD(in + 240)); + // s[4][0] = _mm_xor_si128(s[4][0], LOAD(in + 256)); + // s[4][1] = _mm_xor_si128(s[4][1], LOAD(in + 272)); + // s[4][2] = _mm_xor_si128(s[4][2], LOAD(in + 288)); + // s[4][3] = _mm_xor_si128(s[4][3], LOAD(in + 304)); + // s[5][0] = _mm_xor_si128(s[5][0], LOAD(in + 320)); + // s[5][1] = _mm_xor_si128(s[5][1], LOAD(in + 336)); + // s[5][2] = _mm_xor_si128(s[5][2], LOAD(in + 352)); + // s[5][3] = _mm_xor_si128(s[5][3], LOAD(in + 368)); + // s[6][0] = _mm_xor_si128(s[6][0], LOAD(in + 384)); + // s[6][1] = _mm_xor_si128(s[6][1], LOAD(in + 400)); + // s[6][2] = _mm_xor_si128(s[6][2], LOAD(in + 416)); + // s[6][3] = _mm_xor_si128(s[6][3], LOAD(in + 432)); + // s[7][0] = _mm_xor_si128(s[7][0], LOAD(in + 448)); + // s[7][1] = _mm_xor_si128(s[7][1], LOAD(in + 464)); + // s[7][2] = _mm_xor_si128(s[7][2], LOAD(in + 480)); + // s[7][3] = _mm_xor_si128(s[7][3], LOAD(in + 496)); + // + // TRUNCSTORE(out, s[0][0], s[0][1], s[0][2], s[0][3]); + // TRUNCSTORE(out + 32, s[1][0], s[1][1], s[1][2], s[1][3]); + // TRUNCSTORE(out + 64, s[2][0], s[2][1], s[2][2], s[2][3]); + // TRUNCSTORE(out + 96, s[3][0], s[3][1], s[3][2], s[3][3]); + // TRUNCSTORE(out + 128, s[4][0], s[4][1], s[4][2], s[4][3]); + // TRUNCSTORE(out + 160, s[5][0], s[5][1], s[5][2], s[5][3]); + // TRUNCSTORE(out + 192, s[6][0], s[6][1], s[6][2], s[6][3]); + // TRUNCSTORE(out + 224, s[7][0], s[7][1], s[7][2], s[7][3]); +} \ No newline at end of file diff --git a/src/Native/libverushash/crypto/haraka.h b/src/Native/libverushash/crypto/haraka.h new file mode 100644 index 000000000..91157fee6 --- /dev/null +++ b/src/Native/libverushash/crypto/haraka.h @@ -0,0 +1,128 @@ +/* +The MIT License (MIT) + +Copyright (c) 2016 kste + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software 
/*
The MIT License (MIT)

Copyright (c) 2016 kste

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Optimized Implementations for Haraka256 and Haraka512
*/
#ifndef HARAKA_H_
#define HARAKA_H_

#include "immintrin.h"

#define NUMROUNDS 5

#ifdef _WIN32
typedef unsigned long long u64;
#else
typedef unsigned long u64;
#endif
typedef __m128i u128;

/*
 * TRUNCSTORE moves data in u64-sized pieces and silently produces wrong
 * output if u64 is not exactly 8 bytes (e.g. `unsigned long` on an ILP32
 * target). Fail the build instead of computing bad hashes.
 */
typedef char haraka_u64_must_be_8_bytes[(sizeof(u64) == 8) ? 1 : -1];

/* Default round-constant table used by the un-keyed AES macros below. */
extern u128 rc[40];

/* LOAD is an ALIGNED load: src must be 16-byte aligned. STORE is unaligned. */
#define LOAD(src) _mm_load_si128((u128 *)(src))
#define STORE(dest,src) _mm_storeu_si128((u128 *)(dest),src)

/*
 * AES layer macros. They are plain statement sequences (not expressions) and
 * read the round constants from whatever `rc` is visible at the point of use:
 * the global table above, or a parameter named `rc` in the *_keyed functions.
 * `rci` is the index of the first constant consumed.
 */
#define AES2(s0, s1, rci) \
  s0 = _mm_aesenc_si128(s0, rc[rci]); \
  s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \
  s0 = _mm_aesenc_si128(s0, rc[rci + 2]); \
  s1 = _mm_aesenc_si128(s1, rc[rci + 3]);

#define AES2_4x(s0, s1, s2, s3, rci) \
  AES2(s0[0], s0[1], rci); \
  AES2(s1[0], s1[1], rci); \
  AES2(s2[0], s2[1], rci); \
  AES2(s3[0], s3[1], rci);

#define AES2_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \
  AES2_4x(s0, s1, s2, s3, rci); \
  AES2_4x(s4, s5, s6, s7, rci);

#define AES4(s0, s1, s2, s3, rci) \
  s0 = _mm_aesenc_si128(s0, rc[rci]); \
  s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \
  s2 = _mm_aesenc_si128(s2, rc[rci + 2]); \
  s3 = _mm_aesenc_si128(s3, rc[rci + 3]); \
  s0 = _mm_aesenc_si128(s0, rc[rci + 4]); \
  s1 = _mm_aesenc_si128(s1, rc[rci + 5]); \
  s2 = _mm_aesenc_si128(s2, rc[rci + 6]); \
  s3 = _mm_aesenc_si128(s3, rc[rci + 7]);

/*
 * Same as AES4 but keyed from `rc0`. This header does not declare `rc0`; it
 * must be visible wherever the macro is expanded.
 */
#define AES4_zero(s0, s1, s2, s3, rci) \
  s0 = _mm_aesenc_si128(s0, rc0[rci]); \
  s1 = _mm_aesenc_si128(s1, rc0[rci + 1]); \
  s2 = _mm_aesenc_si128(s2, rc0[rci + 2]); \
  s3 = _mm_aesenc_si128(s3, rc0[rci + 3]); \
  s0 = _mm_aesenc_si128(s0, rc0[rci + 4]); \
  s1 = _mm_aesenc_si128(s1, rc0[rci + 5]); \
  s2 = _mm_aesenc_si128(s2, rc0[rci + 6]); \
  s3 = _mm_aesenc_si128(s3, rc0[rci + 7]);

#define AES4_4x(s0, s1, s2, s3, rci) \
  AES4(s0[0], s0[1], s0[2], s0[3], rci); \
  AES4(s1[0], s1[1], s1[2], s1[3], rci); \
  AES4(s2[0], s2[1], s2[2], s2[3], rci); \
  AES4(s3[0], s3[1], s3[2], s3[3], rci);

#define AES4_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \
  AES4_4x(s0, s1, s2, s3, rci); \
  AES4_4x(s4, s5, s6, s7, rci);

/*
 * Mixing macros. Both need a `u128 tmp` declared in the calling scope and
 * overwrite their arguments in place.
 */
#define MIX2(s0, s1) \
  tmp = _mm_unpacklo_epi32(s0, s1); \
  s1 = _mm_unpackhi_epi32(s0, s1); \
  s0 = tmp;

#define MIX4(s0, s1, s2, s3) \
  tmp = _mm_unpacklo_epi32(s0, s1); \
  s0 = _mm_unpackhi_epi32(s0, s1); \
  s1 = _mm_unpacklo_epi32(s2, s3); \
  s2 = _mm_unpackhi_epi32(s2, s3); \
  s3 = _mm_unpacklo_epi32(s0, s2); \
  s0 = _mm_unpackhi_epi32(s0, s2); \
  s2 = _mm_unpackhi_epi32(s1, tmp); \
  s1 = _mm_unpacklo_epi32(s1, tmp);

/*
 * Writes 32 bytes to `out`: the upper 64 bits of s0 and s1, followed by the
 * lower 64 bits of s2 and s3.
 */
#define TRUNCSTORE(out, s0, s1, s2, s3) \
  *(u64*)(out) = *(((u64*)&(s0) + 1)); \
  *(u64*)(out + 8) = *(((u64*)&(s1) + 1)); \
  *(u64*)(out + 16) = *(((u64*)&(s2) + 0)); \
  *(u64*)(out + 24) = *(((u64*)&(s3) + 0));

void load_constants(void);
void test_implementations(void);

/* 256-bit variants; the _4x / _8x forms process that many blocks per call. */
void haraka256(unsigned char *out, const unsigned char *in);
void haraka256_keyed(unsigned char *out, const unsigned char *in, const u128 *rc);
void haraka256_4x(unsigned char *out, const unsigned char *in);
void haraka256_8x(unsigned char *out, const unsigned char *in);

/* 512-bit variants: 64 input bytes -> 32 output bytes per block. */
void haraka512(unsigned char *out, const unsigned char *in);
void haraka512_zero(unsigned char *out, const unsigned char *in);
void haraka512_keyed(unsigned char *out, const unsigned char *in, const u128 *rc);
void haraka512_4x(unsigned char *out, const unsigned char *in);
void haraka512_8x(unsigned char *out, const unsigned char *in);

#endif
/*
 * Built-in Haraka round constants: 40 entries of 16 bytes each.
 *
 * load_constants_port() copies this table verbatim into the working table
 * `rc`; tweak_constants() also starts from it before deriving seed-specific
 * constants. The table itself is never modified.
 */
static const unsigned char haraka_rc[40][16] = {
    {0x9d, 0x7b, 0x81, 0x75, 0xf0, 0xfe, 0xc5, 0xb2, 0x0a, 0xc0, 0x20, 0xe6, 0x4c, 0x70, 0x84, 0x06},
    {0x17, 0xf7, 0x08, 0x2f, 0xa4, 0x6b, 0x0f, 0x64, 0x6b, 0xa0, 0xf3, 0x88, 0xe1, 0xb4, 0x66, 0x8b},
    {0x14, 0x91, 0x02, 0x9f, 0x60, 0x9d, 0x02, 0xcf, 0x98, 0x84, 0xf2, 0x53, 0x2d, 0xde, 0x02, 0x34},
    {0x79, 0x4f, 0x5b, 0xfd, 0xaf, 0xbc, 0xf3, 0xbb, 0x08, 0x4f, 0x7b, 0x2e, 0xe6, 0xea, 0xd6, 0x0e},
    {0x44, 0x70, 0x39, 0xbe, 0x1c, 0xcd, 0xee, 0x79, 0x8b, 0x44, 0x72, 0x48, 0xcb, 0xb0, 0xcf, 0xcb},
    {0x7b, 0x05, 0x8a, 0x2b, 0xed, 0x35, 0x53, 0x8d, 0xb7, 0x32, 0x90, 0x6e, 0xee, 0xcd, 0xea, 0x7e},
    {0x1b, 0xef, 0x4f, 0xda, 0x61, 0x27, 0x41, 0xe2, 0xd0, 0x7c, 0x2e, 0x5e, 0x43, 0x8f, 0xc2, 0x67},
    {0x3b, 0x0b, 0xc7, 0x1f, 0xe2, 0xfd, 0x5f, 0x67, 0x07, 0xcc, 0xca, 0xaf, 0xb0, 0xd9, 0x24, 0x29},
    {0xee, 0x65, 0xd4, 0xb9, 0xca, 0x8f, 0xdb, 0xec, 0xe9, 0x7f, 0x86, 0xe6, 0xf1, 0x63, 0x4d, 0xab},
    {0x33, 0x7e, 0x03, 0xad, 0x4f, 0x40, 0x2a, 0x5b, 0x64, 0xcd, 0xb7, 0xd4, 0x84, 0xbf, 0x30, 0x1c},
    {0x00, 0x98, 0xf6, 0x8d, 0x2e, 0x8b, 0x02, 0x69, 0xbf, 0x23, 0x17, 0x94, 0xb9, 0x0b, 0xcc, 0xb2},
    {0x8a, 0x2d, 0x9d, 0x5c, 0xc8, 0x9e, 0xaa, 0x4a, 0x72, 0x55, 0x6f, 0xde, 0xa6, 0x78, 0x04, 0xfa},
    {0xd4, 0x9f, 0x12, 0x29, 0x2e, 0x4f, 0xfa, 0x0e, 0x12, 0x2a, 0x77, 0x6b, 0x2b, 0x9f, 0xb4, 0xdf},
    {0xee, 0x12, 0x6a, 0xbb, 0xae, 0x11, 0xd6, 0x32, 0x36, 0xa2, 0x49, 0xf4, 0x44, 0x03, 0xa1, 0x1e},
    {0xa6, 0xec, 0xa8, 0x9c, 0xc9, 0x00, 0x96, 0x5f, 0x84, 0x00, 0x05, 0x4b, 0x88, 0x49, 0x04, 0xaf},
    {0xec, 0x93, 0xe5, 0x27, 0xe3, 0xc7, 0xa2, 0x78, 0x4f, 0x9c, 0x19, 0x9d, 0xd8, 0x5e, 0x02, 0x21},
    {0x73, 0x01, 0xd4, 0x82, 0xcd, 0x2e, 0x28, 0xb9, 0xb7, 0xc9, 0x59, 0xa7, 0xf8, 0xaa, 0x3a, 0xbf},
    {0x6b, 0x7d, 0x30, 0x10, 0xd9, 0xef, 0xf2, 0x37, 0x17, 0xb0, 0x86, 0x61, 0x0d, 0x70, 0x60, 0x62},
    {0xc6, 0x9a, 0xfc, 0xf6, 0x53, 0x91, 0xc2, 0x81, 0x43, 0x04, 0x30, 0x21, 0xc2, 0x45, 0xca, 0x5a},
    {0x3a, 0x94, 0xd1, 0x36, 0xe8, 0x92, 0xaf, 0x2c, 0xbb, 0x68, 0x6b, 0x22, 0x3c, 0x97, 0x23, 0x92},
    {0xb4, 0x71, 0x10, 0xe5, 0x58, 0xb9, 0xba, 0x6c, 0xeb, 0x86, 0x58, 0x22, 0x38, 0x92, 0xbf, 0xd3},
    {0x8d, 0x12, 0xe1, 0x24, 0xdd, 0xfd, 0x3d, 0x93, 0x77, 0xc6, 0xf0, 0xae, 0xe5, 0x3c, 0x86, 0xdb},
    {0xb1, 0x12, 0x22, 0xcb, 0xe3, 0x8d, 0xe4, 0x83, 0x9c, 0xa0, 0xeb, 0xff, 0x68, 0x62, 0x60, 0xbb},
    {0x7d, 0xf7, 0x2b, 0xc7, 0x4e, 0x1a, 0xb9, 0x2d, 0x9c, 0xd1, 0xe4, 0xe2, 0xdc, 0xd3, 0x4b, 0x73},
    {0x4e, 0x92, 0xb3, 0x2c, 0xc4, 0x15, 0x14, 0x4b, 0x43, 0x1b, 0x30, 0x61, 0xc3, 0x47, 0xbb, 0x43},
    {0x99, 0x68, 0xeb, 0x16, 0xdd, 0x31, 0xb2, 0x03, 0xf6, 0xef, 0x07, 0xe7, 0xa8, 0x75, 0xa7, 0xdb},
    {0x2c, 0x47, 0xca, 0x7e, 0x02, 0x23, 0x5e, 0x8e, 0x77, 0x59, 0x75, 0x3c, 0x4b, 0x61, 0xf3, 0x6d},
    {0xf9, 0x17, 0x86, 0xb8, 0xb9, 0xe5, 0x1b, 0x6d, 0x77, 0x7d, 0xde, 0xd6, 0x17, 0x5a, 0xa7, 0xcd},
    {0x5d, 0xee, 0x46, 0xa9, 0x9d, 0x06, 0x6c, 0x9d, 0xaa, 0xe9, 0xa8, 0x6b, 0xf0, 0x43, 0x6b, 0xec},
    {0xc1, 0x27, 0xf3, 0x3b, 0x59, 0x11, 0x53, 0xa2, 0x2b, 0x33, 0x57, 0xf9, 0x50, 0x69, 0x1e, 0xcb},
    {0xd9, 0xd0, 0x0e, 0x60, 0x53, 0x03, 0xed, 0xe4, 0x9c, 0x61, 0xda, 0x00, 0x75, 0x0c, 0xee, 0x2c},
    {0x50, 0xa3, 0xa4, 0x63, 0xbc, 0xba, 0xbb, 0x80, 0xab, 0x0c, 0xe9, 0x96, 0xa1, 0xa5, 0xb1, 0xf0},
    {0x39, 0xca, 0x8d, 0x93, 0x30, 0xde, 0x0d, 0xab, 0x88, 0x29, 0x96, 0x5e, 0x02, 0xb1, 0x3d, 0xae},
    {0x42, 0xb4, 0x75, 0x2e, 0xa8, 0xf3, 0x14, 0x88, 0x0b, 0xa4, 0x54, 0xd5, 0x38, 0x8f, 0xbb, 0x17},
    {0xf6, 0x16, 0x0a, 0x36, 0x79, 0xb7, 0xb6, 0xae, 0xd7, 0x7f, 0x42, 0x5f, 0x5b, 0x8a, 0xbb, 0x34},
    {0xde, 0xaf, 0xba, 0xff, 0x18, 0x59, 0xce, 0x43, 0x38, 0x54, 0xe5, 0xcb, 0x41, 0x52, 0xf6, 0x26},
    {0x78, 0xc9, 0x9e, 0x83, 0xf7, 0x9c, 0xca, 0xa2, 0x6a, 0x02, 0xf3, 0xb9, 0x54, 0x9a, 0xe9, 0x4c},
    {0x35, 0x12, 0x90, 0x22, 0x28, 0x6e, 0xc0, 0x40, 0xbe, 0xf7, 0xdf, 0x1b, 0x1a, 0xa5, 0x51, 0xae},
    {0xcf, 0x59, 0xa6, 0x48, 0x0f, 0xbc, 0x73, 0xc1, 0x2b, 0xd2, 0x7e, 0xba, 0x3c, 0x61, 0xc1, 0xa0},
    {0xa1, 0x9d, 0xc5, 0xe9, 0xfd, 0xbd, 0xd6, 0x4a, 0x88, 0x82, 0x28, 0x02, 0x03, 0xcc, 0x6a, 0x75}
};
/* Working round-constant tables: active set, zero-key set, sk.seed set. */
static unsigned char rc[40][16];
static unsigned char rc0[40][16];
static unsigned char rc_sseed[40][16];

/* AES S-box (forward substitution table). */
static const unsigned char sbox[256] =
{ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe,
  0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4,
  0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7,
  0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3,
  0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x09,
  0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3,
  0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe,
  0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
  0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92,
  0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c,
  0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19,
  0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14,
  0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2,
  0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5,
  0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25,
  0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86,
  0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e,
  0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42,
  0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };

/* Multiply by x in GF(2^8) modulo the AES polynomial (callers truncate to 8 bits). */
#define XT(x) (((x) << 1) ^ ((((x) >> 7) & 1) * 0x1b))

/*
 * Software stand-in for the AES-NI _mm_aesenc_si128 instruction: one AES
 * round (SubBytes, ShiftRows, MixColumns, AddRoundKey) applied in place.
 *
 * s:  16-byte state, overwritten with the result.
 * rk: 16-byte round key.
 */
void aesenc(unsigned char *s, const unsigned char *rk)
{
    unsigned char col[4][4];
    int idx, c;

    /*
     * SubBytes + ShiftRows in one pass. Byte idx sits in column idx/4,
     * row idx%4; row r is rotated left by r columns.
     */
    for (idx = 0; idx < 16; ++idx) {
        const int row = idx % 4;
        const int dst_col = ((idx / 4) + 4 - row) % 4;
        col[dst_col][row] = sbox[s[idx]];
    }

    /* MixColumns, one column at a time. */
    for (c = 0; c < 4; ++c) {
        unsigned char *v = col[c];
        const unsigned char first = v[0];
        const unsigned char all = v[0] ^ v[1] ^ v[2] ^ v[3];

        v[0] ^= all ^ XT(v[0] ^ v[1]);
        v[1] ^= all ^ XT(v[1] ^ v[2]);
        v[2] ^= all ^ XT(v[2] ^ v[3]);
        v[3] ^= all ^ XT(v[3] ^ first);
    }

    /* AddRoundKey and write back. */
    for (idx = 0; idx < 16; ++idx) {
        s[idx] = col[idx / 4][idx % 4] ^ rk[idx];
    }
}

/*
 * Byte-level equivalent of _mm_unpacklo_epi32: interleaves the two low
 * 32-bit words of a and b into t as a0 b0 a1 b1. t may alias a or b.
 */
void unpacklo32(unsigned char *t, unsigned char *a, unsigned char *b)
{
    unsigned char mixed[16];
    int w;

    for (w = 0; w < 2; ++w) {
        memcpy(mixed + 8 * w, a + 4 * w, 4);
        memcpy(mixed + 8 * w + 4, b + 4 * w, 4);
    }
    memcpy(t, mixed, 16);
}

/*
 * Byte-level equivalent of _mm_unpackhi_epi32: interleaves the two high
 * 32-bit words of a and b into t as a2 b2 a3 b3. t may alias a or b.
 */
void unpackhi32(unsigned char *t, unsigned char *a, unsigned char *b)
{
    unsigned char mixed[16];
    int w;

    for (w = 0; w < 2; ++w) {
        memcpy(mixed + 8 * w, a + 8 + 4 * w, 4);
        memcpy(mixed + 8 * w + 4, b + 8 + 4 * w, 4);
    }
    memcpy(t, mixed, 16);
}
*/ + memcpy(rc, haraka_rc, 40*16); +} + +void tweak_constants(const unsigned char *pk_seed, const unsigned char *sk_seed, + unsigned long long seed_length) +{ + unsigned char buf[40*16]; + + /* Use the standard constants to generate tweaked ones. */ + memcpy(rc, haraka_rc, 40*16); + + /* Constants for sk.seed */ + if (sk_seed != NULL) { + haraka_S(buf, 40*16, sk_seed, seed_length); + memcpy(rc_sseed, buf, 40*16); + } + + /* Constants for pk.seed */ + haraka_S(buf, 40*16, pk_seed, seed_length); + memcpy(rc, buf, 40*16); +} + +static void haraka_S_absorb(unsigned char *s, + const unsigned char *m, unsigned long long mlen, + unsigned char p) +{ + unsigned long long i; + unsigned char t[32]; + + while (mlen >= 32) { + // XOR block to state + for (i = 0; i < 32; ++i) { + s[i] ^= m[i]; + } + haraka512_perm(s, s); + mlen -= 32; + m += 32; + } + + for (i = 0; i < 32; ++i) { + t[i] = 0; + } + for (i = 0; i < mlen; ++i) { + t[i] = m[i]; + } + t[i] = p; + t[32 - 1] |= 128; + for (i = 0; i < 32; ++i) { + s[i] ^= t[i]; + } +} + +static void haraka_S_squeezeblocks(unsigned char *h, unsigned long long nblocks, + unsigned char *s, unsigned int r) +{ + while (nblocks > 0) { + haraka512_perm(s, s); + memcpy(h, s, HARAKAS_RATE); + h += r; + nblocks--; + } +} + + +void haraka_S(unsigned char *out, unsigned long long outlen, + const unsigned char *in, unsigned long long inlen) +{ + unsigned long long i; + unsigned char s[64]; + unsigned char d[32]; + + for (i = 0; i < 64; i++) { + s[i] = 0; + } + haraka_S_absorb(s, in, inlen, 0x1F); + + haraka_S_squeezeblocks(out, outlen / 32, s, 32); + out += (outlen / 32) * 32; + + if (outlen % 32) { + haraka_S_squeezeblocks(d, 1, s, 32); + for (i = 0; i < outlen % 32; i++) { + out[i] = d[i]; + } + } +} + +void haraka512_perm(unsigned char *out, const unsigned char *in) +{ + int i, j; + + unsigned char s[64], tmp[16]; + + memcpy(s, in, 16); + memcpy(s + 16, in + 16, 16); + memcpy(s + 32, in + 32, 16); + memcpy(s + 48, in + 48, 16); + + for (i = 0; i 
< 5; ++i) { + // aes round(s) + for (j = 0; j < 2; ++j) { + aesenc(s, rc[4*2*i + 4*j]); + aesenc(s + 16, rc[4*2*i + 4*j + 1]); + aesenc(s + 32, rc[4*2*i + 4*j + 2]); + aesenc(s + 48, rc[4*2*i + 4*j + 3]); + } + + // mixing + unpacklo32(tmp, s, s + 16); + unpackhi32(s, s, s + 16); + unpacklo32(s + 16, s + 32, s + 48); + unpackhi32(s + 32, s + 32, s + 48); + unpacklo32(s + 48, s, s + 32); + unpackhi32(s, s, s + 32); + unpackhi32(s + 32, s + 16, tmp); + unpacklo32(s + 16, s + 16, tmp); + } + + memcpy(out, s, 64); +} + +void haraka512_perm_keyed(unsigned char *out, const unsigned char *in, const u128 *rc) +{ + int i, j; + + unsigned char s[64], tmp[16]; + + memcpy(s, in, 16); + memcpy(s + 16, in + 16, 16); + memcpy(s + 32, in + 32, 16); + memcpy(s + 48, in + 48, 16); + + for (i = 0; i < 5; ++i) { + // aes round(s) + for (j = 0; j < 2; ++j) { + aesenc(s, (const unsigned char *)&rc[4*2*i + 4*j]); + aesenc(s + 16, (const unsigned char *)&rc[4*2*i + 4*j + 1]); + aesenc(s + 32, (const unsigned char *)&rc[4*2*i + 4*j + 2]); + aesenc(s + 48, (const unsigned char *)&rc[4*2*i + 4*j + 3]); + } + + // mixing + unpacklo32(tmp, s, s + 16); + unpackhi32(s, s, s + 16); + unpacklo32(s + 16, s + 32, s + 48); + unpackhi32(s + 32, s + 32, s + 48); + unpacklo32(s + 48, s, s + 32); + unpackhi32(s, s, s + 32); + unpackhi32(s + 32, s + 16, tmp); + unpacklo32(s + 16, s + 16, tmp); + } + + memcpy(out, s, 64); +} + +void haraka512_port(unsigned char *out, const unsigned char *in) +{ + int i; + + unsigned char buf[64]; + + haraka512_perm(buf, in); + /* Feed-forward */ + for (i = 0; i < 64; i++) { + buf[i] = buf[i] ^ in[i]; + } + + /* Truncated */ + memcpy(out, buf + 8, 8); + memcpy(out + 8, buf + 24, 8); + memcpy(out + 16, buf + 32, 8); + memcpy(out + 24, buf + 48, 8); +} + +void haraka512_port_keyed(unsigned char *out, const unsigned char *in, const u128 *rc) +{ + int i; + + unsigned char buf[64]; + + haraka512_perm_keyed(buf, in, rc); + /* Feed-forward */ + for (i = 0; i < 64; i++) { + buf[i] 
= buf[i] ^ in[i]; + } + + /* Truncated */ + memcpy(out, buf + 8, 8); + memcpy(out + 8, buf + 24, 8); + memcpy(out + 16, buf + 32, 8); + memcpy(out + 24, buf + 48, 8); +} + +void haraka512_perm_zero(unsigned char *out, const unsigned char *in) +{ + int i, j; + + unsigned char s[64], tmp[16]; + + memcpy(s, in, 16); + memcpy(s + 16, in + 16, 16); + memcpy(s + 32, in + 32, 16); + memcpy(s + 48, in + 48, 16); + + for (i = 0; i < 5; ++i) { + // aes round(s) + for (j = 0; j < 2; ++j) { + aesenc(s, rc0[4*2*i + 4*j]); + aesenc(s + 16, rc0[4*2*i + 4*j + 1]); + aesenc(s + 32, rc0[4*2*i + 4*j + 2]); + aesenc(s + 48, rc0[4*2*i + 4*j + 3]); + } + + // mixing + unpacklo32(tmp, s, s + 16); + unpackhi32(s, s, s + 16); + unpacklo32(s + 16, s + 32, s + 48); + unpackhi32(s + 32, s + 32, s + 48); + unpacklo32(s + 48, s, s + 32); + unpackhi32(s, s, s + 32); + unpackhi32(s + 32, s + 16, tmp); + unpacklo32(s + 16, s + 16, tmp); + } + + memcpy(out, s, 64); +} + +void haraka512_port_zero(unsigned char *out, const unsigned char *in) +{ + int i; + + unsigned char buf[64]; + + haraka512_perm_zero(buf, in); + /* Feed-forward */ + for (i = 0; i < 64; i++) { + buf[i] = buf[i] ^ in[i]; + } + + /* Truncated */ + memcpy(out, buf + 8, 8); + memcpy(out + 8, buf + 24, 8); + memcpy(out + 16, buf + 32, 8); + memcpy(out + 24, buf + 48, 8); +} + +void haraka256_port(unsigned char *out, const unsigned char *in) +{ + int i, j; + + unsigned char s[32], tmp[16]; + + memcpy(s, in, 16); + memcpy(s + 16, in + 16, 16); + + for (i = 0; i < 5; ++i) { + // aes round(s) + for (j = 0; j < 2; ++j) { + aesenc(s, rc[2*2*i + 2*j]); + aesenc(s + 16, rc[2*2*i + 2*j + 1]); + } + + // mixing + unpacklo32(tmp, s, s + 16); + unpackhi32(s + 16, s, s + 16); + memcpy(s, tmp, 16); + } + + /* Feed-forward */ + for (i = 0; i < 32; i++) { + out[i] = in[i] ^ s[i]; + } +} + +void haraka256_sk(unsigned char *out, const unsigned char *in) +{ + int i, j; + + unsigned char s[32], tmp[16]; + + memcpy(s, in, 16); + memcpy(s + 16, in + 16, 16); 
#ifndef SPX_HARAKA_H
#define SPX_HARAKA_H

#include <stdint.h>
#include <string.h>

#include "immintrin.h"

#define NUMROUNDS 5

#ifdef _WIN32
typedef unsigned long long u64;
#else
typedef unsigned long u64;
#endif
typedef __m128i u128;

/* Software replacement for the AES-NI aesenc instruction: s is updated in place. */
extern void aesenc(unsigned char *s, const unsigned char *rk);

/*
 * Emulated counterpart of an AES2 layer. Reads the round constants from
 * whatever `rc` is visible at the point of expansion, starting at index rci.
 */
#define AES2_EMU(s0, s1, rci) \
  aesenc((unsigned char *)&s0, (unsigned char *)&(rc[rci])); \
  aesenc((unsigned char *)&s1, (unsigned char *)&(rc[rci + 1])); \
  aesenc((unsigned char *)&s0, (unsigned char *)&(rc[rci + 2])); \
  aesenc((unsigned char *)&s1, (unsigned char *)&(rc[rci + 3]));

/*
 * Emulation of _mm_unpacklo_epi32: returns the words a0 b0 a1 b1.
 *
 * The lanes are moved with memcpy: a local uint32_t[4] is only 4-byte
 * aligned, so reading it back through a __m128i pointer is undefined
 * behaviour and may fault when the compiler emits an aligned vector load.
 */
static inline __m128i _mm_unpacklo_epi32_emu(__m128i a, __m128i b)
{
    uint32_t wa[4], wb[4], mixed[4];
    __m128i result;

    memcpy(wa, &a, sizeof wa);
    memcpy(wb, &b, sizeof wb);
    mixed[0] = wa[0];
    mixed[1] = wb[0];
    mixed[2] = wa[1];
    mixed[3] = wb[1];
    memcpy(&result, mixed, sizeof result);
    return result;
}

/* Emulation of _mm_unpackhi_epi32: returns the words a2 b2 a3 b3. */
static inline __m128i _mm_unpackhi_epi32_emu(__m128i a, __m128i b)
{
    uint32_t wa[4], wb[4], mixed[4];
    __m128i result;

    memcpy(wa, &a, sizeof wa);
    memcpy(wb, &b, sizeof wb);
    mixed[0] = wa[2];
    mixed[1] = wb[2];
    mixed[2] = wa[3];
    mixed[3] = wb[3];
    memcpy(&result, mixed, sizeof result);
    return result;
}

/* Needs a `__m128i tmp` in the calling scope; updates s0 and s1 in place. */
#define MIX2_EMU(s0, s1) \
  tmp = _mm_unpacklo_epi32_emu(s0, s1); \
  s1 = _mm_unpackhi_epi32_emu(s0, s1); \
  s0 = tmp;

/* load constants */
void load_constants_port(void);

/* Tweak constants with seed */
void tweak_constants(const unsigned char *pk_seed, const unsigned char *sk_seed,
                     unsigned long long seed_length);

/* Haraka Sponge */
void haraka_S(unsigned char *out, unsigned long long outlen,
              const unsigned char *in, unsigned long long inlen);

/* Applies the 512-bit Haraka permutation to in. */
void haraka512_perm(unsigned char *out, const unsigned char *in);

/* Applies the 512-bit Haraka permutation to in, using caller-supplied constants. */
void haraka512_perm_keyed(unsigned char *out, const unsigned char *in, const u128 *rc);

/* Implementation of Haraka-512 */
void haraka512_port(unsigned char *out, const unsigned char *in);

/* Implementation of Haraka-512, using caller-supplied constants */
void haraka512_port_keyed(unsigned char *out, const unsigned char *in, const u128 *rc);

/* Applies the 512-bit Haraka permutation to in, using zero key. */
void haraka512_perm_zero(unsigned char *out, const unsigned char *in);

/* Implementation of Haraka-512, using zero key */
void haraka512_port_zero(unsigned char *out, const unsigned char *in);

/* Implementation of Haraka-256 */
void haraka256_port(unsigned char *out, const unsigned char *in);

/* Implementation of Haraka-256 using sk.seed constants */
void haraka256_sk(unsigned char *out, const unsigned char *in);

#endif
+namespace ripemd160 +{ +uint32_t inline f1(uint32_t x, uint32_t y, uint32_t z) { return x ^ y ^ z; } +uint32_t inline f2(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (~x & z); } +uint32_t inline f3(uint32_t x, uint32_t y, uint32_t z) { return (x | ~y) ^ z; } +uint32_t inline f4(uint32_t x, uint32_t y, uint32_t z) { return (x & z) | (y & ~z); } +uint32_t inline f5(uint32_t x, uint32_t y, uint32_t z) { return x ^ (y | ~z); } + +/** Initialize RIPEMD-160 state. */ +void inline Initialize(uint32_t* s) +{ + s[0] = 0x67452301ul; + s[1] = 0xEFCDAB89ul; + s[2] = 0x98BADCFEul; + s[3] = 0x10325476ul; + s[4] = 0xC3D2E1F0ul; +} + +uint32_t inline rol(uint32_t x, int i) { return (x << i) | (x >> (32 - i)); } + +void inline Round(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t f, uint32_t x, uint32_t k, int r) +{ + a = rol(a + f + x + k, r) + e; + c = rol(c, 10); +} + +void inline R11(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f1(b, c, d), x, 0, r); } +void inline R21(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f2(b, c, d), x, 0x5A827999ul, r); } +void inline R31(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f3(b, c, d), x, 0x6ED9EBA1ul, r); } +void inline R41(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f4(b, c, d), x, 0x8F1BBCDCul, r); } +void inline R51(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f5(b, c, d), x, 0xA953FD4Eul, r); } + +void inline R12(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f5(b, c, d), x, 0x50A28BE6ul, r); } +void inline R22(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f4(b, c, d), x, 0x5C4DD124ul, r); } +void 
inline R32(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f3(b, c, d), x, 0x6D703EF3ul, r); } +void inline R42(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f2(b, c, d), x, 0x7A6D76E9ul, r); } +void inline R52(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f1(b, c, d), x, 0, r); } + +/** Perform a RIPEMD-160 transformation, processing a 64-byte chunk. */ +void Transform(uint32_t* s, const unsigned char* chunk) +{ + uint32_t a1 = s[0], b1 = s[1], c1 = s[2], d1 = s[3], e1 = s[4]; + uint32_t a2 = a1, b2 = b1, c2 = c1, d2 = d1, e2 = e1; + uint32_t w0 = ReadLE32(chunk + 0), w1 = ReadLE32(chunk + 4), w2 = ReadLE32(chunk + 8), w3 = ReadLE32(chunk + 12); + uint32_t w4 = ReadLE32(chunk + 16), w5 = ReadLE32(chunk + 20), w6 = ReadLE32(chunk + 24), w7 = ReadLE32(chunk + 28); + uint32_t w8 = ReadLE32(chunk + 32), w9 = ReadLE32(chunk + 36), w10 = ReadLE32(chunk + 40), w11 = ReadLE32(chunk + 44); + uint32_t w12 = ReadLE32(chunk + 48), w13 = ReadLE32(chunk + 52), w14 = ReadLE32(chunk + 56), w15 = ReadLE32(chunk + 60); + + R11(a1, b1, c1, d1, e1, w0, 11); + R12(a2, b2, c2, d2, e2, w5, 8); + R11(e1, a1, b1, c1, d1, w1, 14); + R12(e2, a2, b2, c2, d2, w14, 9); + R11(d1, e1, a1, b1, c1, w2, 15); + R12(d2, e2, a2, b2, c2, w7, 9); + R11(c1, d1, e1, a1, b1, w3, 12); + R12(c2, d2, e2, a2, b2, w0, 11); + R11(b1, c1, d1, e1, a1, w4, 5); + R12(b2, c2, d2, e2, a2, w9, 13); + R11(a1, b1, c1, d1, e1, w5, 8); + R12(a2, b2, c2, d2, e2, w2, 15); + R11(e1, a1, b1, c1, d1, w6, 7); + R12(e2, a2, b2, c2, d2, w11, 15); + R11(d1, e1, a1, b1, c1, w7, 9); + R12(d2, e2, a2, b2, c2, w4, 5); + R11(c1, d1, e1, a1, b1, w8, 11); + R12(c2, d2, e2, a2, b2, w13, 7); + R11(b1, c1, d1, e1, a1, w9, 13); + R12(b2, c2, d2, e2, a2, w6, 7); + R11(a1, b1, c1, d1, e1, w10, 14); + R12(a2, b2, c2, d2, e2, w15, 8); + R11(e1, a1, b1, c1, d1, w11, 15); + 
R12(e2, a2, b2, c2, d2, w8, 11); + R11(d1, e1, a1, b1, c1, w12, 6); + R12(d2, e2, a2, b2, c2, w1, 14); + R11(c1, d1, e1, a1, b1, w13, 7); + R12(c2, d2, e2, a2, b2, w10, 14); + R11(b1, c1, d1, e1, a1, w14, 9); + R12(b2, c2, d2, e2, a2, w3, 12); + R11(a1, b1, c1, d1, e1, w15, 8); + R12(a2, b2, c2, d2, e2, w12, 6); + + R21(e1, a1, b1, c1, d1, w7, 7); + R22(e2, a2, b2, c2, d2, w6, 9); + R21(d1, e1, a1, b1, c1, w4, 6); + R22(d2, e2, a2, b2, c2, w11, 13); + R21(c1, d1, e1, a1, b1, w13, 8); + R22(c2, d2, e2, a2, b2, w3, 15); + R21(b1, c1, d1, e1, a1, w1, 13); + R22(b2, c2, d2, e2, a2, w7, 7); + R21(a1, b1, c1, d1, e1, w10, 11); + R22(a2, b2, c2, d2, e2, w0, 12); + R21(e1, a1, b1, c1, d1, w6, 9); + R22(e2, a2, b2, c2, d2, w13, 8); + R21(d1, e1, a1, b1, c1, w15, 7); + R22(d2, e2, a2, b2, c2, w5, 9); + R21(c1, d1, e1, a1, b1, w3, 15); + R22(c2, d2, e2, a2, b2, w10, 11); + R21(b1, c1, d1, e1, a1, w12, 7); + R22(b2, c2, d2, e2, a2, w14, 7); + R21(a1, b1, c1, d1, e1, w0, 12); + R22(a2, b2, c2, d2, e2, w15, 7); + R21(e1, a1, b1, c1, d1, w9, 15); + R22(e2, a2, b2, c2, d2, w8, 12); + R21(d1, e1, a1, b1, c1, w5, 9); + R22(d2, e2, a2, b2, c2, w12, 7); + R21(c1, d1, e1, a1, b1, w2, 11); + R22(c2, d2, e2, a2, b2, w4, 6); + R21(b1, c1, d1, e1, a1, w14, 7); + R22(b2, c2, d2, e2, a2, w9, 15); + R21(a1, b1, c1, d1, e1, w11, 13); + R22(a2, b2, c2, d2, e2, w1, 13); + R21(e1, a1, b1, c1, d1, w8, 12); + R22(e2, a2, b2, c2, d2, w2, 11); + + R31(d1, e1, a1, b1, c1, w3, 11); + R32(d2, e2, a2, b2, c2, w15, 9); + R31(c1, d1, e1, a1, b1, w10, 13); + R32(c2, d2, e2, a2, b2, w5, 7); + R31(b1, c1, d1, e1, a1, w14, 6); + R32(b2, c2, d2, e2, a2, w1, 15); + R31(a1, b1, c1, d1, e1, w4, 7); + R32(a2, b2, c2, d2, e2, w3, 11); + R31(e1, a1, b1, c1, d1, w9, 14); + R32(e2, a2, b2, c2, d2, w7, 8); + R31(d1, e1, a1, b1, c1, w15, 9); + R32(d2, e2, a2, b2, c2, w14, 6); + R31(c1, d1, e1, a1, b1, w8, 13); + R32(c2, d2, e2, a2, b2, w6, 6); + R31(b1, c1, d1, e1, a1, w1, 15); + R32(b2, c2, d2, e2, a2, w9, 14); + 
R31(a1, b1, c1, d1, e1, w2, 14); + R32(a2, b2, c2, d2, e2, w11, 12); + R31(e1, a1, b1, c1, d1, w7, 8); + R32(e2, a2, b2, c2, d2, w8, 13); + R31(d1, e1, a1, b1, c1, w0, 13); + R32(d2, e2, a2, b2, c2, w12, 5); + R31(c1, d1, e1, a1, b1, w6, 6); + R32(c2, d2, e2, a2, b2, w2, 14); + R31(b1, c1, d1, e1, a1, w13, 5); + R32(b2, c2, d2, e2, a2, w10, 13); + R31(a1, b1, c1, d1, e1, w11, 12); + R32(a2, b2, c2, d2, e2, w0, 13); + R31(e1, a1, b1, c1, d1, w5, 7); + R32(e2, a2, b2, c2, d2, w4, 7); + R31(d1, e1, a1, b1, c1, w12, 5); + R32(d2, e2, a2, b2, c2, w13, 5); + + R41(c1, d1, e1, a1, b1, w1, 11); + R42(c2, d2, e2, a2, b2, w8, 15); + R41(b1, c1, d1, e1, a1, w9, 12); + R42(b2, c2, d2, e2, a2, w6, 5); + R41(a1, b1, c1, d1, e1, w11, 14); + R42(a2, b2, c2, d2, e2, w4, 8); + R41(e1, a1, b1, c1, d1, w10, 15); + R42(e2, a2, b2, c2, d2, w1, 11); + R41(d1, e1, a1, b1, c1, w0, 14); + R42(d2, e2, a2, b2, c2, w3, 14); + R41(c1, d1, e1, a1, b1, w8, 15); + R42(c2, d2, e2, a2, b2, w11, 14); + R41(b1, c1, d1, e1, a1, w12, 9); + R42(b2, c2, d2, e2, a2, w15, 6); + R41(a1, b1, c1, d1, e1, w4, 8); + R42(a2, b2, c2, d2, e2, w0, 14); + R41(e1, a1, b1, c1, d1, w13, 9); + R42(e2, a2, b2, c2, d2, w5, 6); + R41(d1, e1, a1, b1, c1, w3, 14); + R42(d2, e2, a2, b2, c2, w12, 9); + R41(c1, d1, e1, a1, b1, w7, 5); + R42(c2, d2, e2, a2, b2, w2, 12); + R41(b1, c1, d1, e1, a1, w15, 6); + R42(b2, c2, d2, e2, a2, w13, 9); + R41(a1, b1, c1, d1, e1, w14, 8); + R42(a2, b2, c2, d2, e2, w9, 12); + R41(e1, a1, b1, c1, d1, w5, 6); + R42(e2, a2, b2, c2, d2, w7, 5); + R41(d1, e1, a1, b1, c1, w6, 5); + R42(d2, e2, a2, b2, c2, w10, 15); + R41(c1, d1, e1, a1, b1, w2, 12); + R42(c2, d2, e2, a2, b2, w14, 8); + + R51(b1, c1, d1, e1, a1, w4, 9); + R52(b2, c2, d2, e2, a2, w12, 8); + R51(a1, b1, c1, d1, e1, w0, 15); + R52(a2, b2, c2, d2, e2, w15, 5); + R51(e1, a1, b1, c1, d1, w5, 5); + R52(e2, a2, b2, c2, d2, w10, 12); + R51(d1, e1, a1, b1, c1, w9, 11); + R52(d2, e2, a2, b2, c2, w4, 9); + R51(c1, d1, e1, a1, b1, w7, 6); + R52(c2, 
d2, e2, a2, b2, w1, 12); + R51(b1, c1, d1, e1, a1, w12, 8); + R52(b2, c2, d2, e2, a2, w5, 5); + R51(a1, b1, c1, d1, e1, w2, 13); + R52(a2, b2, c2, d2, e2, w8, 14); + R51(e1, a1, b1, c1, d1, w10, 12); + R52(e2, a2, b2, c2, d2, w7, 6); + R51(d1, e1, a1, b1, c1, w14, 5); + R52(d2, e2, a2, b2, c2, w6, 8); + R51(c1, d1, e1, a1, b1, w1, 12); + R52(c2, d2, e2, a2, b2, w2, 13); + R51(b1, c1, d1, e1, a1, w3, 13); + R52(b2, c2, d2, e2, a2, w13, 6); + R51(a1, b1, c1, d1, e1, w8, 14); + R52(a2, b2, c2, d2, e2, w14, 5); + R51(e1, a1, b1, c1, d1, w11, 11); + R52(e2, a2, b2, c2, d2, w0, 15); + R51(d1, e1, a1, b1, c1, w6, 8); + R52(d2, e2, a2, b2, c2, w3, 13); + R51(c1, d1, e1, a1, b1, w15, 5); + R52(c2, d2, e2, a2, b2, w9, 11); + R51(b1, c1, d1, e1, a1, w13, 6); + R52(b2, c2, d2, e2, a2, w11, 11); + + uint32_t t = s[0]; + s[0] = s[1] + c1 + d2; + s[1] = s[2] + d1 + e2; + s[2] = s[3] + e1 + a2; + s[3] = s[4] + a1 + b2; + s[4] = t + b1 + c2; +} + +} // namespace ripemd160 + +} // namespace + +////// RIPEMD160 + +CRIPEMD160::CRIPEMD160() : bytes(0) +{ + ripemd160::Initialize(s); +} + +CRIPEMD160& CRIPEMD160::Write(const unsigned char* data, size_t len) +{ + const unsigned char* end = data + len; + size_t bufsize = bytes % 64; + if (bufsize && bufsize + len >= 64) { + // Fill the buffer, and process it. + memcpy(buf + bufsize, data, 64 - bufsize); + bytes += 64 - bufsize; + data += 64 - bufsize; + ripemd160::Transform(s, buf); + bufsize = 0; + } + while (end >= data + 64) { + // Process full chunks directly from the source. + ripemd160::Transform(s, data); + bytes += 64; + data += 64; + } + if (end > data) { + // Fill the buffer with what remains. 
+ memcpy(buf + bufsize, data, end - data); + bytes += end - data; + } + return *this; +} + +void CRIPEMD160::Finalize(unsigned char hash[OUTPUT_SIZE]) +{ + static const unsigned char pad[64] = {0x80}; + unsigned char sizedesc[8]; + WriteLE64(sizedesc, bytes << 3); + Write(pad, 1 + ((119 - (bytes % 64)) % 64)); + Write(sizedesc, 8); + WriteLE32(hash, s[0]); + WriteLE32(hash + 4, s[1]); + WriteLE32(hash + 8, s[2]); + WriteLE32(hash + 12, s[3]); + WriteLE32(hash + 16, s[4]); +} + +CRIPEMD160& CRIPEMD160::Reset() +{ + bytes = 0; + ripemd160::Initialize(s); + return *this; +} \ No newline at end of file diff --git a/src/Native/libverushash/crypto/ripemd160.h b/src/Native/libverushash/crypto/ripemd160.h new file mode 100644 index 000000000..9815b4ab1 --- /dev/null +++ b/src/Native/libverushash/crypto/ripemd160.h @@ -0,0 +1,28 @@ +// Copyright (c) 2014 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or https://www.opensource.org/licenses/mit-license.php . + +#ifndef BITCOIN_CRYPTO_RIPEMD160_H +#define BITCOIN_CRYPTO_RIPEMD160_H + +#include <stdint.h> +#include <stdlib.h> + +/** A hasher class for RIPEMD-160. */ +class CRIPEMD160 +{ +private: + uint32_t s[5]; + unsigned char buf[64]; + size_t bytes; + +public: + static const size_t OUTPUT_SIZE = 20; + + CRIPEMD160(); + CRIPEMD160& Write(const unsigned char* data, size_t len); + void Finalize(unsigned char hash[OUTPUT_SIZE]); + CRIPEMD160& Reset(); +}; + +#endif // BITCOIN_CRYPTO_RIPEMD160_H \ No newline at end of file diff --git a/src/Native/libverushash/crypto/sha256.cpp b/src/Native/libverushash/crypto/sha256.cpp new file mode 100644 index 000000000..afa282df1 --- /dev/null +++ b/src/Native/libverushash/crypto/sha256.cpp @@ -0,0 +1,199 @@ +// Copyright (c) 2014 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or https://www.opensource.org/licenses/mit-license.php . 
+ +#include "sha256.h" + +#include "common.h" + +#include <string.h> +#include <stdexcept> + +// Internal implementation code. +namespace +{ +/// Internal SHA-256 implementation. +namespace sha256 +{ +uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); } +uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); } +uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); } +uint32_t inline Sigma1(uint32_t x) { return (x >> 6 | x << 26) ^ (x >> 11 | x << 21) ^ (x >> 25 | x << 7); } +uint32_t inline sigma0(uint32_t x) { return (x >> 7 | x << 25) ^ (x >> 18 | x << 14) ^ (x >> 3); } +uint32_t inline sigma1(uint32_t x) { return (x >> 17 | x << 15) ^ (x >> 19 | x << 13) ^ (x >> 10); } + +/** One round of SHA-256. */ +void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k, uint32_t w) +{ + uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k + w; + uint32_t t2 = Sigma0(a) + Maj(a, b, c); + d += t1; + h = t1 + t2; +} + +/** Initialize SHA-256 state. */ +void inline Initialize(uint32_t* s) +{ + s[0] = 0x6a09e667ul; + s[1] = 0xbb67ae85ul; + s[2] = 0x3c6ef372ul; + s[3] = 0xa54ff53aul; + s[4] = 0x510e527ful; + s[5] = 0x9b05688cul; + s[6] = 0x1f83d9abul; + s[7] = 0x5be0cd19ul; +} + +/** Perform one SHA-256 transformation, processing a 64-byte chunk. 
*/ +void Transform(uint32_t* s, const unsigned char* chunk) +{ + uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7]; + uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; + + Round(a, b, c, d, e, f, g, h, 0x428a2f98, w0 = ReadBE32(chunk + 0)); + Round(h, a, b, c, d, e, f, g, 0x71374491, w1 = ReadBE32(chunk + 4)); + Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w2 = ReadBE32(chunk + 8)); + Round(f, g, h, a, b, c, d, e, 0xe9b5dba5, w3 = ReadBE32(chunk + 12)); + Round(e, f, g, h, a, b, c, d, 0x3956c25b, w4 = ReadBE32(chunk + 16)); + Round(d, e, f, g, h, a, b, c, 0x59f111f1, w5 = ReadBE32(chunk + 20)); + Round(c, d, e, f, g, h, a, b, 0x923f82a4, w6 = ReadBE32(chunk + 24)); + Round(b, c, d, e, f, g, h, a, 0xab1c5ed5, w7 = ReadBE32(chunk + 28)); + Round(a, b, c, d, e, f, g, h, 0xd807aa98, w8 = ReadBE32(chunk + 32)); + Round(h, a, b, c, d, e, f, g, 0x12835b01, w9 = ReadBE32(chunk + 36)); + Round(g, h, a, b, c, d, e, f, 0x243185be, w10 = ReadBE32(chunk + 40)); + Round(f, g, h, a, b, c, d, e, 0x550c7dc3, w11 = ReadBE32(chunk + 44)); + Round(e, f, g, h, a, b, c, d, 0x72be5d74, w12 = ReadBE32(chunk + 48)); + Round(d, e, f, g, h, a, b, c, 0x80deb1fe, w13 = ReadBE32(chunk + 52)); + Round(c, d, e, f, g, h, a, b, 0x9bdc06a7, w14 = ReadBE32(chunk + 56)); + Round(b, c, d, e, f, g, h, a, 0xc19bf174, w15 = ReadBE32(chunk + 60)); + + Round(a, b, c, d, e, f, g, h, 0xe49b69c1, w0 += sigma1(w14) + w9 + sigma0(w1)); + Round(h, a, b, c, d, e, f, g, 0xefbe4786, w1 += sigma1(w15) + w10 + sigma0(w2)); + Round(g, h, a, b, c, d, e, f, 0x0fc19dc6, w2 += sigma1(w0) + w11 + sigma0(w3)); + Round(f, g, h, a, b, c, d, e, 0x240ca1cc, w3 += sigma1(w1) + w12 + sigma0(w4)); + Round(e, f, g, h, a, b, c, d, 0x2de92c6f, w4 += sigma1(w2) + w13 + sigma0(w5)); + Round(d, e, f, g, h, a, b, c, 0x4a7484aa, w5 += sigma1(w3) + w14 + sigma0(w6)); + Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w6 += sigma1(w4) + w15 + sigma0(w7)); + Round(b, c, d, e, f, g, h, 
a, 0x76f988da, w7 += sigma1(w5) + w0 + sigma0(w8)); + Round(a, b, c, d, e, f, g, h, 0x983e5152, w8 += sigma1(w6) + w1 + sigma0(w9)); + Round(h, a, b, c, d, e, f, g, 0xa831c66d, w9 += sigma1(w7) + w2 + sigma0(w10)); + Round(g, h, a, b, c, d, e, f, 0xb00327c8, w10 += sigma1(w8) + w3 + sigma0(w11)); + Round(f, g, h, a, b, c, d, e, 0xbf597fc7, w11 += sigma1(w9) + w4 + sigma0(w12)); + Round(e, f, g, h, a, b, c, d, 0xc6e00bf3, w12 += sigma1(w10) + w5 + sigma0(w13)); + Round(d, e, f, g, h, a, b, c, 0xd5a79147, w13 += sigma1(w11) + w6 + sigma0(w14)); + Round(c, d, e, f, g, h, a, b, 0x06ca6351, w14 += sigma1(w12) + w7 + sigma0(w15)); + Round(b, c, d, e, f, g, h, a, 0x14292967, w15 += sigma1(w13) + w8 + sigma0(w0)); + + Round(a, b, c, d, e, f, g, h, 0x27b70a85, w0 += sigma1(w14) + w9 + sigma0(w1)); + Round(h, a, b, c, d, e, f, g, 0x2e1b2138, w1 += sigma1(w15) + w10 + sigma0(w2)); + Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w2 += sigma1(w0) + w11 + sigma0(w3)); + Round(f, g, h, a, b, c, d, e, 0x53380d13, w3 += sigma1(w1) + w12 + sigma0(w4)); + Round(e, f, g, h, a, b, c, d, 0x650a7354, w4 += sigma1(w2) + w13 + sigma0(w5)); + Round(d, e, f, g, h, a, b, c, 0x766a0abb, w5 += sigma1(w3) + w14 + sigma0(w6)); + Round(c, d, e, f, g, h, a, b, 0x81c2c92e, w6 += sigma1(w4) + w15 + sigma0(w7)); + Round(b, c, d, e, f, g, h, a, 0x92722c85, w7 += sigma1(w5) + w0 + sigma0(w8)); + Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w8 += sigma1(w6) + w1 + sigma0(w9)); + Round(h, a, b, c, d, e, f, g, 0xa81a664b, w9 += sigma1(w7) + w2 + sigma0(w10)); + Round(g, h, a, b, c, d, e, f, 0xc24b8b70, w10 += sigma1(w8) + w3 + sigma0(w11)); + Round(f, g, h, a, b, c, d, e, 0xc76c51a3, w11 += sigma1(w9) + w4 + sigma0(w12)); + Round(e, f, g, h, a, b, c, d, 0xd192e819, w12 += sigma1(w10) + w5 + sigma0(w13)); + Round(d, e, f, g, h, a, b, c, 0xd6990624, w13 += sigma1(w11) + w6 + sigma0(w14)); + Round(c, d, e, f, g, h, a, b, 0xf40e3585, w14 += sigma1(w12) + w7 + sigma0(w15)); + Round(b, c, d, e, f, g, h, a, 0x106aa070, 
w15 += sigma1(w13) + w8 + sigma0(w0)); + + Round(a, b, c, d, e, f, g, h, 0x19a4c116, w0 += sigma1(w14) + w9 + sigma0(w1)); + Round(h, a, b, c, d, e, f, g, 0x1e376c08, w1 += sigma1(w15) + w10 + sigma0(w2)); + Round(g, h, a, b, c, d, e, f, 0x2748774c, w2 += sigma1(w0) + w11 + sigma0(w3)); + Round(f, g, h, a, b, c, d, e, 0x34b0bcb5, w3 += sigma1(w1) + w12 + sigma0(w4)); + Round(e, f, g, h, a, b, c, d, 0x391c0cb3, w4 += sigma1(w2) + w13 + sigma0(w5)); + Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w5 += sigma1(w3) + w14 + sigma0(w6)); + Round(c, d, e, f, g, h, a, b, 0x5b9cca4f, w6 += sigma1(w4) + w15 + sigma0(w7)); + Round(b, c, d, e, f, g, h, a, 0x682e6ff3, w7 += sigma1(w5) + w0 + sigma0(w8)); + Round(a, b, c, d, e, f, g, h, 0x748f82ee, w8 += sigma1(w6) + w1 + sigma0(w9)); + Round(h, a, b, c, d, e, f, g, 0x78a5636f, w9 += sigma1(w7) + w2 + sigma0(w10)); + Round(g, h, a, b, c, d, e, f, 0x84c87814, w10 += sigma1(w8) + w3 + sigma0(w11)); + Round(f, g, h, a, b, c, d, e, 0x8cc70208, w11 += sigma1(w9) + w4 + sigma0(w12)); + Round(e, f, g, h, a, b, c, d, 0x90befffa, w12 += sigma1(w10) + w5 + sigma0(w13)); + Round(d, e, f, g, h, a, b, c, 0xa4506ceb, w13 += sigma1(w11) + w6 + sigma0(w14)); + Round(c, d, e, f, g, h, a, b, 0xbef9a3f7, w14 + sigma1(w12) + w7 + sigma0(w15)); + Round(b, c, d, e, f, g, h, a, 0xc67178f2, w15 + sigma1(w13) + w8 + sigma0(w0)); + + s[0] += a; + s[1] += b; + s[2] += c; + s[3] += d; + s[4] += e; + s[5] += f; + s[6] += g; + s[7] += h; +} + +} // namespace sha256 +} // namespace + + +////// SHA-256 + +CSHA256::CSHA256() : bytes(0) +{ + sha256::Initialize(s); +} + +CSHA256& CSHA256::Write(const unsigned char* data, size_t len) +{ + const unsigned char* end = data + len; + size_t bufsize = bytes % 64; + if (bufsize && bufsize + len >= 64) { + // Fill the buffer, and process it. 
+ memcpy(buf + bufsize, data, 64 - bufsize); + bytes += 64 - bufsize; + data += 64 - bufsize; + sha256::Transform(s, buf); + bufsize = 0; + } + while (end >= data + 64) { + // Process full chunks directly from the source. + sha256::Transform(s, data); + bytes += 64; + data += 64; + } + if (end > data) { + // Fill the buffer with what remains. + memcpy(buf + bufsize, data, end - data); + bytes += end - data; + } + return *this; +} + +void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE]) +{ + static const unsigned char pad[64] = {0x80}; + unsigned char sizedesc[8]; + WriteBE64(sizedesc, bytes << 3); + Write(pad, 1 + ((119 - (bytes % 64)) % 64)); + Write(sizedesc, 8); + FinalizeNoPadding(hash, false); +} + +void CSHA256::FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE], bool enforce_compression) +{ + if (enforce_compression && bytes != 64) { + throw std::length_error("SHA256Compress should be invoked with a 512-bit block"); + } + + WriteBE32(hash, s[0]); + WriteBE32(hash + 4, s[1]); + WriteBE32(hash + 8, s[2]); + WriteBE32(hash + 12, s[3]); + WriteBE32(hash + 16, s[4]); + WriteBE32(hash + 20, s[5]); + WriteBE32(hash + 24, s[6]); + WriteBE32(hash + 28, s[7]); +} + +CSHA256& CSHA256::Reset() +{ + bytes = 0; + sha256::Initialize(s); + return *this; +} \ No newline at end of file diff --git a/src/Native/libverushash/crypto/sha256.h b/src/Native/libverushash/crypto/sha256.h new file mode 100644 index 000000000..c6d6467da --- /dev/null +++ b/src/Native/libverushash/crypto/sha256.h @@ -0,0 +1,32 @@ +// Copyright (c) 2014 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or https://www.opensource.org/licenses/mit-license.php . + +#ifndef BITCOIN_CRYPTO_SHA256_H +#define BITCOIN_CRYPTO_SHA256_H + +#include <stdint.h> +#include <stdlib.h> + +/** A hasher class for SHA-256. 
*/ +class CSHA256 +{ +public: + static const size_t OUTPUT_SIZE = 32; + + CSHA256(); + CSHA256& Write(const unsigned char* data, size_t len); + void Finalize(unsigned char hash[OUTPUT_SIZE]); + void FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE]) { + FinalizeNoPadding(hash, true); + }; + CSHA256& Reset(); + +private: + uint32_t s[8]; + unsigned char buf[64]; + size_t bytes; + void FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE], bool enforce_compression); +}; + +#endif // BITCOIN_CRYPTO_SHA256_H \ No newline at end of file diff --git a/src/Native/libverushash/crypto/tinyformat.h b/src/Native/libverushash/crypto/tinyformat.h new file mode 100644 index 000000000..83afac544 --- /dev/null +++ b/src/Native/libverushash/crypto/tinyformat.h @@ -0,0 +1,1013 @@ +// tinyformat.h +// Copyright (C) 2011, Chris Foster [chris42f (at) gmail (d0t) com] +// +// Boost Software License - Version 1.0 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +//------------------------------------------------------------------------------ +// Tinyformat: A minimal type safe printf replacement +// +// tinyformat.h is a type safe printf replacement library in a single C++ +// header file. Design goals include: +// +// * Type safety and extensibility for user defined types. +// * C99 printf() compatibility, to the extent possible using std::ostream +// * Simplicity and minimalism. A single header file to include and distribute +// with your projects. +// * Augment rather than replace the standard stream formatting mechanism +// * C++98 support, with optional C++11 niceties +// +// +// Main interface example usage +// ---------------------------- +// +// To print a date to std::cout: +// +// std::string weekday = "Wednesday"; +// const char* month = "July"; +// size_t day = 27; +// long hour = 14; +// int min = 44; +// +// tfm::printf("%s, %s %d, %.2d:%.2d\n", weekday, month, day, hour, min); +// +// The strange types here emphasize the type safety of the interface; it is +// possible to print a std::string using the "%s" conversion, and a +// size_t using the "%d" conversion. A similar result could be achieved +// using either of the tfm::format() functions. One prints on a user provided +// stream: +// +// tfm::format(std::cerr, "%s, %s %d, %.2d:%.2d\n", +// weekday, month, day, hour, min); +// +// The other returns a std::string: +// +// std::string date = tfm::format("%s, %s %d, %.2d:%.2d\n", +// weekday, month, day, hour, min); +// std::cout << date; +// +// These are the three primary interface functions. 
+// +// +// User defined format functions +// ----------------------------- +// +// Simulating variadic templates in C++98 is pretty painful since it requires +// writing out the same function for each desired number of arguments. To make +// this bearable tinyformat comes with a set of macros which are used +// internally to generate the API, but which may also be used in user code. +// +// The three macros TINYFORMAT_ARGTYPES(n), TINYFORMAT_VARARGS(n) and +// TINYFORMAT_PASSARGS(n) will generate a list of n argument types, +// type/name pairs and argument names respectively when called with an integer +// n between 1 and 16. We can use these to define a macro which generates the +// desired user defined function with n arguments. To generate all 16 user +// defined function bodies, use the macro TINYFORMAT_FOREACH_ARGNUM. For an +// example, see the implementation of printf() at the end of the source file. +// +// +// Additional API information +// -------------------------- +// +// Error handling: Define TINYFORMAT_ERROR to customize the error handling for +// format strings which are unsupported or have the wrong number of format +// specifiers (calls assert() by default). +// +// User defined types: Uses operator<< for user defined types by default. +// Overload formatValue() for more control. + + +#ifndef TINYFORMAT_H_INCLUDED +#define TINYFORMAT_H_INCLUDED + +namespace tinyformat {} +//------------------------------------------------------------------------------ +// Config section. Customize to your liking! + +// Namespace alias to encourage brevity +namespace tfm = tinyformat; + +// Error handling; calls assert() by default. +#define TINYFORMAT_ERROR(reasonString) throw std::runtime_error(reasonString) + +// Define for C++11 variadic templates which make the code shorter & more +// general. If you don't define this, C++11 support is autodetected below. 
+// #define TINYFORMAT_USE_VARIADIC_TEMPLATES + + +//------------------------------------------------------------------------------ +// Implementation details. +#include <cassert> +#include <iostream> +#include <sstream> +#include <stdexcept> + +#ifndef TINYFORMAT_ERROR +# define TINYFORMAT_ERROR(reason) assert(0 && reason) +#endif + +#if !defined(TINYFORMAT_USE_VARIADIC_TEMPLATES) && !defined(TINYFORMAT_NO_VARIADIC_TEMPLATES) +# ifdef __GXX_EXPERIMENTAL_CXX0X__ +# define TINYFORMAT_USE_VARIADIC_TEMPLATES +# endif +#endif + +#ifdef __GNUC__ +# define TINYFORMAT_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) +# define TINYFORMAT_NOINLINE __declspec(noinline) +#else +# define TINYFORMAT_NOINLINE +#endif + +#if defined(__GLIBCXX__) && __GLIBCXX__ < 20080201 +// std::showpos is broken on old libstdc++ as provided with OSX. See +// http://gcc.gnu.org/ml/libstdc++/2007-11/msg00075.html +# define TINYFORMAT_OLD_LIBSTDCPLUSPLUS_WORKAROUND +#endif + +namespace tinyformat { + +//------------------------------------------------------------------------------ +namespace detail { + +// Test whether type T1 is convertible to type T2 +template <typename T1, typename T2> +struct is_convertible +{ + private: + // two types of different size + struct fail { char dummy[2]; }; + struct succeed { char dummy; }; + // Try to convert a T1 to a T2 by plugging into tryConvert + static fail tryConvert(...); + static succeed tryConvert(const T2&); + static const T1& makeT1(); + public: +# ifdef _MSC_VER + // Disable spurious loss of precision warnings in tryConvert(makeT1()) +# pragma warning(push) +# pragma warning(disable:4244) +# pragma warning(disable:4267) +# endif + // Standard trick: the (...) version of tryConvert will be chosen from + // the overload set only if the version taking a T2 doesn't match. + // Then we compare the sizes of the return types to check which + // function matched. 
Very neat, in a disgusting kind of way :) + static const bool value = + sizeof(tryConvert(makeT1())) == sizeof(succeed); +# ifdef _MSC_VER +# pragma warning(pop) +# endif +}; + + +// Detect when a type is not a wchar_t string +template<typename T> struct is_wchar { typedef int tinyformat_wchar_is_not_supported; }; +template<> struct is_wchar<wchar_t*> {}; +template<> struct is_wchar<const wchar_t*> {}; +template<int n> struct is_wchar<const wchar_t[n]> {}; +template<int n> struct is_wchar<wchar_t[n]> {}; + + +// Format the value by casting to type fmtT. This default implementation +// should never be called. +template<typename T, typename fmtT, bool convertible = is_convertible<T, fmtT>::value> +struct formatValueAsType +{ + static void invoke(std::ostream& /*out*/, const T& /*value*/) { assert(0); } +}; +// Specialized version for types that can actually be converted to fmtT, as +// indicated by the "convertible" template parameter. +template<typename T, typename fmtT> +struct formatValueAsType<T,fmtT,true> +{ + static void invoke(std::ostream& out, const T& value) + { out << static_cast<fmtT>(value); } +}; + +#ifdef TINYFORMAT_OLD_LIBSTDCPLUSPLUS_WORKAROUND +template<typename T, bool convertible = is_convertible<T, int>::value> +struct formatZeroIntegerWorkaround +{ + static bool invoke(std::ostream& /**/, const T& /**/) { return false; } +}; +template<typename T> +struct formatZeroIntegerWorkaround<T,true> +{ + static bool invoke(std::ostream& out, const T& value) + { + if (static_cast<int>(value) == 0 && out.flags() & std::ios::showpos) + { + out << "+0"; + return true; + } + return false; + } +}; +#endif // TINYFORMAT_OLD_LIBSTDCPLUSPLUS_WORKAROUND + +// Convert an arbitrary type to integer. The version with convertible=false +// throws an error. 
+template<typename T, bool convertible = is_convertible<T,int>::value> +struct convertToInt +{ + static int invoke(const T& /*value*/) + { + TINYFORMAT_ERROR("tinyformat: Cannot convert from argument type to " + "integer for use as variable width or precision"); + return 0; + } +}; +// Specialization for convertToInt when conversion is possible +template<typename T> +struct convertToInt<T,true> +{ + static int invoke(const T& value) { return static_cast<int>(value); } +}; + +} // namespace detail + + +//------------------------------------------------------------------------------ +// Variable formatting functions. May be overridden for user-defined types if +// desired. + + +// Format a value into a stream. Called from format() for all types by default. +// +// Users may override this for their own types. When this function is called, +// the stream flags will have been modified according to the format string. +// The format specification is provided in the range [fmtBegin, fmtEnd). +// +// By default, formatValue() uses the usual stream insertion operator +// operator<< to format the type T, with special cases for the %c and %p +// conversions. +template<typename T> +inline void formatValue(std::ostream& out, const char* /*fmtBegin*/, + const char* fmtEnd, const T& value) +{ +#ifndef TINYFORMAT_ALLOW_WCHAR_STRINGS + // Since we don't support printing of wchar_t using "%ls", make it fail at + // compile time in preference to printing as a void* at runtime. + typedef typename detail::is_wchar<T>::tinyformat_wchar_is_not_supported DummyType; + (void) DummyType(); // avoid unused type warning with gcc-4.8 +#endif + // The mess here is to support the %c and %p conversions: if these + // conversions are active we try to convert the type to a char or const + // void* respectively and format that instead of the value itself. For the + // %p conversion it's important to avoid dereferencing the pointer, which + // could otherwise lead to a crash when printing a dangling (const char*). 
+ const bool canConvertToChar = detail::is_convertible<T,char>::value; + const bool canConvertToVoidPtr = detail::is_convertible<T, const void*>::value; + if(canConvertToChar && *(fmtEnd-1) == 'c') + detail::formatValueAsType<T, char>::invoke(out, value); + else if(canConvertToVoidPtr && *(fmtEnd-1) == 'p') + detail::formatValueAsType<T, const void*>::invoke(out, value); +#ifdef TINYFORMAT_OLD_LIBSTDCPLUSPLUS_WORKAROUND + else if(detail::formatZeroIntegerWorkaround<T>::invoke(out, value)) /**/; +#endif + else + out << value; +} + + +// Overloaded version for char types to support printing as an integer +#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \ +inline void formatValue(std::ostream& out, const char* /*fmtBegin*/, \ + const char* fmtEnd, charType value) \ +{ \ + switch(*(fmtEnd-1)) \ + { \ + case 'u': case 'd': case 'i': case 'o': case 'X': case 'x': \ + out << static_cast<int>(value); break; \ + default: \ + out << value; break; \ + } \ +} +// per 3.9.1: char, signed char and unsigned char are all distinct types +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(char) +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(signed char) +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(unsigned char) +#undef TINYFORMAT_DEFINE_FORMATVALUE_CHAR + + +//------------------------------------------------------------------------------ +// Tools for emulating variadic templates in C++98. The basic idea here is +// stolen from the boost preprocessor metaprogramming library and cut down to +// be just general enough for what we need. + +#define TINYFORMAT_ARGTYPES(n) TINYFORMAT_ARGTYPES_ ## n +#define TINYFORMAT_VARARGS(n) TINYFORMAT_VARARGS_ ## n +#define TINYFORMAT_PASSARGS(n) TINYFORMAT_PASSARGS_ ## n +#define TINYFORMAT_PASSARGS_TAIL(n) TINYFORMAT_PASSARGS_TAIL_ ## n + +// To keep it as transparent as possible, the macros below have been generated +// using python via the excellent cog.py code generation script. This avoids +// the need for a bunch of complex (but more general) preprocessor tricks as +// used in boost.preprocessor. 
+// +// To rerun the code generation in place, use `cog.py -r tinyformat.h` +// (see http://nedbatchelder.com/code/cog). Alternatively you can just create +// extra versions by hand. + +/*[[[cog +maxParams = 16 + +def makeCommaSepLists(lineTemplate, elemTemplate, startInd=1): + for j in range(startInd,maxParams+1): + list = ', '.join([elemTemplate % {'i':i} for i in range(startInd,j+1)]) + cog.outl(lineTemplate % {'j':j, 'list':list}) + +makeCommaSepLists('#define TINYFORMAT_ARGTYPES_%(j)d %(list)s', + 'class T%(i)d') + +cog.outl() +makeCommaSepLists('#define TINYFORMAT_VARARGS_%(j)d %(list)s', + 'const T%(i)d& v%(i)d') + +cog.outl() +makeCommaSepLists('#define TINYFORMAT_PASSARGS_%(j)d %(list)s', 'v%(i)d') + +cog.outl() +cog.outl('#define TINYFORMAT_PASSARGS_TAIL_1') +makeCommaSepLists('#define TINYFORMAT_PASSARGS_TAIL_%(j)d , %(list)s', + 'v%(i)d', startInd = 2) + +cog.outl() +cog.outl('#define TINYFORMAT_FOREACH_ARGNUM(m) \\\n ' + + ' '.join(['m(%d)' % (j,) for j in range(1,maxParams+1)])) +]]]*/ +#define TINYFORMAT_ARGTYPES_1 class T1 +#define TINYFORMAT_ARGTYPES_2 class T1, class T2 +#define TINYFORMAT_ARGTYPES_3 class T1, class T2, class T3 +#define TINYFORMAT_ARGTYPES_4 class T1, class T2, class T3, class T4 +#define TINYFORMAT_ARGTYPES_5 class T1, class T2, class T3, class T4, class T5 +#define TINYFORMAT_ARGTYPES_6 class T1, class T2, class T3, class T4, class T5, class T6 +#define TINYFORMAT_ARGTYPES_7 class T1, class T2, class T3, class T4, class T5, class T6, class T7 +#define TINYFORMAT_ARGTYPES_8 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8 +#define TINYFORMAT_ARGTYPES_9 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9 +#define TINYFORMAT_ARGTYPES_10 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10 +#define TINYFORMAT_ARGTYPES_11 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, 
class T11 +#define TINYFORMAT_ARGTYPES_12 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12 +#define TINYFORMAT_ARGTYPES_13 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13 +#define TINYFORMAT_ARGTYPES_14 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14 +#define TINYFORMAT_ARGTYPES_15 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15 +#define TINYFORMAT_ARGTYPES_16 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16 + +#define TINYFORMAT_VARARGS_1 const T1& v1 +#define TINYFORMAT_VARARGS_2 const T1& v1, const T2& v2 +#define TINYFORMAT_VARARGS_3 const T1& v1, const T2& v2, const T3& v3 +#define TINYFORMAT_VARARGS_4 const T1& v1, const T2& v2, const T3& v3, const T4& v4 +#define TINYFORMAT_VARARGS_5 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5 +#define TINYFORMAT_VARARGS_6 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6 +#define TINYFORMAT_VARARGS_7 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7 +#define TINYFORMAT_VARARGS_8 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7, const T8& v8 +#define TINYFORMAT_VARARGS_9 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7, const T8& v8, const T9& v9 +#define TINYFORMAT_VARARGS_10 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7, const T8& v8, const T9& v9, const T10& v10 +#define TINYFORMAT_VARARGS_11 const T1& v1, 
const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7, const T8& v8, const T9& v9, const T10& v10, const T11& v11 +#define TINYFORMAT_VARARGS_12 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7, const T8& v8, const T9& v9, const T10& v10, const T11& v11, const T12& v12 +#define TINYFORMAT_VARARGS_13 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7, const T8& v8, const T9& v9, const T10& v10, const T11& v11, const T12& v12, const T13& v13 +#define TINYFORMAT_VARARGS_14 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7, const T8& v8, const T9& v9, const T10& v10, const T11& v11, const T12& v12, const T13& v13, const T14& v14 +#define TINYFORMAT_VARARGS_15 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7, const T8& v8, const T9& v9, const T10& v10, const T11& v11, const T12& v12, const T13& v13, const T14& v14, const T15& v15 +#define TINYFORMAT_VARARGS_16 const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5, const T6& v6, const T7& v7, const T8& v8, const T9& v9, const T10& v10, const T11& v11, const T12& v12, const T13& v13, const T14& v14, const T15& v15, const T16& v16 + +#define TINYFORMAT_PASSARGS_1 v1 +#define TINYFORMAT_PASSARGS_2 v1, v2 +#define TINYFORMAT_PASSARGS_3 v1, v2, v3 +#define TINYFORMAT_PASSARGS_4 v1, v2, v3, v4 +#define TINYFORMAT_PASSARGS_5 v1, v2, v3, v4, v5 +#define TINYFORMAT_PASSARGS_6 v1, v2, v3, v4, v5, v6 +#define TINYFORMAT_PASSARGS_7 v1, v2, v3, v4, v5, v6, v7 +#define TINYFORMAT_PASSARGS_8 v1, v2, v3, v4, v5, v6, v7, v8 +#define TINYFORMAT_PASSARGS_9 v1, v2, v3, v4, v5, v6, v7, v8, v9 +#define TINYFORMAT_PASSARGS_10 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10 +#define TINYFORMAT_PASSARGS_11 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 +#define TINYFORMAT_PASSARGS_12 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, 
v11, v12 +#define TINYFORMAT_PASSARGS_13 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13 +#define TINYFORMAT_PASSARGS_14 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14 +#define TINYFORMAT_PASSARGS_15 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 +#define TINYFORMAT_PASSARGS_16 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16 + +#define TINYFORMAT_PASSARGS_TAIL_1 +#define TINYFORMAT_PASSARGS_TAIL_2 , v2 +#define TINYFORMAT_PASSARGS_TAIL_3 , v2, v3 +#define TINYFORMAT_PASSARGS_TAIL_4 , v2, v3, v4 +#define TINYFORMAT_PASSARGS_TAIL_5 , v2, v3, v4, v5 +#define TINYFORMAT_PASSARGS_TAIL_6 , v2, v3, v4, v5, v6 +#define TINYFORMAT_PASSARGS_TAIL_7 , v2, v3, v4, v5, v6, v7 +#define TINYFORMAT_PASSARGS_TAIL_8 , v2, v3, v4, v5, v6, v7, v8 +#define TINYFORMAT_PASSARGS_TAIL_9 , v2, v3, v4, v5, v6, v7, v8, v9 +#define TINYFORMAT_PASSARGS_TAIL_10 , v2, v3, v4, v5, v6, v7, v8, v9, v10 +#define TINYFORMAT_PASSARGS_TAIL_11 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 +#define TINYFORMAT_PASSARGS_TAIL_12 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12 +#define TINYFORMAT_PASSARGS_TAIL_13 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13 +#define TINYFORMAT_PASSARGS_TAIL_14 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14 +#define TINYFORMAT_PASSARGS_TAIL_15 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 +#define TINYFORMAT_PASSARGS_TAIL_16 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16 + +#define TINYFORMAT_FOREACH_ARGNUM(m) \ + m(1) m(2) m(3) m(4) m(5) m(6) m(7) m(8) m(9) m(10) m(11) m(12) m(13) m(14) m(15) m(16) +//[[[end]]] + + + +namespace detail { + +// Class holding current position in format string and an output stream into +// which arguments are formatted. 
+class FormatIterator
+{
+    public:
+        // Flags for features not representable with standard stream state
+        enum ExtraFormatFlags
+        {
+            Flag_None                = 0,
+            Flag_TruncateToPrecision = 1<<0, // truncate length to stream precision()
+            Flag_SpacePadPositive    = 1<<1, // pad positive values with spaces
+            Flag_VariableWidth       = 1<<2, // variable field width in arg list
+            Flag_VariablePrecision   = 1<<3  // variable field precision in arg list
+        };
+
+        // out is the output stream, fmt is the full format string
+        FormatIterator(std::ostream& out, const char* fmt)
+            : m_out(out),
+            m_fmt(fmt),
+            m_extraFlags(Flag_None),
+            m_wantWidth(false),
+            m_wantPrecision(false),
+            m_variableWidth(0),
+            m_variablePrecision(0),
+            m_origWidth(out.width()),
+            m_origPrecision(out.precision()),
+            m_origFlags(out.flags()),
+            m_origFill(out.fill())
+        { }
+
+        // Print remaining part of format string; errors if a conversion spec is left over.
+        void finish()
+        {
+            // It would be nice if we could do this from the destructor, but we
+            // can't if TINYFORMAT_ERROR is used to throw an exception!
+            m_fmt = printFormatStringLiteral(m_out, m_fmt);
+            if(*m_fmt != '\0')
+                TINYFORMAT_ERROR("tinyformat: Too many conversion specifiers in format string");
+        }
+
+        ~FormatIterator()
+        {
+            // Restore stream state saved by the constructor
+            m_out.width(m_origWidth);
+            m_out.precision(m_origPrecision);
+            m_out.flags(m_origFlags);
+            m_out.fill(m_origFill);
+        }
+
+        template<typename T>
+        void accept(const T& value);
+
+    private:
+        // Parse and return an integer from the string c, as atoi()
+        // On return, c is set to one past the end of the integer.
+        static int parseIntAndAdvance(const char*& c)
+        {
+            int i = 0;
+            for(;*c >= '0' && *c <= '9'; ++c)
+                i = 10*i + (*c - '0'); // NOTE: no overflow check on long digit runs
+            return i;
+        }
+
+        // Format at most truncLen characters of a C string to the given
+        // stream.  Return true if formatting proceeded (generic version always
+        // returns false)
+        template<typename T>
+        static bool formatCStringTruncate(std::ostream& /*out*/, const T& /*value*/,
+                                        std::streamsize /*truncLen*/)
+        {
+            return false;
+        }
+#       define TINYFORMAT_DEFINE_FORMAT_C_STRING_TRUNCATE(type)            \
+        static bool formatCStringTruncate(std::ostream& out, type* value,  \
+                                        std::streamsize truncLen)          \
+        {                                                                  \
+            std::streamsize len = 0;                                       \
+            while(len < truncLen && value[len] != 0)                       \
+                ++len;                                                     \
+            out.write(value, len);                                         \
+            return true;                                                   \
+        }
+        // Overload for const char* and char*.  Could overload for signed &
+        // unsigned char too, but these are technically unneeded for printf
+        // compatibility.
+        TINYFORMAT_DEFINE_FORMAT_C_STRING_TRUNCATE(const char)
+        TINYFORMAT_DEFINE_FORMAT_C_STRING_TRUNCATE(char)
+#       undef TINYFORMAT_DEFINE_FORMAT_C_STRING_TRUNCATE
+
+        // Print literal part of format string and return next format spec
+        // position.
+        //
+        // Skips over any occurrences of '%%', printing a literal '%' to the
+        // output.  The position of the first % character of the next
+        // nontrivial format spec is returned, or the end of string.
+        static const char* printFormatStringLiteral(std::ostream& out,
+                                                    const char* fmt)
+        {
+            const char* c = fmt;
+            for(; true; ++c)
+            {
+                switch(*c)
+                {
+                    case '\0':
+                        out.write(fmt, static_cast<std::streamsize>(c - fmt));
+                        return c;
+                    case '%':
+                        out.write(fmt, static_cast<std::streamsize>(c - fmt));
+                        if(*(c+1) != '%')
+                            return c;
+                        // for "%%", tack trailing % onto next literal section.
+                        fmt = ++c;
+                        break;
+                }
+            }
+        }
+
+        static const char* streamStateFromFormat(std::ostream& out,
+                                                 unsigned int& extraFlags,
+                                                 const char* fmtStart,
+                                                 int variableWidth,
+                                                 int variablePrecision);
+
+        // Private copy & assign: Kill gcc warnings with -Weffc++
+        FormatIterator(const FormatIterator&);
+        FormatIterator& operator=(const FormatIterator&);
+
+        // Stream, current format string & state
+        std::ostream& m_out;
+        const char* m_fmt;
+        unsigned int m_extraFlags;
+        // State machine info for handling of variable width & precision
+        bool m_wantWidth;
+        bool m_wantPrecision;
+        int m_variableWidth;
+        int m_variablePrecision;
+        // Saved stream state
+        std::streamsize m_origWidth;
+        std::streamsize m_origPrecision;
+        std::ios::fmtflags m_origFlags;
+        char m_origFill;
+};
+
+
+// Accept a value for formatting into the internal stream.
+template<typename T>
+TINYFORMAT_NOINLINE // < greatly reduces bloat in optimized builds
+void FormatIterator::accept(const T& value)
+{
+    // Parse the format string
+    const char* fmtEnd = 0;
+    if(m_extraFlags == Flag_None && !m_wantWidth && !m_wantPrecision)
+    {
+        m_fmt = printFormatStringLiteral(m_out, m_fmt);
+        fmtEnd = streamStateFromFormat(m_out, m_extraFlags, m_fmt, 0, 0);
+        m_wantWidth     = (m_extraFlags & Flag_VariableWidth) != 0;
+        m_wantPrecision = (m_extraFlags & Flag_VariablePrecision) != 0;
+    }
+    // Consume value as variable width and precision specifier if necessary
+    if(m_extraFlags & (Flag_VariableWidth | Flag_VariablePrecision))
+    {
+        if(m_wantWidth || m_wantPrecision)
+        {
+            int v = convertToInt<T>::invoke(value);
+            if(m_wantWidth)
+            {
+                m_variableWidth = v;
+                m_wantWidth = false;
+            }
+            else if(m_wantPrecision)
+            {
+                m_variablePrecision = v;
+                m_wantPrecision = false;
+            }
+            return; // value was consumed as a '*' width/precision, not printed
+        }
+        // If we get here, we've set both the variable precision and width as
+        // required and we need to rerun the stream state setup to insert these.
+        fmtEnd = streamStateFromFormat(m_out, m_extraFlags, m_fmt,
+                                       m_variableWidth, m_variablePrecision);
+    }
+
+    // Format the value into the stream.
+    if(!(m_extraFlags & (Flag_SpacePadPositive | Flag_TruncateToPrecision)))
+        formatValue(m_out, m_fmt, fmtEnd, value);
+    else
+    {
+        // The following are special cases where there's no direct
+        // correspondence between stream formatting and the printf() behaviour.
+        // Instead, we simulate the behaviour crudely by formatting into a
+        // temporary string stream and munging the resulting string.
+        std::ostringstream tmpStream;
+        tmpStream.copyfmt(m_out);
+        if(m_extraFlags & Flag_SpacePadPositive)
+            tmpStream.setf(std::ios::showpos);
+        // formatCStringTruncate is required for truncating conversions like
+        // "%.4s" where at most 4 characters of the c-string should be read.
+        // If we didn't include this special case, we might read off the end.
+        if(!( (m_extraFlags & Flag_TruncateToPrecision) &&
+             formatCStringTruncate(tmpStream, value, m_out.precision()) ))
+        {
+            // Not a truncated c-string; just format normally.
+            formatValue(tmpStream, m_fmt, fmtEnd, value);
+        }
+        std::string result = tmpStream.str(); // allocates... yuck.
+        if(m_extraFlags & Flag_SpacePadPositive)
+        {
+            for(size_t i = 0, iend = result.size(); i < iend; ++i)
+                if(result[i] == '+')
+                    result[i] = ' ';
+        }
+        if((m_extraFlags & Flag_TruncateToPrecision) &&
+           (int)result.size() > (int)m_out.precision())
+            m_out.write(result.c_str(), m_out.precision());
+        else
+            m_out << result;
+    }
+    m_extraFlags = Flag_None; // conversion spec fully handled; next call parses afresh
+    m_fmt = fmtEnd;
+}
+
+
+// Parse a format string and set the stream state accordingly.
+//
+// The format mini-language recognized here is meant to be the one from C99,
+// with the form "%[flags][width][.precision][length]type".
+//
+// Formatting options which can't be natively represented using the ostream
+// state are returned in the extraFlags parameter which is a bitwise
+// combination of values from the ExtraFormatFlags enum.
+inline const char* FormatIterator::streamStateFromFormat(std::ostream& out,
+                                                         unsigned int& extraFlags,
+                                                         const char* fmtStart,
+                                                         int variableWidth,
+                                                         int variablePrecision)
+{
+    if(*fmtStart != '%')
+    {
+        TINYFORMAT_ERROR("tinyformat: Not enough conversion specifiers in format string");
+        return fmtStart;
+    }
+    // Reset stream state to defaults.
+    out.width(0);
+    out.precision(6);
+    out.fill(' ');
+    // Reset most flags; ignore irrelevant unitbuf & skipws.
+    out.unsetf(std::ios::adjustfield | std::ios::basefield |
+               std::ios::floatfield | std::ios::showbase | std::ios::boolalpha |
+               std::ios::showpoint | std::ios::showpos | std::ios::uppercase);
+    extraFlags = Flag_None;
+    bool precisionSet = false;
+    bool widthSet = false;
+    const char* c = fmtStart + 1;
+    // 1) Parse flags
+    for(;; ++c)
+    {
+        switch(*c)
+        {
+            case '#':
+                out.setf(std::ios::showpoint | std::ios::showbase);
+                continue;
+            case '0':
+                // overridden by left alignment ('-' flag)
+                if(!(out.flags() & std::ios::left))
+                {
+                    // Use internal padding so that numeric values are
+                    // formatted correctly, eg -00010 rather than 000-10
+                    out.fill('0');
+                    out.setf(std::ios::internal, std::ios::adjustfield);
+                }
+                continue;
+            case '-':
+                out.fill(' ');
+                out.setf(std::ios::left, std::ios::adjustfield);
+                continue;
+            case ' ':
+                // overridden by show positive sign, '+' flag.
+                if(!(out.flags() & std::ios::showpos))
+                    extraFlags |= Flag_SpacePadPositive;
+                continue;
+            case '+':
+                out.setf(std::ios::showpos);
+                extraFlags &= ~Flag_SpacePadPositive;
+                continue;
+        }
+        break; // first non-flag character ends the flag loop
+    }
+    // 2) Parse width
+    if(*c >= '0' && *c <= '9')
+    {
+        widthSet = true;
+        out.width(parseIntAndAdvance(c));
+    }
+    if(*c == '*')
+    {
+        widthSet = true;
+        if(variableWidth < 0)
+        {
+            // negative widths correspond to '-' flag set
+            out.fill(' ');
+            out.setf(std::ios::left, std::ios::adjustfield);
+            variableWidth = -variableWidth;
+        }
+        out.width(variableWidth);
+        extraFlags |= Flag_VariableWidth;
+        ++c;
+    }
+    // 3) Parse precision
+    if(*c == '.')
+    {
+        ++c;
+        int precision = 0;
+        if(*c == '*')
+        {
+            ++c;
+            extraFlags |= Flag_VariablePrecision;
+            precision = variablePrecision;
+        }
+        else
+        {
+            if(*c >= '0' && *c <= '9')
+                precision = parseIntAndAdvance(c);
+            else if(*c == '-') // negative precisions ignored, treated as zero.
+                parseIntAndAdvance(++c);
+        }
+        out.precision(precision);
+        precisionSet = true;
+    }
+    // 4) Ignore any C99 length modifier
+    while(*c == 'l' || *c == 'h' || *c == 'L' ||
+          *c == 'j' || *c == 'z' || *c == 't')
+        ++c;
+    // 5) We're up to the conversion specifier character.
+    // Set stream flags based on conversion specifier (thanks to the
+    // boost::format class for forging the way here).
+    bool intConversion = false;
+    switch(*c)
+    {
+        case 'u': case 'd': case 'i':
+            out.setf(std::ios::dec, std::ios::basefield);
+            intConversion = true;
+            break;
+        case 'o':
+            out.setf(std::ios::oct, std::ios::basefield);
+            intConversion = true;
+            break;
+        case 'X':
+            out.setf(std::ios::uppercase); // fall through
+        case 'x': case 'p':
+            out.setf(std::ios::hex, std::ios::basefield);
+            intConversion = true;
+            break;
+        case 'E':
+            out.setf(std::ios::uppercase); // fall through
+        case 'e':
+            out.setf(std::ios::scientific, std::ios::floatfield);
+            out.setf(std::ios::dec, std::ios::basefield);
+            break;
+        case 'F':
+            out.setf(std::ios::uppercase); // fall through
+        case 'f':
+            out.setf(std::ios::fixed, std::ios::floatfield);
+            break;
+        case 'G':
+            out.setf(std::ios::uppercase); // fall through
+        case 'g':
+            out.setf(std::ios::dec, std::ios::basefield);
+            // As in boost::format, let stream decide float format.
+            out.flags(out.flags() & ~std::ios::floatfield);
+            break;
+        case 'a': case 'A':
+            TINYFORMAT_ERROR("tinyformat: the %a and %A conversion specs "
+                             "are not supported");
+            break;
+        case 'c':
+            // Handled as special case inside formatValue()
+            break;
+        case 's':
+            if(precisionSet)
+                extraFlags |= Flag_TruncateToPrecision;
+            // Make %s print booleans as "true" and "false"
+            out.setf(std::ios::boolalpha);
+            break;
+        case 'n':
+            // Not supported - will cause problems!
+            TINYFORMAT_ERROR("tinyformat: %n conversion spec not supported");
+            break;
+        case '\0':
+            TINYFORMAT_ERROR("tinyformat: Conversion spec incorrectly "
+                             "terminated by end of string");
+            return c; // stay on the terminator so callers do not read past it
+    }
+    if(intConversion && precisionSet && !widthSet)
+    {
+        // "precision" for integers gives the minimum number of digits (to be
+        // padded with zeros on the left).  This isn't really supported by the
+        // iostreams, but we can approximately simulate it with the width if
+        // the width isn't otherwise used.
+        out.width(out.precision());
+        out.setf(std::ios::internal, std::ios::adjustfield);
+        out.fill('0');
+    }
+    return c+1; // one past the conversion specifier character
+}
+
+
+
+//------------------------------------------------------------------------------
+// Private format function on top of which the public interface is implemented.
+// We enforce a minimum of one value to be formatted to prevent bugs looking like
+//
+//   const char* myStr = "100% broken";
+//   printf(myStr);   // Parses % as a format specifier
+#ifdef TINYFORMAT_USE_VARIADIC_TEMPLATES
+
+template<typename T1>
+void format(FormatIterator& fmtIter, const T1& value1)
+{
+    fmtIter.accept(value1);
+    fmtIter.finish();
+}
+
+// General version for C++11
+template<typename T1, typename... Args>
+void format(FormatIterator& fmtIter, const T1& value1, const Args&... args)
+{
+    fmtIter.accept(value1);
+    format(fmtIter, args...);
+}
+
+#else
+
+inline void format(FormatIterator& fmtIter)
+{
+    fmtIter.finish();
+}
+
+// General version for C++98
+#define TINYFORMAT_MAKE_FORMAT_DETAIL(n)                                    \
+template<TINYFORMAT_ARGTYPES(n)>                                            \
+void format(detail::FormatIterator& fmtIter, TINYFORMAT_VARARGS(n))         \
+{                                                                           \
+    fmtIter.accept(v1);                                                     \
+    format(fmtIter TINYFORMAT_PASSARGS_TAIL(n));                            \
+}
+
+TINYFORMAT_FOREACH_ARGNUM(TINYFORMAT_MAKE_FORMAT_DETAIL)
+#undef TINYFORMAT_MAKE_FORMAT_DETAIL
+
+#endif // End C++98 variadic template emulation for format()
+
+} // namespace detail
+
+
+//------------------------------------------------------------------------------
+// Implement all the main interface functions here in terms of detail::format()
+
+#ifdef TINYFORMAT_USE_VARIADIC_TEMPLATES
+
+// C++11 - the simple case
+template<typename T1, typename... Args>
+void format(std::ostream& out, const char* fmt, const T1& v1, const Args&... args)
+{
+    detail::FormatIterator fmtIter(out, fmt);
+    format(fmtIter, v1, args...);
+}
+
+template<typename T1, typename... Args>
+std::string format(const char* fmt, const T1& v1, const Args&... args)
+{
+    std::ostringstream oss;
+    format(oss, fmt, v1, args...);
+    return oss.str();
+}
+
+template<typename T1, typename... Args>
+std::string format(const std::string &fmt, const T1& v1, const Args&... args)
+{
+    std::ostringstream oss;
+    format(oss, fmt.c_str(), v1, args...);
+    return oss.str();
+}
+
+template<typename T1, typename... Args>
+void printf(const char* fmt, const T1& v1, const Args&... args)
+{
+    format(std::cout, fmt, v1, args...);
+}
+
+#else
+
+// C++98 - define the interface functions using the wrapping macros
+#define TINYFORMAT_MAKE_FORMAT_FUNCS(n)                                   \
+                                                                          \
+template<TINYFORMAT_ARGTYPES(n)>                                          \
+void format(std::ostream& out, const char* fmt, TINYFORMAT_VARARGS(n))    \
+{                                                                         \
+    tinyformat::detail::FormatIterator fmtIter(out, fmt);                 \
+    tinyformat::detail::format(fmtIter, TINYFORMAT_PASSARGS(n));          \
+}                                                                         \
+                                                                          \
+template<TINYFORMAT_ARGTYPES(n)>                                          \
+std::string format(const char* fmt, TINYFORMAT_VARARGS(n))                \
+{                                                                         \
+    std::ostringstream oss;                                               \
+    tinyformat::format(oss, fmt, TINYFORMAT_PASSARGS(n));                 \
+    return oss.str();                                                     \
+}                                                                         \
+                                                                          \
+template<TINYFORMAT_ARGTYPES(n)>                                          \
+std::string format(const std::string &fmt, TINYFORMAT_VARARGS(n))         \
+{                                                                         \
+    std::ostringstream oss;                                               \
+    tinyformat::format(oss, fmt.c_str(), TINYFORMAT_PASSARGS(n));         \
+    return oss.str();                                                     \
+}                                                                         \
+                                                                          \
+template<TINYFORMAT_ARGTYPES(n)>                                          \
+void printf(const char* fmt, TINYFORMAT_VARARGS(n))                       \
+{                                                                         \
+    tinyformat::format(std::cout, fmt, TINYFORMAT_PASSARGS(n));           \
+}
+
+TINYFORMAT_FOREACH_ARGNUM(TINYFORMAT_MAKE_FORMAT_FUNCS)
+#undef TINYFORMAT_MAKE_FORMAT_FUNCS
+#endif
+
+
+//------------------------------------------------------------------------------
+// Define deprecated wrapping macro for backward compatibility in tinyformat
+// 1.x.  Will be removed in version 2!
+#define TINYFORMAT_WRAP_FORMAT_EXTRA_ARGS
+#define TINYFORMAT_WRAP_FORMAT_N(n, returnType, funcName, funcDeclSuffix,  \
+                                 bodyPrefix, streamName, bodySuffix)       \
+template<TINYFORMAT_ARGTYPES(n)>                                           \
+returnType funcName(TINYFORMAT_WRAP_FORMAT_EXTRA_ARGS const char* fmt,     \
+                    TINYFORMAT_VARARGS(n)) funcDeclSuffix                  \
+{                                                                          \
+    bodyPrefix                                                             \
+    tinyformat::format(streamName, fmt, TINYFORMAT_PASSARGS(n));           \
+    bodySuffix                                                             \
+}                                                                          \
+
+#define TINYFORMAT_WRAP_FORMAT(returnType, funcName, funcDeclSuffix,       \
+                               bodyPrefix, streamName, bodySuffix)         \
+inline                                                                     \
+returnType funcName(TINYFORMAT_WRAP_FORMAT_EXTRA_ARGS const char* fmt      \
+                    ) funcDeclSuffix                                       \
+{                                                                          \
+    bodyPrefix                                                             \
+    tinyformat::detail::FormatIterator(streamName, fmt).finish();          \
+    bodySuffix                                                             \
+}                                                                          \
+TINYFORMAT_WRAP_FORMAT_N(1 , returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(2 , returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(3 , returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(4 , returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(5 , returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(6 , returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(7 , returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(8 , returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(9 , returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(10, returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(11, returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(12, returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(13, returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(14, returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(15, returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+TINYFORMAT_WRAP_FORMAT_N(16, returnType, funcName, funcDeclSuffix, bodyPrefix, streamName, bodySuffix) \
+
+
+} // namespace tinyformat
+
+#define strprintf tfm::format
+
+#endif // TINYFORMAT_H_INCLUDED
\ No newline at end of file
diff --git a/src/Native/libverushash/crypto/uint256.cpp b/src/Native/libverushash/crypto/uint256.cpp
new file mode 100644
index 000000000..61570d1ec
--- /dev/null
+++ b/src/Native/libverushash/crypto/uint256.cpp
@@ -0,0 +1,146 @@
+// Copyright (c) 2009-2010 Satoshi Nakamoto
+// Copyright (c) 2009-2014 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include "uint256.h"
+
+#include "utilstrencodings.h"
+
+#include <stdio.h>
+#include <string.h>
+
+template <unsigned int BITS>
+base_blob<BITS>::base_blob(const std::vector<unsigned char>& vch)
+{
+    assert(vch.size() == sizeof(data)); // NOTE: this size check is compiled out under NDEBUG
+    memcpy(data, &vch[0], sizeof(data));
+}
+
+template <unsigned int BITS>
+std::string base_blob<BITS>::GetHex() const
+{
+    char psz[sizeof(data) * 2 + 1];
+    for (unsigned int i = 0; i < sizeof(data); i++)
+        sprintf(psz + i * 2, "%02x", data[sizeof(data) - i - 1]); // last byte of data is printed first
+    return std::string(psz, psz + sizeof(data) * 2);
+}
+
+template <unsigned int BITS>
+void base_blob<BITS>::SetHex(const char* psz)
+{
+    memset(data, 0, sizeof(data));
+
+    // skip leading spaces (cast: isspace() is undefined for negative char values)
+    while (isspace((unsigned char)*psz))
+        psz++;
+
+    // skip 0x (cast: tolower() is undefined for negative char values)
+    if (psz[0] == '0' && tolower((unsigned char)psz[1]) == 'x')
+        psz += 2;
+
+    // hex string to uint: find the end of the digit run, then fill data from the last digit backwards
+    const char* pbegin = psz;
+    while (::HexDigit(*psz) != -1)
+        psz++;
+    psz--;
+    unsigned char* p1 = (unsigned char*)data;
+    unsigned char* pend = p1 + WIDTH;
+    while (psz >= pbegin && p1 < pend) {
+        *p1 = ::HexDigit(*psz--);
+        if (psz >= pbegin) {
+            *p1 |= ((unsigned char)::HexDigit(*psz--) << 4);
+            p1++;
+        }
+    }
+}
+
+template <unsigned int BITS>
+void base_blob<BITS>::SetHex(const std::string& str)
+{
+    SetHex(str.c_str());
+}
+
+template <unsigned int BITS>
+std::string base_blob<BITS>::ToString() const
+{
+    return (GetHex());
+}
+
+// Explicit instantiations for base_blob<160>
+template base_blob<160>::base_blob(const std::vector<unsigned char>&);
+template std::string base_blob<160>::GetHex() const;
+template std::string base_blob<160>::ToString() const;
+template void base_blob<160>::SetHex(const char*);
+template void base_blob<160>::SetHex(const std::string&);
+
+// Explicit instantiations for base_blob<256>
+template base_blob<256>::base_blob(const std::vector<unsigned char>&);
+template std::string base_blob<256>::GetHex() const;
+template std::string base_blob<256>::ToString() const;
+template void base_blob<256>::SetHex(const char*);
+template void base_blob<256>::SetHex(const std::string&);
+
+static void inline HashMix(uint32_t& a, uint32_t& b, uint32_t& c)
+{
+    // Taken from lookup3, by Bob Jenkins.
+    a -= c;
+    a ^= ((c << 4) | (c >> 28));
+    c += b;
+    b -= a;
+    b ^= ((a << 6) | (a >> 26));
+    a += c;
+    c -= b;
+    c ^= ((b << 8) | (b >> 24));
+    b += a;
+    a -= c;
+    a ^= ((c << 16) | (c >> 16));
+    c += b;
+    b -= a;
+    b ^= ((a << 19) | (a >> 13));
+    a += c;
+    c -= b;
+    c ^= ((b << 4) | (b >> 28));
+    b += a;
+}
+
+static void inline HashFinal(uint32_t& a, uint32_t& b, uint32_t& c)
+{
+    // Taken from lookup3, by Bob Jenkins.
+    c ^= b;
+    c -= ((b << 14) | (b >> 18));
+    a ^= c;
+    a -= ((c << 11) | (c >> 21));
+    b ^= a;
+    b -= ((a << 25) | (a >> 7));
+    c ^= b;
+    c -= ((b << 16) | (b >> 16));
+    a ^= c;
+    a -= ((c << 4) | (c >> 28));
+    b ^= a;
+    b -= ((a << 14) | (a >> 18));
+    c ^= b;
+    c -= ((b << 24) | (b >> 8));
+}
+
+uint64_t uint256::GetHash(const uint256& salt) const
+{
+    uint32_t a, b, c;
+    const uint32_t *pn = (const uint32_t*)data; // data is read as eight 32-bit words
+    const uint32_t *salt_pn = (const uint32_t*)salt.data;
+    a = b = c = 0xdeadbeef + WIDTH;
+
+    a += pn[0] ^ salt_pn[0];
+    b += pn[1] ^ salt_pn[1];
+    c += pn[2] ^ salt_pn[2];
+    HashMix(a, b, c);
+    a += pn[3] ^ salt_pn[3];
+    b += pn[4] ^ salt_pn[4];
+    c += pn[5] ^ salt_pn[5];
+    HashMix(a, b, c);
+    a += pn[6] ^ salt_pn[6];
+    b += pn[7] ^ salt_pn[7];
+    HashFinal(a, b, c);
+
+    return ((((uint64_t)b) << 32) | c);
+}
\ No newline at end of file
diff --git a/src/Native/libverushash/crypto/uint256.h b/src/Native/libverushash/crypto/uint256.h
new file mode 100644
index 000000000..cb12e7259
--- /dev/null
+++ b/src/Native/libverushash/crypto/uint256.h
@@ -0,0 +1,176 @@
+// Copyright (c) 2009-2010 Satoshi Nakamoto
+// Copyright (c) 2009-2014 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_UINT256_H
+#define BITCOIN_UINT256_H
+
+#include <assert.h>
+#include <cstring>
+#include <stdexcept>
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#ifdef _MSC_VER
+#  define _ALIGN(x) __declspec(align(x))
+#else
+#  define _ALIGN(x) __attribute__ ((aligned(x)))
+#endif
+
+/** Template base class for fixed-sized opaque blobs (BITS/8 bytes of raw storage). */
+template<unsigned int BITS>
+class base_blob
+{
+protected:
+    enum { WIDTH=BITS/8 };
+    uint8_t _ALIGN(4) data[WIDTH];
+public:
+    base_blob()
+    {
+        memset(data, 0, sizeof(data));
+    }
+
+    explicit base_blob(const std::vector<unsigned char>& vch);
+
+    bool IsNull() const
+    {
+        for (int i = 0; i < WIDTH; i++)
+            if (data[i] != 0)
+                return false;
+        return true;
+    }
+
+    void SetNull()
+    {
+        memset(data, 0, sizeof(data));
+    }
+
+    friend inline bool operator==(const base_blob& a, const base_blob& b) { return memcmp(a.data, b.data, sizeof(a.data)) == 0; }
+    friend inline bool operator!=(const base_blob& a, const base_blob& b) { return memcmp(a.data, b.data, sizeof(a.data)) != 0; }
+    friend inline bool operator<(const base_blob& a, const base_blob& b) { return memcmp(a.data, b.data, sizeof(a.data)) < 0; }
+
+    std::string GetHex() const;
+    void SetHex(const char* psz);
+    void SetHex(const std::string& str);
+    std::string ToString() const;
+
+    unsigned char* begin()
+    {
+        return &data[0];
+    }
+
+    unsigned char* end()
+    {
+        return &data[WIDTH];
+    }
+
+    const unsigned char* begin() const
+    {
+        return &data[0];
+    }
+
+    const unsigned char* end() const
+    {
+        return &data[WIDTH];
+    }
+
+    unsigned int size() const
+    {
+        return sizeof(data);
+    }
+
+    unsigned int GetSerializeSize(int nType, int nVersion) const
+    {
+        return sizeof(data); // nType and nVersion are not used
+    }
+
+    template<typename Stream>
+    void Serialize(Stream& s) const
+    {
+        s.write((char*)data, sizeof(data));
+    }
+
+    template<typename Stream>
+    void Unserialize(Stream& s)
+    {
+        s.read((char*)data, sizeof(data));
+    }
+
+    template<typename Stream>
+    void Serialize(Stream& s, int nType, int nVersion) const
+    {
+        s.write((char*)data, sizeof(data));
+    }
+
+    template<typename Stream>
+    void Unserialize(Stream& s, int nType, int nVersion)
+    {
+        s.read((char*)data, sizeof(data));
+    }
+};
+
+/** 160-bit opaque blob.
+ * @note This type is called uint160 for historical reasons only. It is an opaque
+ * blob of 160 bits and has no integer operations.
+ */
+class uint160 : public base_blob<160> {
+public:
+    uint160() {}
+    uint160(const base_blob<160>& b) : base_blob<160>(b) {}
+    explicit uint160(const std::vector<unsigned char>& vch) : base_blob<160>(vch) {}
+};
+
+/** 256-bit opaque blob.
+ * @note This type is called uint256 for historical reasons only. It is an
+ * opaque blob of 256 bits and has no integer operations. Use arith_uint256 if
+ * those are required.
+ */
+class uint256 : public base_blob<256> {
+public:
+    uint256() {}
+    uint256(const base_blob<256>& b) : base_blob<256>(b) {}
+    explicit uint256(const std::vector<unsigned char>& vch) : base_blob<256>(vch) {}
+
+    /** A cheap hash function that just returns 64 bits from the result, it can be
+     * used when the contents are considered uniformly random. It is not appropriate
+     * when the value can easily be influenced from outside as e.g. a network adversary could
+     * provide values to trigger worst-case behavior.
+     * @note The result of this function is not stable between little and big endian.
+     */
+    uint64_t GetCheapHash() const
+    {
+        uint64_t result;
+        memcpy((void*)&result, (void*)data, 8);
+        return result;
+    }
+
+    /** A more secure, salted hash function.
+     * @note This hash is not stable between little and big endian.
+     */
+    uint64_t GetHash(const uint256& salt) const;
+};
+
+/* uint256 from const char *.
+ * This is a separate function because the constructor uint256(const char*) can result
+ * in dangerously catching uint256(0).
+ */
+inline uint256 uint256S(const char *str)
+{
+    uint256 rv;
+    rv.SetHex(str);
+    return rv;
+}
+/* uint256 from std::string.
+ * This is a separate function because the constructor uint256(const std::string &str) can result
+ * in dangerously catching uint256(0) via std::string(const char*).
+ */ +inline uint256 uint256S(const std::string& str) +{ + uint256 rv; + rv.SetHex(str); + return rv; +} + +#endif // BITCOIN_UINT256_H \ No newline at end of file diff --git a/src/Native/libverushash/crypto/utilstrencodings.cpp b/src/Native/libverushash/crypto/utilstrencodings.cpp new file mode 100644 index 000000000..d5289a17a --- /dev/null +++ b/src/Native/libverushash/crypto/utilstrencodings.cpp @@ -0,0 +1,500 @@ +// Copyright (c) 2009-2010 Satoshi Nakamoto +// Copyright (c) 2009-2014 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include "utilstrencodings.h" + +#include "tinyformat.h" + +#include +#include +#include +#include +#include + +using namespace std; + +string SanitizeString(const string& str) +{ + /** + * safeChars chosen to allow simple messages/URLs/email addresses, but avoid anything + * even possibly remotely dangerous like & or > + */ + static string safeChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890 .,;_/:?@()"); + string strResult; + for (std::string::size_type i = 0; i < str.size(); i++) + { + if (safeChars.find(str[i]) != std::string::npos) + strResult.push_back(str[i]); + } + return strResult; +} + +const signed char p_util_hexdigit[256] = +{ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + 0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1, + -1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, }; + +signed char HexDigit(char c) +{ + return p_util_hexdigit[(unsigned char)c]; +} + +bool IsHex(const string& str) +{ + for(std::string::const_iterator it(str.begin()); it != str.end(); ++it) + { + if (HexDigit(*it) < 0) + return false; + } + return (str.size() > 0) && (str.size()%2 == 0); +} + +vector ParseHex(const char* psz) +{ + // convert hex dump to vector + vector vch; + while (true) + { + while (isspace(*psz)) + psz++; + signed char c = HexDigit(*psz++); + if (c == (signed char)-1) + break; + unsigned char n = (c << 4); + c = HexDigit(*psz++); + if (c == (signed char)-1) + break; + n |= c; + vch.push_back(n); + } + return vch; +} + +vector ParseHex(const string& str) +{ + return ParseHex(str.c_str()); +} + +string EncodeBase64(const unsigned char* pch, size_t len) +{ + static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + string strRet=""; + strRet.reserve((len+2)/3*4); + + int mode=0, left=0; + const unsigned char *pchEnd = pch+len; + + while (pch> 2]; + left = (enc & 3) << 4; + mode = 1; + break; + + case 1: // we have two bits + strRet += pbase64[left | (enc >> 4)]; + left = (enc & 15) << 2; + mode = 2; + break; + + case 2: // we have four bits + strRet += pbase64[left | (enc >> 6)]; + strRet += pbase64[enc & 63]; + mode = 0; + break; + } + } + + if (mode) + { + strRet += pbase64[left]; + strRet += '='; + if (mode == 1) + strRet += '='; + } + + return strRet; +} + +string EncodeBase64(const string& str) +{ + return EncodeBase64((const unsigned char*)str.c_str(), str.size()); +} + +vector DecodeBase64(const char* p, bool* pfInvalid) +{ + static const int decode64_table[256] = + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 
62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, + -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }; + + if (pfInvalid) + *pfInvalid = false; + + vector vchRet; + vchRet.reserve(strlen(p)*3/4); + + int mode = 0; + int left = 0; + + while (1) + { + int dec = decode64_table[(unsigned char)*p]; + if (dec == -1) break; + p++; + switch (mode) + { + case 0: // we have no bits and get 6 + left = dec; + mode = 1; + break; + + case 1: // we have 6 bits and keep 4 + vchRet.push_back((left<<2) | (dec>>4)); + left = dec & 15; + mode = 2; + break; + + case 2: // we have 4 bits and get 6, we keep 2 + vchRet.push_back((left<<4) | (dec>>2)); + left = dec & 3; + mode = 3; + break; + + case 3: // we have 2 bits and get 6 + vchRet.push_back((left<<6) | dec); + mode = 0; + break; + } + } + + if (pfInvalid) + switch (mode) + { + case 0: // 4n base64 characters processed: ok + break; + + case 1: // 4n+1 base64 character processed: impossible + *pfInvalid = true; + break; + + case 2: // 4n+2 base64 characters processed: require '==' + if (left || p[0] != '=' || p[1] != '=' || decode64_table[(unsigned char)p[2]] != -1) + *pfInvalid = true; + break; + + case 3: // 4n+3 base64 characters processed: require '=' + if (left || p[0] != '=' || 
decode64_table[(unsigned char)p[1]] != -1) + *pfInvalid = true; + break; + } + + return vchRet; +} + +string DecodeBase64(const string& str) +{ + vector vchRet = DecodeBase64(str.c_str()); + return (vchRet.size() == 0) ? string() : string((const char*)&vchRet[0], vchRet.size()); +} + +string EncodeBase32(const unsigned char* pch, size_t len) +{ + static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567"; + + string strRet=""; + strRet.reserve((len+4)/5*8); + + int mode=0, left=0; + const unsigned char *pchEnd = pch+len; + + while (pch> 3]; + left = (enc & 7) << 2; + mode = 1; + break; + + case 1: // we have three bits + strRet += pbase32[left | (enc >> 6)]; + strRet += pbase32[(enc >> 1) & 31]; + left = (enc & 1) << 4; + mode = 2; + break; + + case 2: // we have one bit + strRet += pbase32[left | (enc >> 4)]; + left = (enc & 15) << 1; + mode = 3; + break; + + case 3: // we have four bits + strRet += pbase32[left | (enc >> 7)]; + strRet += pbase32[(enc >> 2) & 31]; + left = (enc & 3) << 3; + mode = 4; + break; + + case 4: // we have two bits + strRet += pbase32[left | (enc >> 5)]; + strRet += pbase32[enc & 31]; + mode = 0; + } + } + + static const int nPadding[5] = {0, 6, 4, 3, 1}; + if (mode) + { + strRet += pbase32[left]; + for (int n=0; n DecodeBase32(const char* p, bool* pfInvalid) +{ + static const int decode32_table[256] = + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }; + + if (pfInvalid) + *pfInvalid = false; + + vector vchRet; + vchRet.reserve((strlen(p))*5/8); + + int mode = 0; + int left = 0; + + while (1) + { + int dec = decode32_table[(unsigned char)*p]; + if (dec == -1) break; + p++; + switch (mode) + { + case 0: // we have no bits and get 5 + left = dec; + mode = 1; + break; + + case 1: // we have 5 bits and keep 2 + vchRet.push_back((left<<3) | (dec>>2)); + left = dec & 3; + mode = 2; + break; + + case 2: // we have 2 bits and keep 7 + left = left << 5 | dec; + mode = 3; + break; + + case 3: // we have 7 bits and keep 4 + vchRet.push_back((left<<1) | (dec>>4)); + left = dec & 15; + mode = 4; + break; + + case 4: // we have 4 bits, and keep 1 + vchRet.push_back((left<<4) | (dec>>1)); + left = dec & 1; + mode = 5; + break; + + case 5: // we have 1 bit, and keep 6 + left = left << 5 | dec; + mode = 6; + break; + + case 6: // we have 6 bits, and keep 3 + vchRet.push_back((left<<2) | (dec>>3)); + left = dec & 7; + mode = 7; + break; + + case 7: // we have 3 bits, and keep 0 + vchRet.push_back((left<<5) | dec); + mode = 0; + break; + } + } + + if (pfInvalid) + switch (mode) + { + case 0: // 8n base32 characters processed: ok + break; + + case 1: // 8n+1 base32 characters processed: impossible + case 3: // +3 + case 6: // +6 + *pfInvalid = true; + break; + + case 2: // 8n+2 base32 characters processed: require '======' + if (left || p[0] != '=' || p[1] != '=' || p[2] != '=' || p[3] != '=' || p[4] != '=' || p[5] != '=' || decode32_table[(unsigned char)p[6]] != -1) + *pfInvalid = true; + break; + + case 4: // 8n+4 base32 characters processed: 
require '====' + if (left || p[0] != '=' || p[1] != '=' || p[2] != '=' || p[3] != '=' || decode32_table[(unsigned char)p[4]] != -1) + *pfInvalid = true; + break; + + case 5: // 8n+5 base32 characters processed: require '===' + if (left || p[0] != '=' || p[1] != '=' || p[2] != '=' || decode32_table[(unsigned char)p[3]] != -1) + *pfInvalid = true; + break; + + case 7: // 8n+7 base32 characters processed: require '=' + if (left || p[0] != '=' || decode32_table[(unsigned char)p[1]] != -1) + *pfInvalid = true; + break; + } + + return vchRet; +} + +string DecodeBase32(const string& str) +{ + vector vchRet = DecodeBase32(str.c_str()); + return (vchRet.size() == 0) ? string() : string((const char*)&vchRet[0], vchRet.size()); +} + +bool ParseInt32(const std::string& str, int32_t *out) +{ + char *endp = NULL; + errno = 0; // strtol will not set errno if valid + long int n = strtol(str.c_str(), &endp, 10); + if(out) *out = (int)n; + // Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow + // we still have to check that the returned value is within the range of an *int32_t*. On 64-bit + // platforms the size of these types may be different. 
+ return endp && *endp == 0 && !errno && + n >= std::numeric_limits::min() && + n <= std::numeric_limits::max(); +} + +std::string FormatParagraph(const std::string in, size_t width, size_t indent) +{ + std::stringstream out; + size_t col = 0; + size_t ptr = 0; + while(ptr < in.size()) + { + // Find beginning of next word + ptr = in.find_first_not_of(' ', ptr); + if (ptr == std::string::npos) + break; + // Find end of next word + size_t endword = in.find_first_of(' ', ptr); + if (endword == std::string::npos) + endword = in.size(); + // Add newline and indentation if this wraps over the allowed width + if (col > 0) + { + if ((col + endword - ptr) > width) + { + out << '\n'; + for(size_t i=0; i +#include +#include + +#define BEGIN(a) ((char*)&(a)) +#define END(a) ((char*)&((&(a))[1])) +#define UBEGIN(a) ((unsigned char*)&(a)) +#define UEND(a) ((unsigned char*)&((&(a))[1])) +#define ARRAYLEN(array) (sizeof(array)/sizeof((array)[0])) + +/** This is needed because the foreach macro can't get over the comma in pair */ +#define PAIRTYPE(t1, t2) std::pair + +std::string SanitizeString(const std::string& str); +std::vector ParseHex(const char* psz); +std::vector ParseHex(const std::string& str); +signed char HexDigit(char c); +bool IsHex(const std::string& str); +std::vector DecodeBase64(const char* p, bool* pfInvalid = NULL); +std::string DecodeBase64(const std::string& str); +std::string EncodeBase64(const unsigned char* pch, size_t len); +std::string EncodeBase64(const std::string& str); +std::vector DecodeBase32(const char* p, bool* pfInvalid = NULL); +std::string DecodeBase32(const std::string& str); +std::string EncodeBase32(const unsigned char* pch, size_t len); +std::string EncodeBase32(const std::string& str); + +std::string i64tostr(int64_t n); +std::string itostr(int n); +int64_t atoi64(const char* psz); +int64_t atoi64(const std::string& str); +int atoi(const std::string& str); + +/** + * Convert string to signed 32-bit integer with strict parse error feedback. 
+ * @returns true if the entire string could be parsed as valid integer, + * false if not the entire string could be parsed or when overflow or underflow occurred. + */ +bool ParseInt32(const std::string& str, int32_t *out); + +template +std::string HexStr(const T itbegin, const T itend, bool fSpaces=false) +{ + std::string rv; + static const char hexmap[16] = { '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; + rv.reserve((itend-itbegin)*3); + for(T it = itbegin; it < itend; ++it) + { + unsigned char val = (unsigned char)(*it); + if(fSpaces && it != itbegin) + rv.push_back(' '); + rv.push_back(hexmap[val>>4]); + rv.push_back(hexmap[val&15]); + } + + return rv; +} + +template +inline std::string HexStr(const T& vch, bool fSpaces=false) +{ + return HexStr(vch.begin(), vch.end(), fSpaces); +} + +/** + * Format a paragraph of text to a fixed width, adding spaces for + * indentation to any added line. + */ +std::string FormatParagraph(const std::string in, size_t width=79, size_t indent=0); + +/** + * Timing-attack-resistant comparison. + * Takes time proportional to length + * of first argument. + */ +template +bool TimingResistantEqual(const T& a, const T& b) +{ + if (b.size() == 0) return a.size() == 0; + size_t accumulator = a.size() ^ b.size(); + for (size_t i = 0; i < a.size(); i++) + accumulator |= a[i] ^ b[i%b.size()]; + return accumulator == 0; +} + +#endif // BITCOIN_UTILSTRENCODINGS_H \ No newline at end of file diff --git a/src/Native/libverushash/crypto/verus_clhash.cpp b/src/Native/libverushash/crypto/verus_clhash.cpp new file mode 100644 index 000000000..5dd4e069f --- /dev/null +++ b/src/Native/libverushash/crypto/verus_clhash.cpp @@ -0,0 +1,1068 @@ +/* + * This uses variations of the clhash algorithm for Verus Coin, licensed + * with the Apache-2.0 open source license. 
+ * + * Copyright (c) 2018 Michael Toutonghi + * Distributed under the Apache 2.0 software license, available in the original form for clhash + * here: https://github.com/lemire/clhash/commit/934da700a2a54d8202929a826e2763831bd43cf7#diff-9879d6db96fd29134fc802214163b95a + * + * Original CLHash code and any portions herein, (C) 2017, 2018 Daniel Lemire and Owen Kaser + * Faster 64-bit universal hashing + * using carry-less multiplications, Journal of Cryptographic Engineering (to appear) + * + * Best used on recent x64 processors (Haswell or better). + * + * This implements an intermediate step in the last part of a Verus block hash. The intent of this step + * is to more effectively equalize FPGAs over GPUs and CPUs. + * + **/ + +#include "verus_hash.h" + +#include +#include + +#ifdef _WIN32 +#pragma warning (disable : 4146) +#include +#endif +int __cpuverusoptimized = 0x80; + +#if defined(__arm__) || defined(__aarch64__) +#include "crypto/SSE2NEON.h" +#else +#include +#endif + +#ifdef _WIN32 +#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno) +#endif + +thread_local thread_specific_ptr verusclhasher_key; +thread_local thread_specific_ptr verusclhasher_descr; + +#if defined(__APPLE__) || defined(_WIN32) +// attempt to workaround horrible mingw/gcc destructor bug on Windows and Mac, which passes garbage in the this pointer +// we use the opportunity of control here to clean up all of our tls variables. 
we could keep a list, but this is a safe, +// functional hack +thread_specific_ptr::~thread_specific_ptr() { + if (verusclhasher_key.ptr) + { + verusclhasher_key.reset(); + } + if (verusclhasher_descr.ptr) + { + verusclhasher_descr.reset(); + } +} +#endif // defined(__APPLE__) || defined(_WIN32) +#if defined(__arm__) || defined(__aarch64__) //intrinsics not defined in SSE2NEON.h + +static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(uint64_t hi, uint64_t lo) + { + __m128i result; + ((uint64_t *)&result)[0] = lo; + ((uint64_t *)&result)[1] = hi; + return result; + } + +static inline __attribute__((always_inline)) __m128i _mm_mulhrs_epi16(__m128i _a, __m128i _b) +{ + int16_t result[8]; + int16_t *a = (int16_t*)&_a, *b = (int16_t*)&_b; + for (int i = 0; i < 8; i++) + { + result[i] = (int16_t)((((int32_t)(a[i]) * (int32_t)(b[i])) + 0x4000) >> 15); + } + + return *(__m128i *)result; +} + +__m128i _mm_cvtsi64_si128(uint64_t lo) +{ + __m128i result; + ((uint64_t *)&result)[0] = lo; + ((uint64_t *)&result)[1] = 0; + return result; +} + + static inline __attribute__((always_inline)) uint8x16_t _mm_aesenc_si128 (uint8x16_t a, uint8x16_t RoundKey) +{ + return vaesmcq_u8(vaeseq_u8(a, (uint8x16_t){})) ^ RoundKey; +} + + + static inline __attribute__((always_inline)) __m128i _mm_clmulepi64_si128(const __m128i a, const __m128i &b, int imm) +{ + return (__m128i)vmull_p64(vgetq_lane_u64(a, 1), vgetq_lane_u64(b,0)); + +} + +__m128i _mm_setr_epi8(u_char c0, u_char c1, u_char c2, u_char c3, u_char c4, u_char c5, u_char c6, u_char c7, u_char c8, u_char c9, u_char c10, u_char c11, u_char c12, u_char c13, u_char c14, u_char c15) +{ + __m128i result; + ((uint8_t *)&result)[0] = c0; + ((uint8_t *)&result)[1] = c1; + ((uint8_t *)&result)[2] = c2; + ((uint8_t *)&result)[3] = c3; + ((uint8_t *)&result)[4] = c4; + ((uint8_t *)&result)[5] = c5; + ((uint8_t *)&result)[6] = c6; + ((uint8_t *)&result)[7] = c7; + ((uint8_t *)&result)[8] = c8; + ((uint8_t *)&result)[9] = c9; + 
((uint8_t *)&result)[10] = c10; + ((uint8_t *)&result)[11] = c11; + ((uint8_t *)&result)[12] = c12; + ((uint8_t *)&result)[13] = c13; + ((uint8_t *)&result)[14] = c14; + ((uint8_t *)&result)[15] = c15; + return result; +} +__m128i _mm_shuffle_epi8(__m128i a, __m128i b) +{ + __m128i result; + for (int i = 0; i < 16; i++) + { + if (((uint8_t *)&b)[i] & 0x80) + { + ((uint8_t *)&result)[i] = 0; + } + else + { + ((uint8_t *)&result)[i] = ((uint8_t *)&a)[((uint8_t *)&b)[i] & 0xf]; + } + } + return result; +} + int64_t _mm_cvtsi128_si64(__m128i a) +{ + return ((int64_t *)&a)[0]; +} +__m128i _mm_loadl_epi64(__m128i *a) +{ + __m128i b = {0}; ((uint64_t*)&b)[0] = ((uint64_t*)a)[0]; + return b; +} +#endif + +// multiply the length and some key, no modulo + static inline __attribute__((always_inline)) __m128i lazyLengthHash(uint64_t keylength, uint64_t length) { + + const __m128i lengthvector = _mm_set_epi64x(keylength,length); + const __m128i clprod1 = _mm_clmulepi64_si128( lengthvector, lengthvector, 0x10); + return clprod1; +} + +// modulo reduction to 64-bit value. The high 64 bits contain garbage, see precompReduction64 + static inline __attribute__((always_inline)) __m128i precompReduction64_si128( __m128i A) { + //const __m128i C = _mm_set_epi64x(1U,(1U<<4)+(1U<<3)+(1U<<1)+(1U<<0)); // C is the irreducible poly. 
(64,4,3,1,0) + const __m128i C = _mm_cvtsi64_si128((1U<<4)+(1U<<3)+(1U<<1)+(1U<<0)); + __m128i Q2 = _mm_clmulepi64_si128( A, C, 0x01); + __m128i Q3 = _mm_shuffle_epi8(_mm_setr_epi8(0, 27, 54, 45, 108, 119, 90, 65, (char)216, (char)195, (char)238, (char)245, (char)180, (char)175, (char)130, (char)153), + _mm_srli_si128(Q2,8)); + __m128i Q4 = _mm_xor_si128(Q2,A); + const __m128i final = _mm_xor_si128(Q3,Q4); + return final;/// WARNING: HIGH 64 BITS CONTAIN GARBAGE +} + + static inline __attribute__((always_inline)) uint64_t precompReduction64( __m128i A) { + return _mm_cvtsi128_si64(precompReduction64_si128(A)); +} + + static inline __attribute__((always_inline)) void fixupkey(__m128i **pMoveScratch, verusclhash_descr *pdesc) { + uint32_t ofs = pdesc->keySizeInBytes >> 4; + for (__m128i *pfixup = *pMoveScratch; pfixup; pfixup = *++pMoveScratch) + { + const __m128i fixup = _mm_load_si128((__m128i *)(pfixup + ofs)); + _mm_store_si128((__m128i *)pfixup, fixup); + } +} + + static inline __attribute__((always_inline)) void haraka512_keyed_local(unsigned char *out, const unsigned char *in, const u128 *rc) { + u128 s[4], tmp; + + s[0] = LOAD(in); + s[1] = LOAD(in + 16); + s[2] = LOAD(in + 32); + s[3] = LOAD(in + 48); + + AES4(s[0], s[1], s[2], s[3], 0); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 8); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 16); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 24); + MIX4(s[0], s[1], s[2], s[3]); + + AES4(s[0], s[1], s[2], s[3], 32); + MIX4(s[0], s[1], s[2], s[3]); + + s[0] = _mm_xor_si128(s[0], LOAD(in)); + s[1] = _mm_xor_si128(s[1], LOAD(in + 16)); + s[2] = _mm_xor_si128(s[2], LOAD(in + 32)); + s[3] = _mm_xor_si128(s[3], LOAD(in + 48)); + + TRUNCSTORE(out, s[0], s[1], s[2], s[3]); +} + +// verus intermediate hash extra +__m128i __verusclmulwithoutreduction64alignedrepeat(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch) +{ + __m128i const 
*pbuf; + + // divide key mask by 16 from bytes to __m128i + keyMask >>= 4; + + // the random buffer must have at least 32 16 byte dwords after the keymask to work with this + // algorithm. we take the value from the last element inside the keyMask + 2, as that will never + // be used to xor into the accumulator before it is hashed with other values first + __m128i acc = _mm_load_si128(randomsource + (keyMask + 2)); + + for (int64_t i = 0; i < 32; i++) + { + const uint64_t selector = _mm_cvtsi128_si64(acc); + + // get two random locations in the key, which will be mutated and swapped + __m128i *prand = randomsource + ((selector >> 5) & keyMask); + __m128i *prandex = randomsource + ((selector >> 32) & keyMask); + + *(pMoveScratch++) = prand; + *(pMoveScratch++) = prandex; + + // select random start and order of pbuf processing + pbuf = buf + (selector & 3); + + switch (selector & 0x1c) + { + case 0: + { + const __m128i temp1 = _mm_load_si128(prandex); + const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128(prand); + _mm_store_si128(prand, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); + acc = _mm_xor_si128(clprod12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prandex, tempb2); + break; + } + case 4: + { + const __m128i temp1 = _mm_load_si128(prand); + const __m128i temp2 = _mm_load_si128(pbuf); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = 
_mm_xor_si128(clprod1, acc); + const __m128i clprod2 = _mm_clmulepi64_si128(temp2, temp2, 0x10); + acc = _mm_xor_si128(clprod2, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + acc = _mm_xor_si128(add12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prand, tempb2); + break; + } + case 8: + { + const __m128i temp1 = _mm_load_si128(prandex); + const __m128i temp2 = _mm_load_si128(pbuf); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + acc = _mm_xor_si128(add1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128(prand); + _mm_store_si128(prand, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); + acc = _mm_xor_si128(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10); + acc = _mm_xor_si128(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prandex, tempb2); + break; + } + case 0xc: + { + const __m128i temp1 = _mm_load_si128(prand); + const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + + // cannot be zero here + const int32_t divisor = (uint32_t)selector; + + acc = _mm_xor_si128(add1, acc); + + const int64_t dividend = _mm_cvtsi128_si64(acc); + const __m128i modulo = _mm_cvtsi32_si128(dividend % divisor); + acc = 
_mm_xor_si128(modulo, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + if (dividend & 1) + { + const __m128i temp12 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); + acc = _mm_xor_si128(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10); + acc = _mm_xor_si128(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prand, tempb2); + } + else + { + const __m128i tempb3 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + _mm_store_si128(prand, tempb3); + } + break; + } + case 0x10: + { + // a few AES operations + const __m128i *rc = prand; + __m128i tmp; + + __m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + __m128i temp2 = _mm_load_si128(pbuf); + + AES2(temp1, temp2, 0); + MIX2(temp1, temp2); + + AES2(temp1, temp2, 4); + MIX2(temp1, temp2); + + AES2(temp1, temp2, 8); + MIX2(temp1, temp2); + + acc = _mm_xor_si128(temp2, _mm_xor_si128(temp1, acc)); + + const __m128i tempa1 = _mm_load_si128(prand); + const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa3); + _mm_store_si128(prand, tempa4); + break; + } + case 0x14: + { + // we'll just call this one the monkins loop, inspired by Chris + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + __m128i tmp; // used by MIX2 + + uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times + __m128i *rc = prand; + uint64_t aesroundoffset = 0; + __m128i onekey; + + do + { + if (selector & (0x10000000 << rounds)) + { + onekey = _mm_load_si128(rc++); + const 
__m128i temp2 = _mm_load_si128(rounds & 1 ? pbuf : buftmp); + const __m128i add1 = _mm_xor_si128(onekey, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + } + else + { + onekey = _mm_load_si128(rc++); + __m128i temp2 = _mm_load_si128(rounds & 1 ? buftmp : pbuf); + AES2(onekey, temp2, aesroundoffset); + aesroundoffset += 4; + MIX2(onekey, temp2); + acc = _mm_xor_si128(onekey, acc); + acc = _mm_xor_si128(temp2, acc); + } + } while (rounds--); + + const __m128i tempa1 = _mm_load_si128(prand); + const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa3); + _mm_store_si128(prand, tempa4); + break; + } + case 0x18: + { + const __m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i temp2 = _mm_load_si128(prand); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp2); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp2); + + const __m128i tempb3 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + _mm_store_si128(prand, tempb3); + break; + } + case 0x1c: + { + const __m128i temp1 = _mm_load_si128(pbuf); + const __m128i temp2 = _mm_load_si128(prandex); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp2); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp2); + + const __m128i tempa3 = _mm_load_si128(prand); + _mm_store_si128(prand, tempa2); + + acc = _mm_xor_si128(tempa3, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, tempa3); + const __m128i tempb2 = _mm_xor_si128(tempb1, tempa3); + _mm_store_si128(prandex, tempb2); + 
break; + } + } + } + return acc; +} + +// hashes 64 bytes only by doing a carryless multiplication and reduction of the repeated 64 byte sequence 16 times, +// returning a 64 bit hash value +uint64_t verusclhash(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch) { + __m128i acc = __verusclmulwithoutreduction64alignedrepeat((__m128i *)random, (const __m128i *)buf, keyMask, pMoveScratch); + acc = _mm_xor_si128(acc, lazyLengthHash(1024, 64)); + return precompReduction64(acc); +} + +// hashes 64 bytes only by doing a carryless multiplication and reduction of the repeated 64 byte sequence 16 times, +// returning a 64 bit hash value +uint64_t verusclhash_sv2_1(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch) { + __m128i acc = __verusclmulwithoutreduction64alignedrepeat_sv2_1((__m128i *)random, (const __m128i *)buf, keyMask, pMoveScratch); + acc = _mm_xor_si128(acc, lazyLengthHash(1024, 64)); + return precompReduction64(acc); +} + +uint64_t verusclhash_sv2_2(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch) { + __m128i acc = __verusclmulwithoutreduction64alignedrepeat_sv2_2((__m128i *)random, (const __m128i *)buf, keyMask, pMoveScratch); + acc = _mm_xor_si128(acc, lazyLengthHash(1024, 64)); + return precompReduction64(acc); +} + +__m128i __verusclmulwithoutreduction64alignedrepeat_sv2_1(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch) +{ + const __m128i pbuf_copy[4] = {_mm_xor_si128(buf[0], buf[2]), _mm_xor_si128(buf[1], buf[3]), buf[2], buf[3]}; + const __m128i *pbuf; + + // divide key mask by 16 from bytes to __m128i + keyMask >>= 4; + + // the random buffer must have at least 32 16 byte dwords after the keymask to work with this + // algorithm. 
we take the value from the last element inside the keyMask + 2, as that will never + // be used to xor into the accumulator before it is hashed with other values first + __m128i acc = _mm_load_si128(randomsource + (keyMask + 2)); + + for (int64_t i = 0; i < 32; i++) + { + const uint64_t selector = _mm_cvtsi128_si64(acc); + + // get two random locations in the key, which will be mutated and swapped + __m128i *prand = randomsource + ((selector >> 5) & keyMask); + __m128i *prandex = randomsource + ((selector >> 32) & keyMask); + + *(pMoveScratch++) = prand; + *(pMoveScratch++) = prandex; + + // select random start and order of pbuf processing + pbuf = pbuf_copy + (selector & 3); + + switch (selector & 0x1c) + { + case 0: + { + const __m128i temp1 = _mm_load_si128(prandex); + const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128(prand); + _mm_store_si128(prand, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); + acc = _mm_xor_si128(clprod12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prandex, tempb2); + break; + } + case 4: + { + const __m128i temp1 = _mm_load_si128(prand); + const __m128i temp2 = _mm_load_si128(pbuf); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + const __m128i clprod2 = _mm_clmulepi64_si128(temp2, temp2, 0x10); + acc = _mm_xor_si128(clprod2, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, 
temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + acc = _mm_xor_si128(add12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prand, tempb2); + break; + } + case 8: + { + const __m128i temp1 = _mm_load_si128(prandex); + const __m128i temp2 = _mm_load_si128(pbuf); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + acc = _mm_xor_si128(add1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128(prand); + _mm_store_si128(prand, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); + acc = _mm_xor_si128(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10); + acc = _mm_xor_si128(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prandex, tempb2); + break; + } + case 0xc: + { + const __m128i temp1 = _mm_load_si128(prand); + const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + + // cannot be zero here + const int32_t divisor = (uint32_t)selector; + + acc = _mm_xor_si128(add1, acc); + + const int64_t dividend = _mm_cvtsi128_si64(acc); + const __m128i modulo = _mm_cvtsi32_si128(dividend % divisor); + acc = _mm_xor_si128(modulo, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + if (dividend & 1) + { + const __m128i temp12 
= _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); + acc = _mm_xor_si128(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10); + acc = _mm_xor_si128(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prand, tempb2); + } + else + { + const __m128i tempb3 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + _mm_store_si128(prand, tempb3); + } + break; + } + case 0x10: + { + // a few AES operations + const __m128i *rc = prand; + __m128i tmp; + + __m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + __m128i temp2 = _mm_load_si128(pbuf); + + AES2(temp1, temp2, 0); + MIX2(temp1, temp2); + + AES2(temp1, temp2, 4); + MIX2(temp1, temp2); + + AES2(temp1, temp2, 8); + MIX2(temp1, temp2); + + acc = _mm_xor_si128(temp2, _mm_xor_si128(temp1, acc)); + + const __m128i tempa1 = _mm_load_si128(prand); + const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa3); + _mm_store_si128(prand, tempa4); + break; + } + case 0x14: + { + // we'll just call this one the monkins loop, inspired by Chris - modified to cast to uint64_t on shift for more variability in the loop + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + __m128i tmp; // used by MIX2 + + uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times + __m128i *rc = prand; + uint64_t aesroundoffset = 0; + __m128i onekey; + + do + { + if (selector & (((uint64_t)0x10000000) << rounds)) + { + onekey = _mm_load_si128(rc++); + const __m128i temp2 = _mm_load_si128(rounds & 1 ? 
pbuf : buftmp); + const __m128i add1 = _mm_xor_si128(onekey, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + } + else + { + onekey = _mm_load_si128(rc++); + __m128i temp2 = _mm_load_si128(rounds & 1 ? buftmp : pbuf); + AES2(onekey, temp2, aesroundoffset); + aesroundoffset += 4; + MIX2(onekey, temp2); + acc = _mm_xor_si128(onekey, acc); + acc = _mm_xor_si128(temp2, acc); + } + } while (rounds--); + + const __m128i tempa1 = _mm_load_si128(prand); + const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa3); + _mm_store_si128(prand, tempa4); + break; + } + case 0x18: + { + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + + uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times + __m128i *rc = prand; + __m128i onekey; + + do + { + if (selector & (((uint64_t)0x10000000) << rounds)) + { + onekey = _mm_load_si128(rc++); + const __m128i temp2 = _mm_load_si128(rounds & 1 ? pbuf : buftmp); + const __m128i add1 = _mm_xor_si128(onekey, temp2); + // cannot be zero here, may be negative + const int32_t divisor = (uint32_t)selector; + const int64_t dividend = _mm_cvtsi128_si64(add1); + const __m128i modulo = _mm_cvtsi32_si128(dividend % divisor); + acc = _mm_xor_si128(modulo, acc); + } + else + { + onekey = _mm_load_si128(rc++); + __m128i temp2 = _mm_load_si128(rounds & 1 ? 
buftmp : pbuf); + const __m128i add1 = _mm_xor_si128(onekey, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + const __m128i clprod2 = _mm_mulhrs_epi16(acc, clprod1); + acc = _mm_xor_si128(clprod2, acc); + } + } while (rounds--); + + const __m128i tempa3 = _mm_load_si128(prandex); + const __m128i tempa4 = _mm_xor_si128(tempa3, acc); + _mm_store_si128(prandex, tempa4); + _mm_store_si128(prand, onekey); + break; + } + case 0x1c: + { + const __m128i temp1 = _mm_load_si128(pbuf); + const __m128i temp2 = _mm_load_si128(prandex); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp2); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp2); + + const __m128i tempa3 = _mm_load_si128(prand); + _mm_store_si128(prand, tempa2); + + acc = _mm_xor_si128(tempa3, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, tempa3); + const __m128i tempb2 = _mm_xor_si128(tempb1, tempa3); + _mm_store_si128(prandex, tempb2); + break; + } + } + } + return acc; +} + +__m128i __verusclmulwithoutreduction64alignedrepeat_sv2_2(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch) +{ + const __m128i pbuf_copy[4] = {_mm_xor_si128(buf[0], buf[2]), _mm_xor_si128(buf[1], buf[3]), buf[2], buf[3]}; + const __m128i *pbuf; + + // divide key mask by 16 from bytes to __m128i + keyMask >>= 4; + + // the random buffer must have at least 32 16 byte dwords after the keymask to work with this + // algorithm. 
we take the value from the last element inside the keyMask + 2, as that will never + // be used to xor into the accumulator before it is hashed with other values first + __m128i acc = _mm_load_si128(randomsource + (keyMask + 2)); + + for (int64_t i = 0; i < 32; i++) + { + const uint64_t selector = _mm_cvtsi128_si64(acc); + + // get two random locations in the key, which will be mutated and swapped + __m128i *prand = randomsource + ((selector >> 5) & keyMask); + __m128i *prandex = randomsource + ((selector >> 32) & keyMask); + + *(pMoveScratch++) = prand; + *(pMoveScratch++) = prandex; + + // select random start and order of pbuf processing + pbuf = pbuf_copy + (selector & 3); + + switch (selector & 0x1c) + { + case 0: + { + const __m128i temp1 = _mm_load_si128(prandex); + const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128(prand); + _mm_store_si128(prand, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); + acc = _mm_xor_si128(clprod12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prandex, tempb2); + break; + } + case 4: + { + const __m128i temp1 = _mm_load_si128(prand); + const __m128i temp2 = _mm_load_si128(pbuf); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + const __m128i clprod2 = _mm_clmulepi64_si128(temp2, temp2, 0x10); + acc = _mm_xor_si128(clprod2, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, 
temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + acc = _mm_xor_si128(add12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prand, tempb2); + break; + } + case 8: + { + const __m128i temp1 = _mm_load_si128(prandex); + const __m128i temp2 = _mm_load_si128(pbuf); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + acc = _mm_xor_si128(add1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128(prand); + _mm_store_si128(prand, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); + acc = _mm_xor_si128(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10); + acc = _mm_xor_si128(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prandex, tempb2); + break; + } + case 0xc: + { + const __m128i temp1 = _mm_load_si128(prand); + const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + + // cannot be zero here + const int32_t divisor = (uint32_t)selector; + + acc = _mm_xor_si128(add1, acc); + + const int64_t dividend = _mm_cvtsi128_si64(acc); + const __m128i modulo = _mm_cvtsi32_si128(dividend % divisor); + acc = _mm_xor_si128(modulo, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp1); + + if (dividend & 1) + { + const __m128i temp12 
= _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128(pbuf); + const __m128i add12 = _mm_xor_si128(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); + acc = _mm_xor_si128(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10); + acc = _mm_xor_si128(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12); + const __m128i tempb2 = _mm_xor_si128(tempb1, temp12); + _mm_store_si128(prand, tempb2); + } + else + { + const __m128i tempb3 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa2); + _mm_store_si128(prand, tempb3); + const __m128i tempb4 = _mm_load_si128(pbuf); + acc = _mm_xor_si128(tempb4, acc); + } + break; + } + case 0x10: + { + // a few AES operations + const __m128i *rc = prand; + __m128i tmp; + + __m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + __m128i temp2 = _mm_load_si128(pbuf); + + AES2(temp1, temp2, 0); + MIX2(temp1, temp2); + + AES2(temp1, temp2, 4); + MIX2(temp1, temp2); + + AES2(temp1, temp2, 8); + MIX2(temp1, temp2); + + acc = _mm_xor_si128(temp2, _mm_xor_si128(temp1, acc)); + + const __m128i tempa1 = _mm_load_si128(prand); + const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa3); + _mm_store_si128(prand, tempa4); + break; + } + case 0x14: + { + // we'll just call this one the monkins loop, inspired by Chris - modified to cast to uint64_t on shift for more variability in the loop + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + __m128i tmp; // used by MIX2 + + uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times + __m128i *rc = prand; + uint64_t aesroundoffset = 0; + __m128i onekey; + + do + { + if (selector & (((uint64_t)0x10000000) << rounds)) + { + onekey = _mm_load_si128(rc++); + const __m128i temp2 = 
_mm_load_si128(rounds & 1 ? pbuf : buftmp); + const __m128i add1 = _mm_xor_si128(onekey, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + } + else + { + onekey = _mm_load_si128(rc++); + __m128i temp2 = _mm_load_si128(rounds & 1 ? buftmp : pbuf); + AES2(onekey, temp2, aesroundoffset); + aesroundoffset += 4; + MIX2(onekey, temp2); + acc = _mm_xor_si128(onekey, acc); + acc = _mm_xor_si128(temp2, acc); + } + } while (rounds--); + + const __m128i tempa1 = _mm_load_si128(prand); + const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128(prandex); + _mm_store_si128(prandex, tempa3); + _mm_store_si128(prand, tempa4); + break; + } + case 0x18: + { + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + + uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times + __m128i *rc = prand; + __m128i onekey; + + do + { + if (selector & (((uint64_t)0x10000000) << rounds)) + { + onekey = _mm_load_si128(rc++); + const __m128i temp2 = _mm_load_si128(rounds & 1 ? pbuf : buftmp); + onekey = _mm_xor_si128(onekey, temp2); + // cannot be zero here, may be negative + const int32_t divisor = (uint32_t)selector; + const int64_t dividend = _mm_cvtsi128_si64(onekey); + const __m128i modulo = _mm_cvtsi32_si128(dividend % divisor); + acc = _mm_xor_si128(modulo, acc); + } + else + { + onekey = _mm_load_si128(rc++); + __m128i temp2 = _mm_load_si128(rounds & 1 ? 
buftmp : pbuf); + const __m128i add1 = _mm_xor_si128(onekey, temp2); + onekey = _mm_clmulepi64_si128(add1, add1, 0x10); + const __m128i clprod2 = _mm_mulhrs_epi16(acc, onekey); + acc = _mm_xor_si128(clprod2, acc); + } + } while (rounds--); + + const __m128i tempa3 = _mm_load_si128(prandex); + const __m128i tempa4 = _mm_xor_si128(tempa3, acc); + + _mm_store_si128(prandex, onekey); + _mm_store_si128(prand, tempa4); + break; + } + case 0x1c: + { + const __m128i temp1 = _mm_load_si128(pbuf); + const __m128i temp2 = _mm_load_si128(prandex); + const __m128i add1 = _mm_xor_si128(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp2); + const __m128i tempa2 = _mm_xor_si128(tempa1, temp2); + + const __m128i tempa3 = _mm_load_si128(prand); + _mm_store_si128(prand, tempa2); + + acc = _mm_xor_si128(tempa3, acc); + const __m128i temp4 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1)); + acc = _mm_xor_si128(temp4,acc); + const __m128i tempb1 = _mm_mulhrs_epi16(acc, tempa3); + const __m128i tempb2 = _mm_xor_si128(tempb1, tempa3); + _mm_store_si128(prandex, tempb2); + break; + } + } + } + return acc; +} + +void *alloc_aligned_buffer(uint64_t bufSize) +{ + void *answer = NULL; + if (posix_memalign(&answer, sizeof(__m128i)*2, bufSize)) + { + return NULL; + } + else + { + return answer; + } +} \ No newline at end of file diff --git a/src/Native/libverushash/crypto/verus_clhash.h b/src/Native/libverushash/crypto/verus_clhash.h new file mode 100644 index 000000000..1f578c5e6 --- /dev/null +++ b/src/Native/libverushash/crypto/verus_clhash.h @@ -0,0 +1,304 @@ +/* + * This uses variations of the clhash algorithm for Verus Coin, licensed + * with the Apache-2.0 open source license. 
+ * + * Copyright (c) 2018 Michael Toutonghi + * Distributed under the Apache 2.0 software license, available in the original form for clhash + * here: https://github.com/lemire/clhash/commit/934da700a2a54d8202929a826e2763831bd43cf7#diff-9879d6db96fd29134fc802214163b95a + * + * CLHash is a very fast hashing function that uses the + * carry-less multiplication and SSE instructions. + * + * Original CLHash code (C) 2017, 2018 Daniel Lemire and Owen Kaser + * Faster 64-bit universal hashing + * using carry-less multiplications, Journal of Cryptographic Engineering (to appear) + * + * Best used on recent x64 processors (Haswell or better). + * + **/ + +#ifndef INCLUDE_VERUS_CLHASH_H +#define INCLUDE_VERUS_CLHASH_H + +#ifndef _WIN32 +#include +#include +#else +#include +#endif // !WIN32 + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 +#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno) +typedef unsigned char u_char; +#endif + +enum { + // Verus Key size must include the equivalent size of a Haraka key + // after the first part. 
+ // Any excess over a power of 2 will not get mutated, and any excess over + // power of 2 + Haraka sized key will not be used + VERUSKEYSIZE=1024 * 8 + (40 * 16), + SOLUTION_VERUSHHASH_V2 = 1, // this must be in sync with CScript::SOLUTION_VERUSV2 + SOLUTION_VERUSHHASH_V2_1 = 3, // this must be in sync with CScript::ACTIVATE_VERUSHASH2_1 + SOLUTION_VERUSHHASH_V2_2 = 4 +}; + +struct verusclhash_descr +{ + uint256 seed; + uint32_t keySizeInBytes; +}; + +struct thread_specific_ptr { + void *ptr; + thread_specific_ptr() { ptr = NULL; } + void reset(void *newptr = NULL) + { + if (ptr && ptr != newptr) + { + std::free(ptr); + } + ptr = newptr; + + } + void *get() { return ptr; } +#if defined(__APPLE__) || defined(_WIN32) + // horrible MingW and Mac with gcc thread local storage bug workaround + ~thread_specific_ptr(); +#else + ~thread_specific_ptr() { + this->reset(); + } +#endif +}; + +extern thread_local thread_specific_ptr verusclhasher_key; +extern thread_local thread_specific_ptr verusclhasher_descr; + +extern int __cpuverusoptimized; + +__m128i __verusclmulwithoutreduction64alignedrepeat(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch); +__m128i __verusclmulwithoutreduction64alignedrepeat_sv2_1(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch); +__m128i __verusclmulwithoutreduction64alignedrepeat_sv2_2(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch); +__m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch); +__m128i __verusclmulwithoutreduction64alignedrepeat_sv2_1_port(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch); +__m128i __verusclmulwithoutreduction64alignedrepeat_sv2_2_port(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch); + +inline bool IsCPUVerusOptimized() +{ + #if 
defined(__arm__) || defined(__aarch64__) + long hwcaps= getauxval(AT_HWCAP); + + if((hwcaps & HWCAP_AES) && (hwcaps & HWCAP_PMULL)) + __cpuverusoptimized = true; + else + __cpuverusoptimized = false; + + #else + if (__cpuverusoptimized & 0x80) + { + unsigned int eax,ebx,ecx,edx; + if (!__get_cpuid(1,&eax,&ebx,&ecx,&edx)) + { + __cpuverusoptimized = false; + } + else + { + __cpuverusoptimized = ((ecx & (bit_AVX | bit_AES | bit_PCLMUL)) == (bit_AVX | bit_AES | bit_PCLMUL)); + } + } + #endif + return __cpuverusoptimized; +}; + +inline void ForceCPUVerusOptimized(bool trueorfalse) +{ + __cpuverusoptimized = trueorfalse; +}; + +uint64_t verusclhash(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch); +uint64_t verusclhash_port(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch); +uint64_t verusclhash_sv2_1(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch); +uint64_t verusclhash_sv2_1_port(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch); +uint64_t verusclhash_sv2_2(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch); +uint64_t verusclhash_sv2_2_port(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch); +void *alloc_aligned_buffer(uint64_t bufSize); + +#ifdef __cplusplus +} // extern "C" +#endif + +#ifdef __cplusplus +// special high speed hasher for VerusHash 2.0 +struct verusclhasher { + uint64_t keySizeInBytes; + uint64_t keyMask; + uint64_t (*verusclhashfunction)(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch); + __m128i (*verusinternalclhashfunction)(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch); + + static inline uint64_t keymask(uint64_t keysize) + { + int i = 0; + while (keysize >>= 1) + { + i++; + } + return i ? 
(((uint64_t)1) << i) - 1 : 0; + } + + // align on 256 bit boundary at end + verusclhasher(uint64_t keysize=VERUSKEYSIZE, int solutionVersion=SOLUTION_VERUSHHASH_V2) : keySizeInBytes((keysize >> 5) << 5) + { +#ifdef __APPLE__ + __tls_init(); +#endif + if (IsCPUVerusOptimized()) + { + if (solutionVersion >= SOLUTION_VERUSHHASH_V2_1) + { + if (solutionVersion >= SOLUTION_VERUSHHASH_V2_2) + { + verusclhashfunction = &verusclhash_sv2_2; + verusinternalclhashfunction = &__verusclmulwithoutreduction64alignedrepeat_sv2_2; + } + else + { + verusclhashfunction = &verusclhash_sv2_1; + verusinternalclhashfunction = &__verusclmulwithoutreduction64alignedrepeat_sv2_1; + } + } + else + { + verusclhashfunction = &verusclhash; + verusinternalclhashfunction = &__verusclmulwithoutreduction64alignedrepeat; + } + } + else + { + if (solutionVersion >= SOLUTION_VERUSHHASH_V2_1) + { + if (solutionVersion >= SOLUTION_VERUSHHASH_V2_2) + { + verusclhashfunction = &verusclhash_sv2_2_port; + verusinternalclhashfunction = &__verusclmulwithoutreduction64alignedrepeat_sv2_2_port; + } + else + { + verusclhashfunction = &verusclhash_sv2_1_port; + verusinternalclhashfunction = &__verusclmulwithoutreduction64alignedrepeat_sv2_1_port; + } + } + else + { + verusclhashfunction = &verusclhash_port; + verusinternalclhashfunction = &__verusclmulwithoutreduction64alignedrepeat_port; + } + } + + // if we changed, change it + if (verusclhasher_key.get() && keySizeInBytes != ((verusclhash_descr *)verusclhasher_descr.get())->keySizeInBytes) + { + verusclhasher_key.reset(); + verusclhasher_descr.reset(); + } + // get buffer space for mutating and refresh keys + void *key = NULL; + if (!(key = verusclhasher_key.get()) && + (verusclhasher_key.reset((unsigned char *)alloc_aligned_buffer(keySizeInBytes << 1)), key = verusclhasher_key.get())) + { + verusclhash_descr *pdesc; + if (verusclhasher_descr.reset(new verusclhash_descr()), pdesc = (verusclhash_descr *)verusclhasher_descr.get()) + { + pdesc->keySizeInBytes = 
keySizeInBytes; + } + else + { + verusclhasher_key.reset(); + key = NULL; + } + } + if (key) + { + keyMask = keymask(keySizeInBytes); + } + else + { + keyMask = 0; + keySizeInBytes = 0; + } +#ifdef VERUSHASHDEBUG + printf("New hasher, keyMask: %lx, newKeySize: %lx\n", keyMask, keySizeInBytes); +#endif + } + + inline void *gethasherrefresh() + { + verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get(); + return (unsigned char *)verusclhasher_key.get() + pdesc->keySizeInBytes; + } + + // returns a per thread, writeable scratch pad that has enough space to hold a pointer for each + // mutated entry in the refresh hash + inline __m128i **getpmovescratch(void *hasherrefresh) + { + return (__m128i **)((unsigned char *)hasherrefresh + keyrefreshsize()); + } + + inline verusclhash_descr *gethasherdescription() const + { + return (verusclhash_descr *)verusclhasher_descr.get(); + } + + inline uint64_t keyrefreshsize() const + { + return keyMask + 1; + } + + inline void *fixupkey(void *hashKey, verusclhash_descr &desc) + { + unsigned char *ret = (unsigned char *)hashKey; + uint32_t ofs = desc.keySizeInBytes >> 4; + __m128i **ppfixup = getpmovescratch(ret + desc.keySizeInBytes); // past the part to refresh from + for (__m128i *pfixup = *ppfixup; pfixup; pfixup = *++ppfixup) + { + *pfixup = *(pfixup + ofs); // we hope the compiler cancels this operation out before add + } + return hashKey; + } + + // this prepares a key for hashing and mutation by copying it from the original key for this block + // WARNING!! 
this does not check for NULL ptr, so make sure the buffer is allocated + inline void *gethashkey() + { + unsigned char *ret = (unsigned char *)verusclhasher_key.get(); + return fixupkey(ret, *(verusclhash_descr *)verusclhasher_descr.get()); + } + + inline uint64_t operator()(const unsigned char buf[64]) const { + unsigned char *pkey = (unsigned char *)verusclhasher_key.get(); + verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get(); + return (*verusclhashfunction)(pkey, buf, keyMask, (__m128i **)(pkey + (pdesc->keySizeInBytes + keyrefreshsize()))); + } + + inline uint64_t operator()(const unsigned char buf[64], void *pkey) const { + verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get(); + return (*verusclhashfunction)(pkey, buf, keyMask, (__m128i **)((unsigned char *)pkey + (pdesc->keySizeInBytes + keyrefreshsize()))); + } + + inline uint64_t operator()(const unsigned char buf[64], void *pkey, __m128i **pMoveScratch) const { + return (*verusclhashfunction)((unsigned char *)pkey, buf, keyMask, pMoveScratch); + } +}; + +#endif // #ifdef __cplusplus + +#endif // INCLUDE_VERUS_CLHASH_H \ No newline at end of file diff --git a/src/Native/libverushash/crypto/verus_clhash_portable.cpp b/src/Native/libverushash/crypto/verus_clhash_portable.cpp new file mode 100644 index 000000000..8622b5bde --- /dev/null +++ b/src/Native/libverushash/crypto/verus_clhash_portable.cpp @@ -0,0 +1,1199 @@ +/* + * This uses variations of the clhash algorithm for Verus Coin, licensed + * with the Apache-2.0 open source license. 
+ * + * Copyright (c) 2018 Michael Toutonghi + * Distributed under the Apache 2.0 software license, available in the original form for clhash + * here: https://github.com/lemire/clhash/commit/934da700a2a54d8202929a826e2763831bd43cf7#diff-9879d6db96fd29134fc802214163b95a + * + * Original CLHash code and any portions herein, (C) 2017, 2018 Daniel Lemire and Owen Kaser + * Faster 64-bit universal hashing + * using carry-less multiplications, Journal of Cryptographic Engineering (to appear) + * + * Best used on recent x64 processors (Haswell or better). + * + * This implements an intermediate step in the last part of a Verus block hash. The intent of this step + * is to more effectively equalize FPGAs over GPUs and CPUs. + * + **/ + + +#include "verus_hash.h" + +#include +#include + +#ifdef __APPLE__ +#include +#endif// APPLE + +#ifdef __linux__ + +#if defined(__i386__) || defined(__X86_64__) +#include +#elif defined(__arm__) || defined(__aarch64__) +#include "crypto/SSE2NEON.h" +#endif + +#elif _WIN32 +#pragma warning (disable : 4146) +#include +#endif + +void clmul64(uint64_t a, uint64_t b, uint64_t* r) +{ + uint8_t s = 4,i; //window size + uint64_t two_s = 1 << s; //2^s + uint64_t smask = two_s-1; //s 1 bits + uint64_t u[16]; + uint64_t tmp; + uint64_t ifmask; + //Precomputation + u[0] = 0; + u[1] = b; + for(i = 2 ; i < two_s; i += 2){ + u[i] = u[i >> 1] << 1; //even indices: left shift + u[i + 1] = u[i] ^ b; //odd indices: xor b + } + //Multiply + r[0] = u[a & smask]; //first window only affects lower word + r[1] = 0; + for(i = s ; i < 64 ; i += s){ + tmp = u[a >> i & smask]; + r[0] ^= tmp << i; + r[1] ^= tmp >> (64 - i); + } + //Repair + uint64_t m = 0xEEEEEEEEEEEEEEEE; //s=4 => 16 times 1110 + for(i = 1 ; i < s ; i++){ + tmp = ((a & m) >> i); + m &= m << 1; //shift mask to exclude all bit j': j' mod s = i + ifmask = -((b >> (64-i)) & 1); //if the (64-i)th bit of b is 1 + r[1] ^= (tmp & ifmask); + } +} + +u128 _mm_clmulepi64_si128_emu(const __m128i &a, const 
__m128i &b, int imm) +{ + uint64_t result[2]; + clmul64(*((uint64_t*)&a + (imm & 1)), *((uint64_t*)&b + ((imm & 0x10) >> 4)), result); + + /* + // TEST + const __m128i tmp1 = _mm_load_si128(&a); + const __m128i tmp2 = _mm_load_si128(&b); + imm = imm & 0x11; + const __m128i testresult = (imm == 0x10) ? _mm_clmulepi64_si128(tmp1, tmp2, 0x10) : ((imm == 0x01) ? _mm_clmulepi64_si128(tmp1, tmp2, 0x01) : ((imm == 0x00) ? _mm_clmulepi64_si128(tmp1, tmp2, 0x00) : _mm_clmulepi64_si128(tmp1, tmp2, 0x11))); + if (!memcmp(&testresult, &result, 16)) + { + printf("_mm_clmulepi64_si128_emu: Portable version passed!\n"); + } + else + { + printf("_mm_clmulepi64_si128_emu: Portable version failed! a: %lxh %lxl, b: %lxh %lxl, imm: %x, emu: %lxh %lxl, intrin: %lxh %lxl\n", + *((uint64_t *)&a + 1), *(uint64_t *)&a, + *((uint64_t *)&b + 1), *(uint64_t *)&b, + imm, + *((uint64_t *)result + 1), *(uint64_t *)result, + *((uint64_t *)&testresult + 1), *(uint64_t *)&testresult); + return testresult; + } + */ + + return *(__m128i *)result; +} + +u128 _mm_mulhrs_epi16_emu(__m128i _a, __m128i _b) +{ + int16_t result[8]; + int16_t *a = (int16_t*)&_a, *b = (int16_t*)&_b; + for (int i = 0; i < 8; i ++) + { + result[i] = (int16_t)((((int32_t)(a[i]) * (int32_t)(b[i])) + 0x4000) >> 15); + } + + /* + const __m128i testresult = _mm_mulhrs_epi16(_a, _b); + if (!memcmp(&testresult, &result, 16)) + { + printf("_mm_mulhrs_epi16_emu: Portable version passed!\n"); + } + else + { + printf("_mm_mulhrs_epi16_emu: Portable version failed! 
a: %lxh %lxl, b: %lxh %lxl, emu: %lxh %lxl, intrin: %lxh %lxl\n", + *((uint64_t *)&a + 1), *(uint64_t *)&a, + *((uint64_t *)&b + 1), *(uint64_t *)&b, + *((uint64_t *)result + 1), *(uint64_t *)result, + *((uint64_t *)&testresult + 1), *(uint64_t *)&testresult); + } + */ + + return *(__m128i *)result; +} + +inline u128 _mm_set_epi64x_emu(uint64_t hi, uint64_t lo) +{ + __m128i result; + ((uint64_t *)&result)[0] = lo; + ((uint64_t *)&result)[1] = hi; + return result; +} + +inline u128 _mm_cvtsi64_si128_emu(uint64_t lo) +{ + __m128i result; + ((uint64_t *)&result)[0] = lo; + ((uint64_t *)&result)[1] = 0; + return result; +} + +inline int64_t _mm_cvtsi128_si64_emu(const __m128i &a) +{ + return *(int64_t *)&a; +} + +inline int32_t _mm_cvtsi128_si32_emu(const __m128i &a) +{ + return *(int32_t *)&a; +} + +inline u128 _mm_cvtsi32_si128_emu(uint32_t lo) +{ + __m128i result; + ((uint32_t *)&result)[0] = lo; + ((uint32_t *)&result)[1] = 0; + ((uint64_t *)&result)[1] = 0; + + /* + const __m128i testresult = _mm_cvtsi32_si128(lo); + if (!memcmp(&testresult, &result, 16)) + { + printf("_mm_cvtsi32_si128_emu: Portable version passed!\n"); + } + else + { + printf("_mm_cvtsi32_si128_emu: Portable version failed!\n"); + } + */ + + return result; +} + +u128 _mm_setr_epi8_emu(u_char c0, u_char c1, u_char c2, u_char c3, u_char c4, u_char c5, u_char c6, u_char c7, u_char c8, u_char c9, u_char c10, u_char c11, u_char c12, u_char c13, u_char c14, u_char c15) +{ + __m128i result; + ((uint8_t *)&result)[0] = c0; + ((uint8_t *)&result)[1] = c1; + ((uint8_t *)&result)[2] = c2; + ((uint8_t *)&result)[3] = c3; + ((uint8_t *)&result)[4] = c4; + ((uint8_t *)&result)[5] = c5; + ((uint8_t *)&result)[6] = c6; + ((uint8_t *)&result)[7] = c7; + ((uint8_t *)&result)[8] = c8; + ((uint8_t *)&result)[9] = c9; + ((uint8_t *)&result)[10] = c10; + ((uint8_t *)&result)[11] = c11; + ((uint8_t *)&result)[12] = c12; + ((uint8_t *)&result)[13] = c13; + ((uint8_t *)&result)[14] = c14; + ((uint8_t *)&result)[15] = 
c15; + + /* + const __m128i testresult = _mm_setr_epi8(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15); + if (!memcmp(&testresult, &result, 16)) + { + printf("_mm_setr_epi8_emu: Portable version passed!\n"); + } + else + { + printf("_mm_setr_epi8_emu: Portable version failed!\n"); + } + */ + + return result; +} + +inline __m128i _mm_srli_si128_emu(__m128i a, int imm8) +{ + unsigned char result[16]; + uint8_t shift = imm8 & 0xff; + if (shift > 15) shift = 16; + + int i; + for (i = 0; i < (16 - shift); i++) + { + result[i] = ((unsigned char *)&a)[shift + i]; + } + for ( ; i < 16; i++) + { + result[i] = 0; + } + + /* + const __m128i tmp1 = _mm_load_si128(&a); + __m128i testresult = _mm_srli_si128(tmp1, imm8); + if (!memcmp(&testresult, result, 16)) + { + printf("_mm_srli_si128_emu: Portable version passed!\n"); + } + else + { + printf("_mm_srli_si128_emu: Portable version failed! val: %lx%lx imm: %x emu: %lx%lx, intrin: %lx%lx\n", + *((uint64_t *)&a + 1), *(uint64_t *)&a, + imm8, + *((uint64_t *)result + 1), *(uint64_t *)result, + *((uint64_t *)&testresult + 1), *(uint64_t *)&testresult); + } + */ + + return *(__m128i *)result; +} + +inline __m128i _mm_xor_si128_emu(__m128i a, __m128i b) +{ +#ifdef _WIN32 + uint64_t result[2]; + result[0] = *(uint64_t *)&a ^ *(uint64_t *)&b; + result[1] = *((uint64_t *)&a + 1) ^ *((uint64_t *)&b + 1); + return *(__m128i *)result; +#else + return a ^ b; +#endif +} + +inline __m128i _mm_load_si128_emu(const void *p) +{ + return *(__m128i *)p; +} + +inline void _mm_store_si128_emu(void *p, __m128i val) +{ + *(__m128i *)p = val; +} + +__m128i _mm_shuffle_epi8_emu(__m128i a, __m128i b) +{ + __m128i result; + for (int i = 0; i < 16; i++) + { + if (((uint8_t *)&b)[i] & 0x80) + { + ((uint8_t *)&result)[i] = 0; + } + else + { + ((uint8_t *)&result)[i] = ((uint8_t *)&a)[((uint8_t *)&b)[i] & 0xf]; + } + } + + /* + const __m128i tmp1 = _mm_load_si128(&a); + const __m128i tmp2 = _mm_load_si128(&b); + __m128i testresult = 
_mm_shuffle_epi8(tmp1, tmp2); + if (!memcmp(&testresult, &result, 16)) + { + printf("_mm_shuffle_epi8_emu: Portable version passed!\n"); + } + else + { + printf("_mm_shuffle_epi8_emu: Portable version failed!\n"); + } + */ + + return result; +} + +// portable +static inline __m128i lazyLengthHash_port(uint64_t keylength, uint64_t length) { + const __m128i lengthvector = _mm_set_epi64x_emu(keylength,length); + const __m128i clprod1 = _mm_clmulepi64_si128_emu( lengthvector, lengthvector, 0x10); + return clprod1; +} + +// modulo reduction to 64-bit value. The high 64 bits contain garbage, see precompReduction64 +static inline __m128i precompReduction64_si128_port( __m128i A) { + + //const __m128i C = _mm_set_epi64x(1U,(1U<<4)+(1U<<3)+(1U<<1)+(1U<<0)); // C is the irreducible poly. (64,4,3,1,0) + const __m128i C = _mm_cvtsi64_si128_emu((1U<<4)+(1U<<3)+(1U<<1)+(1U<<0)); + __m128i Q2 = _mm_clmulepi64_si128_emu( A, C, 0x01); + __m128i Q3 = _mm_shuffle_epi8_emu(_mm_setr_epi8_emu(0, 27, 54, 45, 108, 119, 90, 65, (char)216, (char)195, (char)238, (char)245, (char)180, (char)175, (char)130, (char)153), + _mm_srli_si128_emu(Q2,8)); + __m128i Q4 = _mm_xor_si128_emu(Q2,A); + const __m128i final = _mm_xor_si128_emu(Q3,Q4); + return final;/// WARNING: HIGH 64 BITS SHOULD BE ASSUMED TO CONTAIN GARBAGE +} + +static inline uint64_t precompReduction64_port( __m128i A) { + __m128i tmp = precompReduction64_si128_port(A); + return _mm_cvtsi128_si64_emu(tmp); +} + +// verus intermediate hash extra +__m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch) +{ + __m128i const *pbuf; + + // divide key mask by 16 from bytes to __m128i + keyMask >>= 4; + + // the random buffer must have at least 32 16 byte dwords after the keymask to work with this + // algorithm. 
we take the value from the last element inside the keyMask + 2, as that will never + // be used to xor into the accumulator before it is hashed with other values first + __m128i acc = _mm_load_si128_emu(randomsource + (keyMask + 2)); + + for (int64_t i = 0; i < 32; i++) + { + const uint64_t selector = _mm_cvtsi128_si64_emu(acc); + + // get two random locations in the key, which will be mutated and swapped + __m128i *prand = randomsource + ((selector >> 5) & keyMask); + __m128i *prandex = randomsource + ((selector >> 32) & keyMask); + + *pMoveScratch++ = prand; + *pMoveScratch++ = prandex; + + // select random start and order of pbuf processing + pbuf = buf + (selector & 3); + + switch (selector & 0x1c) + { + case 0: + { + const __m128i temp1 = _mm_load_si128_emu(prandex); + const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128_emu(prand); + _mm_store_si128_emu(prand, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); + acc = _mm_xor_si128_emu(clprod12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prandex, tempb2); + break; + } + case 4: + { + const __m128i temp1 = _mm_load_si128_emu(prand); + const __m128i temp2 = _mm_load_si128_emu(pbuf); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + const __m128i clprod2 = _mm_clmulepi64_si128_emu(temp2, temp2, 0x10); + 
acc = _mm_xor_si128_emu(clprod2, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + acc = _mm_xor_si128_emu(add12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prand, tempb2); + break; + } + case 8: + { + const __m128i temp1 = _mm_load_si128_emu(prandex); + const __m128i temp2 = _mm_load_si128_emu(pbuf); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + acc = _mm_xor_si128_emu(add1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128_emu(prand); + _mm_store_si128_emu(prand, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); + acc = _mm_xor_si128_emu(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10); + acc = _mm_xor_si128_emu(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prandex, tempb2); + break; + } + case 0xc: + { + const __m128i temp1 = _mm_load_si128_emu(prand); + const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + + // cannot be zero here + const int32_t divisor = (uint32_t)selector; + + acc = _mm_xor_si128_emu(add1, acc); + + const int64_t dividend = _mm_cvtsi128_si64_emu(acc); + const __m128i modulo = 
_mm_cvtsi32_si128_emu(dividend % divisor); + acc = _mm_xor_si128_emu(modulo, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + if (dividend & 1) + { + const __m128i temp12 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); + acc = _mm_xor_si128_emu(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10); + acc = _mm_xor_si128_emu(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prand, tempb2); + } + else + { + const __m128i tempb3 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + _mm_store_si128_emu(prand, tempb3); + } + break; + } + case 0x10: + { + // a few AES operations + const __m128i *rc = prand; + __m128i tmp; + + __m128i temp1 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + __m128i temp2 = _mm_load_si128_emu(pbuf); + + AES2_EMU(temp1, temp2, 0); + MIX2_EMU(temp1, temp2); + + AES2_EMU(temp1, temp2, 4); + MIX2_EMU(temp1, temp2); + + AES2_EMU(temp1, temp2, 8); + MIX2_EMU(temp1, temp2); + + acc = _mm_xor_si128_emu(temp1, acc); + acc = _mm_xor_si128_emu(temp2, acc); + + const __m128i tempa1 = _mm_load_si128_emu(prand); + const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa3); + _mm_store_si128_emu(prand, tempa4); + break; + } + case 0x14: + { + // we'll just call this one the monkins loop, inspired by Chris + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + __m128i tmp; // used by MIX2 + + uint64_t rounds = selector >> 61; // loop 
randomly between 1 and 8 times + __m128i *rc = prand; + uint64_t aesround = 0; + __m128i onekey; + + do + { + // note that due to compiler and CPUs, we expect this to do: + // if (selector & ((0x10000000 << rounds) & 0xffffffff) if rounds != 3 else selector & 0xffffffff80000000): + if (selector & (0x10000000 << rounds)) + { + onekey = _mm_load_si128_emu(rc++); + const __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? pbuf : buftmp); + const __m128i add1 = _mm_xor_si128_emu(onekey, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + } + else + { + onekey = _mm_load_si128_emu(rc++); + __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? buftmp : pbuf); + const uint64_t roundidx = aesround++ << 2; + AES2_EMU(onekey, temp2, roundidx); + + MIX2_EMU(onekey, temp2); + + acc = _mm_xor_si128_emu(onekey, acc); + acc = _mm_xor_si128_emu(temp2, acc); + } + } while (rounds--); + + const __m128i tempa1 = _mm_load_si128_emu(prand); + const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa3); + _mm_store_si128_emu(prand, tempa4); + break; + } + case 0x18: + { + const __m128i temp1 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i temp2 = _mm_load_si128_emu(prand); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp2); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp2); + + const __m128i tempb3 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + _mm_store_si128_emu(prand, tempb3); + break; + } + case 0x1c: + { + const __m128i temp1 = _mm_load_si128_emu(pbuf); + const __m128i temp2 = _mm_load_si128_emu(prandex); + const __m128i add1 = 
_mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp2); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp2); + + const __m128i tempa3 = _mm_load_si128_emu(prand); + _mm_store_si128_emu(prand, tempa2); + + acc = _mm_xor_si128_emu(tempa3, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, tempa3); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, tempa3); + _mm_store_si128_emu(prandex, tempb2); + break; + } + } + } + return acc; +} + +// verus intermediate hash extra +__m128i __verusclmulwithoutreduction64alignedrepeat_sv2_1_port(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch) +{ + const __m128i pbuf_copy[4] = {_mm_xor_si128(buf[0],buf[2]), _mm_xor_si128(buf[1],buf[3]), buf[2], buf[3]}; + const __m128i *pbuf; + + // divide key mask by 16 from bytes to __m128i + keyMask >>= 4; + + // the random buffer must have at least 32 16 byte dwords after the keymask to work with this + // algorithm. 
we take the value from the last element inside the keyMask + 2, as that will never + // be used to xor into the accumulator before it is hashed with other values first + __m128i acc = _mm_load_si128_emu(randomsource + (keyMask + 2)); + + for (int64_t i = 0; i < 32; i++) + { + const uint64_t selector = _mm_cvtsi128_si64_emu(acc); + + // get two random locations in the key, which will be mutated and swapped + __m128i *prand = randomsource + ((selector >> 5) & keyMask); + __m128i *prandex = randomsource + ((selector >> 32) & keyMask); + + *pMoveScratch++ = prand; + *pMoveScratch++ = prandex; + + // select random start and order of pbuf processing + pbuf = pbuf_copy + (selector & 3); + + switch (selector & 0x1c) + { + case 0: + { + const __m128i temp1 = _mm_load_si128_emu(prandex); + const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128_emu(prand); + _mm_store_si128_emu(prand, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); + acc = _mm_xor_si128_emu(clprod12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prandex, tempb2); + break; + } + case 4: + { + const __m128i temp1 = _mm_load_si128_emu(prand); + const __m128i temp2 = _mm_load_si128_emu(pbuf); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + const __m128i clprod2 = _mm_clmulepi64_si128_emu(temp2, temp2, 
0x10); + acc = _mm_xor_si128_emu(clprod2, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + acc = _mm_xor_si128_emu(add12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prand, tempb2); + break; + } + case 8: + { + const __m128i temp1 = _mm_load_si128_emu(prandex); + const __m128i temp2 = _mm_load_si128_emu(pbuf); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + acc = _mm_xor_si128_emu(add1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128_emu(prand); + _mm_store_si128_emu(prand, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); + acc = _mm_xor_si128_emu(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10); + acc = _mm_xor_si128_emu(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prandex, tempb2); + break; + } + case 0xc: + { + const __m128i temp1 = _mm_load_si128_emu(prand); + const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + + // cannot be zero here + const int32_t divisor = (uint32_t)selector; + + acc = _mm_xor_si128_emu(add1, acc); + + const int64_t dividend = _mm_cvtsi128_si64_emu(acc); + const __m128i modulo = 
_mm_cvtsi32_si128_emu(dividend % divisor); + acc = _mm_xor_si128_emu(modulo, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + if (dividend & 1) + { + const __m128i temp12 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); + acc = _mm_xor_si128_emu(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10); + acc = _mm_xor_si128_emu(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prand, tempb2); + } + else + { + const __m128i tempb3 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + _mm_store_si128_emu(prand, tempb3); + } + break; + } + case 0x10: + { + // a few AES operations + const __m128i *rc = prand; + __m128i tmp; + + __m128i temp1 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + __m128i temp2 = _mm_load_si128_emu(pbuf); + + AES2_EMU(temp1, temp2, 0); + MIX2_EMU(temp1, temp2); + + AES2_EMU(temp1, temp2, 4); + MIX2_EMU(temp1, temp2); + + AES2_EMU(temp1, temp2, 8); + MIX2_EMU(temp1, temp2); + + acc = _mm_xor_si128_emu(temp1, acc); + acc = _mm_xor_si128_emu(temp2, acc); + + const __m128i tempa1 = _mm_load_si128_emu(prand); + const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa3); + _mm_store_si128_emu(prand, tempa4); + break; + } + case 0x14: + { + // we'll just call this one the monkins loop, inspired by Chris + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + __m128i tmp; // used by MIX2 + + uint64_t rounds = selector >> 61; // loop 
randomly between 1 and 8 times + __m128i *rc = prand; + uint64_t aesround = 0; + __m128i onekey; + + do + { + // this is simplified over the original verus_clhash + if (selector & (((uint64_t)0x10000000) << rounds)) + { + onekey = _mm_load_si128_emu(rc++); + const __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? pbuf : buftmp); + const __m128i add1 = _mm_xor_si128_emu(onekey, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + } + else + { + onekey = _mm_load_si128_emu(rc++); + __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? buftmp : pbuf); + const uint64_t roundidx = aesround++ << 2; + AES2_EMU(onekey, temp2, roundidx); + + MIX2_EMU(onekey, temp2); + + acc = _mm_xor_si128_emu(onekey, acc); + acc = _mm_xor_si128_emu(temp2, acc); + } + } while (rounds--); + + const __m128i tempa1 = _mm_load_si128_emu(prand); + const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa3); + _mm_store_si128_emu(prand, tempa4); + break; + } + case 0x18: + { + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + + uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times + __m128i *rc = prand; + __m128i onekey; + + do + { + if (selector & (((uint64_t)0x10000000) << rounds)) + { + onekey = _mm_load_si128_emu(rc++); + const __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? pbuf : buftmp); + const __m128i add1 = _mm_xor_si128_emu(onekey, temp2); + // cannot be zero here, may be negative + const int32_t divisor = (uint32_t)selector; + const int64_t dividend = _mm_cvtsi128_si64_emu(add1); + const __m128i modulo = _mm_cvtsi32_si128_emu(dividend % divisor); + acc = _mm_xor_si128_emu(modulo, acc); + } + else + { + onekey = _mm_load_si128_emu(rc++); + __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? 
buftmp : pbuf); + const __m128i add1 = _mm_xor_si128_emu(onekey, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + const __m128i clprod2 = _mm_mulhrs_epi16_emu(acc, clprod1); + acc = _mm_xor_si128_emu(clprod2, acc); + } + } while (rounds--); + + const __m128i tempa3 = _mm_load_si128_emu(prandex); + const __m128i tempa4 = _mm_xor_si128_emu(tempa3, acc); + _mm_store_si128_emu(prandex, tempa4); + _mm_store_si128_emu(prand, onekey); + break; + } + case 0x1c: + { + const __m128i temp1 = _mm_load_si128_emu(pbuf); + const __m128i temp2 = _mm_load_si128_emu(prandex); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp2); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp2); + + const __m128i tempa3 = _mm_load_si128_emu(prand); + _mm_store_si128_emu(prand, tempa2); + + acc = _mm_xor_si128_emu(tempa3, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, tempa3); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, tempa3); + _mm_store_si128_emu(prandex, tempb2); + break; + } + } + } + return acc; +} + +// verus intermediate hash extra +__m128i __verusclmulwithoutreduction64alignedrepeat_sv2_2_port(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask, __m128i **pMoveScratch) +{ + const __m128i pbuf_copy[4] = {_mm_xor_si128(buf[0],buf[2]), _mm_xor_si128(buf[1],buf[3]), buf[2], buf[3]}; + const __m128i *pbuf; + + // divide key mask by 16 from bytes to __m128i + keyMask >>= 4; + + // the random buffer must have at least 32 16 byte dwords after the keymask to work with this + // algorithm. 
we take the value from the last element inside the keyMask + 2, as that will never + // be used to xor into the accumulator before it is hashed with other values first + __m128i acc = _mm_load_si128_emu(randomsource + (keyMask + 2)); + + for (int64_t i = 0; i < 32; i++) + { + //std::cout << "LOOP " << i << " acc: " << LEToHex(acc) << std::endl; + + const uint64_t selector = _mm_cvtsi128_si64_emu(acc); + + // get two random locations in the key, which will be mutated and swapped + __m128i *prand = randomsource + ((selector >> 5) & keyMask); + __m128i *prandex = randomsource + ((selector >> 32) & keyMask); + + *pMoveScratch++ = prand; + *pMoveScratch++ = prandex; + + // select random start and order of pbuf processing + pbuf = pbuf_copy + (selector & 3); + + switch (selector & 0x1c) + { + case 0: + { + const __m128i temp1 = _mm_load_si128_emu(prandex); + const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128_emu(prand); + _mm_store_si128_emu(prand, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); + acc = _mm_xor_si128_emu(clprod12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prandex, tempb2); + break; + } + case 4: + { + const __m128i temp1 = _mm_load_si128_emu(prand); + const __m128i temp2 = _mm_load_si128_emu(pbuf); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, 
acc); + const __m128i clprod2 = _mm_clmulepi64_si128_emu(temp2, temp2, 0x10); + acc = _mm_xor_si128_emu(clprod2, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + acc = _mm_xor_si128_emu(add12, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prand, tempb2); + break; + } + case 8: + { + const __m128i temp1 = _mm_load_si128_emu(prandex); + const __m128i temp2 = _mm_load_si128_emu(pbuf); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + acc = _mm_xor_si128_emu(add1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + const __m128i temp12 = _mm_load_si128_emu(prand); + _mm_store_si128_emu(prand, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); + acc = _mm_xor_si128_emu(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10); + acc = _mm_xor_si128_emu(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prandex, tempb2); + break; + } + case 0xc: + { + const __m128i temp1 = _mm_load_si128_emu(prand); + const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + + // cannot be zero here + const int32_t divisor = (uint32_t)selector; + + acc = _mm_xor_si128_emu(add1, acc); + + const int64_t 
dividend = _mm_cvtsi128_si64_emu(acc); + const __m128i modulo = _mm_cvtsi32_si128_emu(dividend % divisor); + acc = _mm_xor_si128_emu(modulo, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); + + if (dividend & 1) + { + const __m128i temp12 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + + const __m128i temp22 = _mm_load_si128_emu(pbuf); + const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); + const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); + acc = _mm_xor_si128_emu(clprod12, acc); + const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10); + acc = _mm_xor_si128_emu(clprod22, acc); + + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); + _mm_store_si128_emu(prand, tempb2); + } + else + { + const __m128i tempb3 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa2); + _mm_store_si128_emu(prand, tempb3); + const __m128i tempb4 = _mm_load_si128_emu(pbuf); + acc = _mm_xor_si128_emu(tempb4, acc); + } + break; + } + case 0x10: + { + // a few AES operations + const __m128i *rc = prand; + __m128i tmp; + + __m128i temp1 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + __m128i temp2 = _mm_load_si128_emu(pbuf); + + AES2_EMU(temp1, temp2, 0); + MIX2_EMU(temp1, temp2); + + AES2_EMU(temp1, temp2, 4); + MIX2_EMU(temp1, temp2); + + AES2_EMU(temp1, temp2, 8); + MIX2_EMU(temp1, temp2); + + acc = _mm_xor_si128_emu(temp1, acc); + acc = _mm_xor_si128_emu(temp2, acc); + + const __m128i tempa1 = _mm_load_si128_emu(prand); + const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa3); + _mm_store_si128_emu(prand, tempa4); + break; + } + case 0x14: + { + // we'll just call this one the monkins loop, inspired 
by Chris + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + __m128i tmp; // used by MIX2 + + uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times + __m128i *rc = prand; + uint64_t aesround = 0; + __m128i onekey; + + do + { + // this is simplified over the original verus_clhash + if (selector & (((uint64_t)0x10000000) << rounds)) + { + onekey = _mm_load_si128_emu(rc++); + const __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? pbuf : buftmp); + const __m128i add1 = _mm_xor_si128_emu(onekey, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + } + else + { + onekey = _mm_load_si128_emu(rc++); + __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? buftmp : pbuf); + const uint64_t roundidx = aesround++ << 2; + AES2_EMU(onekey, temp2, roundidx); + + MIX2_EMU(onekey, temp2); + + acc = _mm_xor_si128_emu(onekey, acc); + acc = _mm_xor_si128_emu(temp2, acc); + } + } while (rounds--); + + const __m128i tempa1 = _mm_load_si128_emu(prand); + const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1); + const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2); + + const __m128i tempa4 = _mm_load_si128_emu(prandex); + _mm_store_si128_emu(prandex, tempa3); + _mm_store_si128_emu(prand, tempa4); + break; + } + case 0x18: + { + const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); + + uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times + __m128i *rc = prand; + __m128i onekey; + + do + { + if (selector & (((uint64_t)0x10000000) << rounds)) + { + onekey = _mm_load_si128_emu(rc++); + const __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? 
pbuf : buftmp); + onekey = _mm_xor_si128_emu(onekey, temp2); + // cannot be zero here, may be negative + const int32_t divisor = (uint32_t)selector; + const int64_t dividend = _mm_cvtsi128_si64_emu(onekey); + const __m128i modulo = _mm_cvtsi32_si128_emu(dividend % divisor); + acc = _mm_xor_si128_emu(modulo, acc); + } + else + { + onekey = _mm_load_si128_emu(rc++); + __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? buftmp : pbuf); + const __m128i add1 = _mm_xor_si128_emu(onekey, temp2); + onekey = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + const __m128i clprod2 = _mm_mulhrs_epi16_emu(acc, onekey); + acc = _mm_xor_si128_emu(clprod2, acc); + } + } while (rounds--); + + const __m128i tempa3 = _mm_load_si128_emu(prandex); + const __m128i tempa4 = _mm_xor_si128_emu(tempa3, acc); + _mm_store_si128_emu(prandex, onekey); + _mm_store_si128_emu(prand, tempa4); + break; + } + case 0x1c: + { + const __m128i temp1 = _mm_load_si128_emu(pbuf); + const __m128i temp2 = _mm_load_si128_emu(prandex); + const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); + const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); + acc = _mm_xor_si128_emu(clprod1, acc); + + const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp2); + const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp2); + + const __m128i tempa3 = _mm_load_si128_emu(prand); + _mm_store_si128_emu(prand, tempa2); + + acc = _mm_xor_si128_emu(tempa3, acc); + const __m128i temp4 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); + acc = _mm_xor_si128_emu(temp4,acc); + const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, tempa3); + const __m128i tempb2 = _mm_xor_si128_emu(tempb1, tempa3); + _mm_store_si128_emu(prandex, tempb2); + break; + } + } + } + return acc; +} + +// hashes 64 bytes only by doing a carryless multiplication and reduction of the repeated 64 byte sequence 16 times, +// returning a 64 bit hash value +uint64_t verusclhash_port(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i 
**pMoveScratch) { + __m128i * rs64 = (__m128i *)random; + const __m128i * string = (const __m128i *) buf; + + __m128i acc = __verusclmulwithoutreduction64alignedrepeat_port(rs64, string, keyMask, pMoveScratch); + acc = _mm_xor_si128_emu(acc, lazyLengthHash_port(1024, 64)); + return precompReduction64_port(acc); +} + +// hashes 64 bytes only by doing a carryless multiplication and reduction of the repeated 64 byte sequence 16 times, +// returning a 64 bit hash value +uint64_t verusclhash_sv2_1_port(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch) { + __m128i * rs64 = (__m128i *)random; + const __m128i * string = (const __m128i *) buf; + + __m128i acc = __verusclmulwithoutreduction64alignedrepeat_sv2_1_port(rs64, string, keyMask, pMoveScratch); + acc = _mm_xor_si128_emu(acc, lazyLengthHash_port(1024, 64)); + return precompReduction64_port(acc); +} + +uint64_t verusclhash_sv2_2_port(void * random, const unsigned char buf[64], uint64_t keyMask, __m128i **pMoveScratch) { + __m128i * rs64 = (__m128i *)random; + const __m128i * string = (const __m128i *) buf; + + __m128i acc = __verusclmulwithoutreduction64alignedrepeat_sv2_2_port(rs64, string, keyMask, pMoveScratch); + acc = _mm_xor_si128_emu(acc, lazyLengthHash_port(1024, 64)); + return precompReduction64_port(acc); +} \ No newline at end of file diff --git a/src/Native/libverushash/crypto/verus_hash.cpp b/src/Native/libverushash/crypto/verus_hash.cpp new file mode 100644 index 000000000..192636e99 --- /dev/null +++ b/src/Native/libverushash/crypto/verus_hash.cpp @@ -0,0 +1,175 @@ +// (C) 2018 The Verus Developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +/* +This provides the PoW hash function for Verus, a CPU-optimized hash +function with a Haraka V2 core. Unlike Haraka, which is made for short +inputs only, Verus Hash takes any length of input and produces a 256 +bit output. 
+*/
+#include <string.h>
+#include "common.h"
+#include "verus_hash.h"
+
+// One-shot chained hash shared by CVerusHash::Hash and CVerusHashV2::Hash.
+//
+// The input is consumed in pieces of up to 32 bytes. Each round hashes a
+// 64-byte block made of the previous round's 32-byte output (all zero for the
+// first round) followed by the next piece of input, zero-padded when fewer
+// than 32 bytes remain. The two 64-byte halves of one scratch buffer are used
+// alternately, so a round's output lands where the next round reads its state.
+//
+// haraka512  compression function to apply; must not be null
+// result     receives 32 bytes
+// data, len  input bytes; len == 0 runs no round and yields 32 zero bytes
+//
+// The length stays in size_t for the whole loop, so inputs of 2 GiB and more
+// are neither truncated nor turned into a negative count.
+static void VerusChainedHash(void (*haraka512)(unsigned char *out, const unsigned char *in),
+                             void *result, const void *data, size_t len)
+{
+    unsigned char buf[128];
+    unsigned char *src = buf;       // block being hashed: state | input piece
+    unsigned char *dst = buf + 64;  // receives this round's output
+    const unsigned char *in = (const unsigned char *)data;
+
+    // put zero at the beginning of the buffer as the initial state
+    memset(src, 0, 32);
+
+    // digest up to 32 bytes at a time
+    for (size_t remaining = len; remaining > 0; )
+    {
+        const size_t piece = remaining < 32 ? remaining : 32;
+        memcpy(src + 32, in, piece);
+        if (piece < 32)
+        {
+            memset(src + 32 + piece, 0, 32 - piece);
+        }
+        (*haraka512)(dst, src);
+
+        // the output half becomes the next round's input half and vice versa
+        unsigned char *tmp = src;
+        src = dst;
+        dst = tmp;
+
+        in += piece;
+        remaining -= piece;
+    }
+    memcpy(result, src, 32);
+}
+
+void (*CVerusHash::haraka512Function)(unsigned char *out, const unsigned char *in);
+
+// Hashes _len bytes at data into the 32 bytes at result.
+// haraka512Function is only assigned by init(), so init() must have run first.
+void CVerusHash::Hash(void *result, const void *data, size_t _len)
+{
+    VerusChainedHash(haraka512Function, result, data, _len);
+}
+
+// Selects the Haraka512 implementation: haraka512_zero when
+// IsCPUVerusOptimized() reports true, haraka512_port_zero otherwise.
+void CVerusHash::init()
+{
+    if (IsCPUVerusOptimized())
+    {
+        haraka512Function = &haraka512_zero;
+    }
+    else
+    {
+        haraka512Function = &haraka512_port_zero;
+    }
+}
+
+// Streams _len bytes into the running hash. Bytes accumulate in the upper half
+// of curBuf; each time 32 have been gathered the block is hashed into the
+// other buffer and the two buffers swap roles. Returns *this for chaining.
+CVerusHash &CVerusHash::Write(const unsigned char *data, size_t _len)
+{
+    // digest up to 32 bytes at a time
+    for (size_t pos = 0; pos < _len; )
+    {
+        const size_t room = 32 - curPos;
+        const size_t avail = _len - pos;
+
+        if (avail >= room)
+        {
+            memcpy(curBuf + 32 + curPos, data + pos, room);
+            (*haraka512Function)(result, curBuf);
+            unsigned char *tmp = curBuf;
+            curBuf = result;
+            result = tmp;
+            pos += room;
+            curPos = 0;
+        }
+        else
+        {
+            // not enough to complete a block: stash the bytes and stop
+            memcpy(curBuf + 32 + curPos, data + pos, avail);
+            curPos += avail;
+            pos = _len;
+        }
+    }
+    return *this;
+}
+
+void (*CVerusHashV2::haraka512Function)(unsigned char *out, const unsigned char *in);
+void (*CVerusHashV2::haraka512KeyedFunction)(unsigned char *out, const unsigned char *in, const u128 *rc);
+void (*CVerusHashV2::haraka256Function)(unsigned char *out, const unsigned char *in);
+
+// Loads the Haraka constants and selects the three Haraka entry points:
+// the native set when IsCPUVerusOptimized() reports true, the portable
+// ("_port") set otherwise.
+void CVerusHashV2::init()
+{
+    if (IsCPUVerusOptimized())
+    {
+        load_constants();
+        haraka512Function = &haraka512;
+        haraka512KeyedFunction = &haraka512_keyed;
+        haraka256Function = &haraka256;
+    }
+    else
+    {
+        // load the haraka constants
+        load_constants_port();
+        haraka512Function = &haraka512_port;
+        haraka512KeyedFunction = &haraka512_port_keyed;
+        haraka256Function = &haraka256_port;
+    }
+}
+
+// Hashes _len bytes at data into the 32 bytes at result.
+// haraka512Function is only assigned by init(), so init() must have run first.
+void CVerusHashV2::Hash(void *result, const void *data, size_t _len)
+{
+    VerusChainedHash(haraka512Function, result, data, _len);
+}
+
+// Streams _len bytes into the running hash; same scheme as CVerusHash::Write.
+// Returns *this for chaining.
+CVerusHashV2 &CVerusHashV2::Write(const unsigned char *data, size_t _len)
+{
+    // digest up to 32 bytes at a time
+    for (size_t pos = 0; pos < _len; )
+    {
+        const size_t room = 32 - curPos;
+        const size_t avail = _len - pos;
+
+        if (avail >= room)
+        {
+            memcpy(curBuf + 32 + curPos, data + pos, room);
+            (*haraka512Function)(result, curBuf);
+            unsigned char *tmp = curBuf;
+            curBuf = result;
+            result = tmp;
+            pos += room;
+            curPos = 0;
+        }
+        else
+        {
+            // not enough to complete a block: stash the bytes and stop
+            memcpy(curBuf + 32 + curPos, data + pos, avail);
+            curPos += avail;
+            pos = _len;
+        }
+    }
+    return *this;
+}
\ No newline at end of file
diff --git a/src/Native/libverushash/crypto/verus_hash.h b/src/Native/libverushash/crypto/verus_hash.h
new file mode 100644
index 000000000..3a2db8af4
--- /dev/null
+++ b/src/Native/libverushash/crypto/verus_hash.h
@@ -0,0 +1,235 @@
+// (C) 2018 Michael Toutonghi
+// Distributed
under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+/*
+This provides the PoW hash function for Verus, enabling CPU mining.
+*/
+#ifndef VERUS_HASH_H_
+#define VERUS_HASH_H_
+
+// verbose output when defined
+//#define VERUSHASHDEBUG 1
+
+// NOTE(review): the two angle-bracket include targets were missing from this
+// copy of the patch; <cstring> and <vector> are assumed - confirm against the
+// upstream header. The other standard headers cover names this file uses
+// directly (std::fill, assert, printf, std::abort).
+#include <algorithm>
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <vector>
+
+#include "uint256.h"
+#include "verus_clhash.h"
+
+extern "C"
+{
+#include "haraka.h"
+#include "haraka_portable.h"
+}
+
+// First-generation Verus hash.
+//
+// Hash() is a one-shot helper; the instance methods implement a streaming
+// variant over two 64-byte buffers. curBuf holds 32 bytes of running state
+// followed by up to 32 bytes of pending input (curPos counts the pending
+// bytes); result is the buffer the next Haraka512 call writes into.
+// haraka512Function is assigned by init(), which must run before any hashing.
+class CVerusHash
+{
+    public:
+        static void Hash(void *result, const void *data, size_t len);
+        static void (*haraka512Function)(unsigned char *out, const unsigned char *in);
+
+        static void init();
+
+        CVerusHash() { }
+
+        CVerusHash &Write(const unsigned char *data, size_t len);
+
+        // Returns the object to its initial state: buf1 is the (zeroed) current
+        // buffer, buf2 the output buffer, and nothing is pending.
+        CVerusHash &Reset()
+        {
+            curBuf = buf1;
+            result = buf2;
+            curPos = 0;
+            std::fill(buf1, buf1 + sizeof(buf1), 0);
+            return *this;
+        }
+
+        // Pointer to the pending-input half of the current buffer.
+        int64_t *ExtraI64Ptr() { return (int64_t *)(curBuf + 32); }
+
+        // Zeroes the unused tail of the pending-input half (no-op when nothing is pending).
+        void ClearExtra()
+        {
+            if (curPos)
+            {
+                std::fill(curBuf + 32 + curPos, curBuf + 64, 0);
+            }
+        }
+
+        // Hashes the current 64-byte buffer as it stands, without changing any state.
+        void ExtraHash(unsigned char hash[32]) { (*haraka512Function)(hash, curBuf); }
+
+        // Produces the final digest: with pending input the block is zero-padded
+        // and hashed; otherwise the 32-byte running state is the digest.
+        void Finalize(unsigned char hash[32])
+        {
+            if (curPos)
+            {
+                std::fill(curBuf + 32 + curPos, curBuf + 64, 0);
+                (*haraka512Function)(hash, curBuf);
+            }
+            else
+                std::memcpy(hash, curBuf, 32);
+        }
+
+    private:
+        // only buf1, the first source, needs to be zero initialized
+        unsigned char buf1[64] = {0}, buf2[64];
+        unsigned char *curBuf = buf1, *result = buf2;
+        size_t curPos = 0;
+};
+
+// Second-generation Verus hash: the same streaming scheme as CVerusHash, plus
+// a keyed finalisation (Finalize2b) that mixes in a verusclhash over a key
+// derived from the buffer contents. The three Haraka function pointers are
+// assigned by init(), which must run before any hashing.
+class CVerusHashV2
+{
+    public:
+        static void Hash(void *result, const void *data, size_t len);
+        static void (*haraka512Function)(unsigned char *out, const unsigned char *in);
+        static void (*haraka512KeyedFunction)(unsigned char *out, const unsigned char *in, const u128 *rc);
+        static void (*haraka256Function)(unsigned char *out, const unsigned char *in);
+
+        static void init();
+
+        verusclhasher vclh;
+
+        // solutionVerusion selects the verusclhash variant handed to vclh.
+        CVerusHashV2(int solutionVerusion=SOLUTION_VERUSHHASH_V2) : vclh(VERUSKEYSIZE, solutionVerusion) {
+            // we must have allocated key space, or can't run
+            if (!verusclhasher_key.get())
+            {
+                printf("ERROR: failed to allocate hash buffer - terminating\n");
+                assert(false);
+                // assert() compiles to nothing under NDEBUG; without this the
+                // object would go on to use a null key buffer
+                std::abort();
+            }
+        }
+
+        CVerusHashV2 &Write(const unsigned char *data, size_t len);
+
+        // Returns the object to its initial state: buf1 is the (zeroed) current
+        // buffer, buf2 the output buffer, and nothing is pending.
+        inline CVerusHashV2 &Reset()
+        {
+            curBuf = buf1;
+            result = buf2;
+            curPos = 0;
+            std::fill(buf1, buf1 + sizeof(buf1), 0);
+            return *this;
+        }
+
+        // Pointer to the pending-input half of the current buffer.
+        inline int64_t *ExtraI64Ptr() { return (int64_t *)(curBuf + 32); }
+
+        // Zeroes the unused tail of the pending-input half (no-op when nothing is pending).
+        inline void ClearExtra()
+        {
+            if (curPos)
+            {
+                std::fill(curBuf + 32 + curPos, curBuf + 64, 0);
+            }
+        }
+
+        // Fills the unused tail of the pending-input half by copying *_data
+        // into it repeatedly; the last copy is cut short at the end of the
+        // buffer. curPos is left unchanged.
+        template <typename T>
+        inline void FillExtra(const T *_data)
+        {
+            unsigned char *data = (unsigned char *)_data;
+            int pos = curPos;
+            int left = 32 - pos;
+            do
+            {
+                int len = left > (int)sizeof(T) ? sizeof(T) : left;
+                std::memcpy(curBuf + 32 + pos, data, len);
+                pos += len;
+                left -= len;
+            } while (left > 0);
+        }
+
+        // Hash the current 64-byte buffer as it stands, unkeyed or with the given key.
+        inline void ExtraHash(unsigned char hash[32]) { (*haraka512Function)(hash, curBuf); }
+        inline void ExtraHashKeyed(unsigned char hash[32], u128 *key) { (*haraka512KeyedFunction)(hash, curBuf, key); }
+
+        // Produces the plain (unkeyed) digest: with pending input the block is
+        // zero-padded and hashed; otherwise the running state is the digest.
+        void Finalize(unsigned char hash[32])
+        {
+            if (curPos)
+            {
+                std::fill(curBuf + 32 + curPos, curBuf + 64, 0);
+                (*haraka512Function)(hash, curBuf);
+            }
+            else
+                std::memcpy(hash, curBuf, 32);
+        }
+
+        // chains Haraka256 from 32 bytes to fill the key
+        //
+        // The key buffer holds the working key in [0, size) and a pristine copy
+        // of its first refreshsize bytes in [size, size + refreshsize). When the
+        // seed matches the one recorded in the descriptor, the working key is
+        // restored from that copy instead of being regenerated.
+        static u128 *GenNewCLKey(unsigned char *seedBytes32)
+        {
+            unsigned char *key = (unsigned char *)verusclhasher_key.get();
+            verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get();
+            int size = pdesc->keySizeInBytes;
+            int refreshsize = verusclhasher::keymask(size) + 1;
+            // skip keygen if it is the current key
+            if (pdesc->seed != *((uint256 *)seedBytes32))
+            {
+                // generate a new key by chain hashing with Haraka256 from the last curbuf
+                int n256blks = size >> 5;
+                int nbytesExtra = size & 0x1f;
+                unsigned char *pkey = key;
+                unsigned char *psrc = seedBytes32;
+                for (int i = 0; i < n256blks; i++)
+                {
+                    (*haraka256Function)(pkey, psrc);
+                    psrc = pkey;
+                    pkey += 32;
+                }
+                if (nbytesExtra)
+                {
+                    unsigned char buf[32];
+                    (*haraka256Function)(buf, psrc);
+                    memcpy(pkey, buf, nbytesExtra);
+                }
+                pdesc->seed = *((uint256 *)seedBytes32);
+                memcpy(key + size, key, refreshsize);
+            }
+            else
+            {
+                memcpy(key, key + size, refreshsize);
+            }
+
+            memset((unsigned char *)key + (size + refreshsize), 0, size - refreshsize);
+            return (u128 *)key;
+        }
+
+        // Maps the 64-bit intermediate to an index (in u128 units) into the key.
+        inline uint64_t IntermediateTo128Offset(uint64_t intermediate)
+        {
+            // the mask is where we wrap
+            uint64_t mask = vclh.keyMask >> 4;
+            return intermediate & mask;
+        }
+
+        // Keyed finalisation: pads the buffer with its own leading bytes,
+        // derives a key from it, runs verusclhash, pads again with the 64-bit
+        // result and hashes with Haraka512 keyed at an offset chosen by that result.
+        void Finalize2b(unsigned char hash[32])
+        {
+            // fill buffer to the end with the beginning of it to prevent any foreknowledge of
+            // bits that may contain zero
+            FillExtra((u128 *)curBuf);
+
+#ifdef VERUSHASHDEBUG
+            uint256 *bhalf1 = (uint256 *)curBuf;
+            uint256 *bhalf2 = bhalf1 + 1;
+            printf("Curbuf: %s%s\n", bhalf1->GetHex().c_str(), bhalf2->GetHex().c_str());
+#endif
+
+            // gen new key with what is last in buffer
+            u128 *key = GenNewCLKey(curBuf);
+
+            // run verusclhash on the buffer
+            uint64_t intermediate = vclh(curBuf, key);
+
+            // fill buffer to the end with the result
+            FillExtra(&intermediate);
+
+#ifdef VERUSHASHDEBUG
+            // %llx with an explicit cast: "long" is only 32 bits on 64-bit Windows
+            printf("intermediate %llx\n", (unsigned long long)intermediate);
+            printf("Curbuf: %s%s\n", bhalf1->GetHex().c_str(), bhalf2->GetHex().c_str());
+            bhalf1 = (uint256 *)key;
+            bhalf2 = bhalf1 + ((vclh.keyMask + 1) >> 5);
+            printf(" Key: %s%s\n", bhalf1->GetHex().c_str(), bhalf2->GetHex().c_str());
+#endif
+
+            // get the final hash with a mutated dynamic key for each hash result
+            (*haraka512KeyedFunction)(hash, curBuf, key + IntermediateTo128Offset(intermediate));
+        }
+
+        // The buffer currently holding running state + pending input.
+        inline unsigned char *CurBuffer()
+        {
+            return curBuf;
+        }
+
+    private:
+        // only buf1, the first source, needs to be zero initialized
+        alignas(32) unsigned char buf1[64] = {0}, buf2[64];
+        unsigned char *curBuf = buf1, *result = buf2;
+        size_t curPos = 0;
+};
+
+#endif
\ No
newline at end of file
diff --git a/src/Native/libverushash/exports.cpp b/src/Native/libverushash/exports.cpp
new file mode 100644
index 000000000..ddd9c31e6
--- /dev/null
+++ b/src/Native/libverushash/exports.cpp
@@ -0,0 +1,49 @@
+/*
+Copyright 2017 Coin Foundry (coinfoundry.org)
+Authors: Oliver Weichhold (oliver@weichhold.com)
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+associated documentation files (the "Software"), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial
+portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
+LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include "verushashverify.h"
+
+// MODULE_API marks a function for export from the Windows DLL and expands to
+// nothing on every other platform.
+#if defined(_WIN32)
+#  define MODULE_API __declspec(dllexport)
+#else
+#  define MODULE_API
+#endif
+
+// Exported entry points, all with C linkage. Each one forwards its three
+// arguments unchanged to the function of the same name (minus the "_export"
+// suffix) declared in verushashverify.h.
+extern "C"
+{
+
+MODULE_API void verushash2b2_export(char* input, char* output, int input_length)
+{
+    verushash2b2(input, output, input_length);
+}
+
+MODULE_API void verushash2b1_export(char* input, char* output, int input_length)
+{
+    verushash2b1(input, output, input_length);
+}
+
+MODULE_API void verushash2b_export(char* input, char* output, int input_length)
+{
+    verushash2b(input, output, input_length);
+}
+
+MODULE_API void verushash2_export(char* input, char* output, int input_length)
+{
+    verushash2(input, output, input_length);
+}
+
+MODULE_API void verushash_export(char* input, char* output, int input_length)
+{
+    verushash(input, output, input_length);
+}
+
+} // extern "C"
\ No newline at end of file
diff --git a/src/Native/libverushash/libverushash.sln b/src/Native/libverushash/libverushash.sln
new file mode 100644
index 000000000..8ebe5d5f5
--- /dev/null
+++ b/src/Native/libverushash/libverushash.sln
@@ -0,0 +1,31 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.31229.75
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libverushash", "libverushash.vcxproj", "{2DE74E14-BF6D-4046-951B-8EBC8A1BA009}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{2DE74E14-BF6D-4046-951B-8EBC8A1BA009}.Debug|x64.ActiveCfg = Debug|x64
+		{2DE74E14-BF6D-4046-951B-8EBC8A1BA009}.Debug|x64.Build.0 = Debug|x64
+		{2DE74E14-BF6D-4046-951B-8EBC8A1BA009}.Debug|x86.ActiveCfg = Debug|Win32
+		{2DE74E14-BF6D-4046-951B-8EBC8A1BA009}.Debug|x86.Build.0 = Debug|Win32
+
{2DE74E14-BF6D-4046-951B-8EBC8A1BA009}.Release|x64.ActiveCfg = Release|x64 + {2DE74E14-BF6D-4046-951B-8EBC8A1BA009}.Release|x64.Build.0 = Release|x64 + {2DE74E14-BF6D-4046-951B-8EBC8A1BA009}.Release|x86.ActiveCfg = Release|Win32 + {2DE74E14-BF6D-4046-951B-8EBC8A1BA009}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {DDE0FE54-030A-4DFD-98A1-952779FB461F} + EndGlobalSection +EndGlobal diff --git a/src/Native/libverushash/libverushash.vcxproj b/src/Native/libverushash/libverushash.vcxproj new file mode 100644 index 000000000..b902bffb9 --- /dev/null +++ b/src/Native/libverushash/libverushash.vcxproj @@ -0,0 +1,203 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942} + Win32Proj + netmultihashnative + 10.0 + libverushash + + + + DynamicLibrary + true + v143 + Unicode + + + DynamicLibrary + false + v143 + true + Unicode + + + DynamicLibrary + true + v143 + Unicode + + + DynamicLibrary + false + v143 + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)\..\..\..\..\boost_1_62_0;$(ProjectDir)..\libmultihash\windows\include\libsodium;$(IncludePath);$(ProjectDir) + $(SolutionDir)\..\..\..\..\boost_1_62_0\lib32-msvc-14.0;$(LibraryPath) + + + true + $(SolutionDir)\..\..\..\..\boost_1_62_0;$(ProjectDir)..\libmultihash\windows\include\libsodium;$(IncludePath);$(ProjectDir) + $(SolutionDir)\..\..\..\..\boost_1_62_0\lib64-msvc-14.0;$(ProjectDir)..\libmultihash\windows\lib\x64;$(LibraryPath) + + + false + $(SolutionDir)\..\..\..\..\boost_1_62_0;$(ProjectDir)..\libmultihash\windows\include\libsodium;$(IncludePath);$(ProjectDir) + $(SolutionDir)\..\..\..\..\boost_1_62_0\lib32-msvc-14.0;$(LibraryPath) + + + false + 
$(SolutionDir)\..\..\..\..\boost_1_62_0;$(ProjectDir)..\libmultihash\windows\include\libsodium;$(ProjectDir)..\libmultihash\windows\include\libsodium;$(IncludePath);$(ProjectDir) + $(SolutionDir)\..\..\..\..\boost_1_62_0\lib64-msvc-14.0;$(LibraryPath) + + + + + + Level3 + Disabled + SODIUM_STATIC;_CRT_SECURE_NO_WARNINGS;HAVE_SSE2 + true + MultiThreadedDebug + stdcpp14 + + + Windows + true + Ws2_32.lib;$(ProjectDir)..\libmultihash\windows\lib\x86\libsodium.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + SODIUM_STATIC;_CRT_SECURE_NO_WARNINGS;HAVE_SSE2 + true + MultiThreadedDebug + stdcpp14 + + + Windows + true + Ws2_32.lib;$(ProjectDir)..\libmultihash\windows\lib\x64\libsodium.lib;%(AdditionalDependencies) + + + + + + + Level3 + MaxSpeed + true + true + SODIUM_STATIC;_CRT_SECURE_NO_WARNINGS;HAVE_SSE2 + true + MultiThreaded + stdcpp14 + + + Windows + true + true + true + Ws2_32.lib;$(ProjectDir)..\libmultihash\windows\lib\x86\libsodium.lib;%(AdditionalDependencies) + + + + + + + Level3 + MaxSpeed + true + true + SODIUM_STATIC;_CRT_SECURE_NO_WARNINGS;HAVE_SSE2 + true + MultiThreaded + stdcpp14 + + + Windows + true + true + true + Ws2_32.lib;$(ProjectDir)..\libmultihash\windows\lib\x64\libsodium.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/Native/libverushash/sodium.h b/src/Native/libverushash/sodium.h new file mode 100644 index 000000000..6e5aa8736 --- /dev/null +++ b/src/Native/libverushash/sodium.h @@ -0,0 +1,69 @@ + +#ifndef sodium_H +#define sodium_H + +#include "../libmultihash/windows/include/libsodium/sodium/version.h" + +#include "../libmultihash/windows/include/libsodium/sodium/core.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_aead_aes256gcm.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_aead_chacha20poly1305.h" +#include 
"../libmultihash/windows/include/libsodium/sodium/crypto_aead_xchacha20poly1305.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_auth.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_auth_hmacsha256.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_auth_hmacsha512.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_auth_hmacsha512256.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_box.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_box_curve25519xsalsa20poly1305.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_core_hsalsa20.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_core_hchacha20.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_core_salsa20.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_core_salsa2012.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_core_salsa208.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_generichash.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_generichash_blake2b.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_hash.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_hash_sha256.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_hash_sha512.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_kdf.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_kdf_blake2b.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_kx.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_onetimeauth.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_onetimeauth_poly1305.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_pwhash.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_pwhash_argon2i.h" +#include 
"../libmultihash/windows/include/libsodium/sodium/crypto_scalarmult.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_scalarmult_curve25519.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_secretbox.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_secretbox_xsalsa20poly1305.h" +//#include "../libmultihash/windows/include/libsodium/sodium/crypto_secretstream_xchacha20poly1305.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_shorthash.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_shorthash_siphash24.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_sign.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_sign_ed25519.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_stream.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_stream_chacha20.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_stream_salsa20.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_stream_xsalsa20.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_verify_16.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_verify_32.h" +#include "../libmultihash/windows/include/libsodium/sodium/crypto_verify_64.h" +#include "../libmultihash/windows/include/libsodium/sodium/randombytes.h" +//#include "../libmultihash/windows/include/libsodium/sodium/randombytes_internal_random.h" +#include "../libmultihash/windows/include/libsodium/sodium/randombytes_sysrandom.h" +#include "../libmultihash/windows/include/libsodium/sodium/runtime.h" +#include "../libmultihash/windows/include/libsodium/sodium/utils.h" + +#ifndef SODIUM_LIBRARY_MINIMAL +# include "../libmultihash/windows/include/libsodium/sodium/crypto_box_curve25519xchacha20poly1305.h" +//# include "../libmultihash/windows/include/libsodium/sodium/crypto_core_ed25519.h" +//# include 
"../libmultihash/windows/include/libsodium/sodium/crypto_core_ristretto255.h" +//# include "../libmultihash/windows/include/libsodium/sodium/crypto_scalarmult_ed25519.h" +//# include "../libmultihash/windows/include/libsodium/sodium/crypto_scalarmult_ristretto255.h" +# include "../libmultihash/windows/include/libsodium/sodium/crypto_secretbox_xchacha20poly1305.h" +# include "../libmultihash/windows/include/libsodium/sodium/crypto_pwhash_scryptsalsa208sha256.h" +# include "../libmultihash/windows/include/libsodium/sodium/crypto_stream_salsa2012.h" +# include "../libmultihash/windows/include/libsodium/sodium/crypto_stream_salsa208.h" +# include "../libmultihash/windows/include/libsodium/sodium/crypto_stream_xchacha20.h" +#endif + +#endif diff --git a/src/Native/libverushash/verushashverify.cpp b/src/Native/libverushash/verushashverify.cpp new file mode 100644 index 000000000..1d9fdec72 --- /dev/null +++ b/src/Native/libverushash/verushashverify.cpp @@ -0,0 +1,125 @@ +#include "verushashverify.h" +#include + +#include "crypto/common.h" +#include "crypto/verus_hash.h" + +const unsigned char BLAKE2Bpersonal[crypto_generichash_blake2b_PERSONALBYTES] = { 'V','e','r','u','s','D','e','f','a','u','l','t','H','a','s','h' }; +uint256 blake2b_hash(unsigned char* data, unsigned long long length) +{ + const unsigned char* personal = BLAKE2Bpersonal; + crypto_generichash_blake2b_state state; + uint256 result; + if (crypto_generichash_blake2b_init_salt_personal( + &state, + NULL, 0, // No key. + 32, + NULL, // No salt. 
+        personal) == 0) {
+        crypto_generichash_blake2b_update(&state, data, length);
+        if (crypto_generichash_blake2b_final(&state, reinterpret_cast<unsigned char*>(&result), crypto_generichash_blake2b_BYTES) == 0) {
+            return result;
+        }
+    }
+    result.SetNull();
+    return result;
+}
+
+// Runs CVerusHash::init() and CVerusHashV2::init() exactly once per process.
+//
+// Every entry point below calls this first: the static Hash() functions and
+// the streaming classes all call through function pointers that only init()
+// assigns, so hashing before init() would call a null pointer. The
+// function-local static makes the one-time setup safe when several threads
+// enter concurrently.
+// NOTE(review): assumes init() only loads constants and assigns pointers, so
+// running it once is equivalent to running it before every hash - confirm
+// against load_constants()/load_constants_port().
+static void ensure_verus_initialized()
+{
+    static const bool initialized = []() {
+        CVerusHash::init();
+        CVerusHashV2::init();
+        return true;
+    }();
+    (void)initialized;
+}
+
+// Detects PBaaS merged mining in a block header and, when present, zeroes the
+// non-canonical fields in place so the header hashes to its canonical value.
+//
+// Returns true when the header may be hashed (no PBaaS data, data already
+// cleared, or data cleared by this call) and false when the header claims
+// PBaaS headers but is truncated or does not carry the matching pre-header
+// hash - the caller treats that as an invalid share.
+//
+// All fixed offsets are checked against input_len before they are read.
+static bool clear_pbaas_noncanonical(char* input, int input_len)
+{
+    // byte offsets into the block header
+    const int kPrevBlockOffset = 4;                     // hashPrevBlock, hashMerkleRoot, hashFinalSaplingRoot (3 x 32)
+    const int kNBitsOffset = 4 + 32 + 32 + 32 + 4;      // nBits (4)
+    const int kNonceOffset = kNBitsOffset + 4;          // nNonce (32)
+    const int kSolutionOffset = 140 + 3;                // start of the solution
+    // byte sizes inside the solution
+    const int kSolnDescrSize = 4 + 1 + 1 + 2;           // version, descr, numPBaaS, extraSpace
+    const int kSolnHeaderSize = kSolnDescrSize + 32 + 32; // ... + hashPrevMMRRoot, hashBlockMMRRoot
+    const int kPbaasCidSize = 20;                       // hash160
+    const int kPbaasPreHashSize = 32;                   // pre header hash blake2b
+
+    // too short to carry a solution descriptor: nothing to detect
+    if (input_len < kSolutionOffset + kSolnDescrSize) {
+        return true;
+    }
+
+    unsigned char* const header = reinterpret_cast<unsigned char*>(input);
+    unsigned char* const solution = header + kSolutionOffset;
+
+    // little-endian version, decoded from unsigned bytes so that values
+    // >= 0x80 are not sign-extended
+    const uint32_t sol_ver = static_cast<uint32_t>(solution[0])
+                           | (static_cast<uint32_t>(solution[1]) << 8)
+                           | (static_cast<uint32_t>(solution[2]) << 16)
+                           | (static_cast<uint32_t>(solution[3]) << 24);
+    if (sol_ver <= 6) {
+        return true;
+    }
+
+    const uint8_t numPBaaSHeaders = solution[5];
+    if (numPBaaSHeaders == 0) {
+        return true;
+    }
+
+    // PBaaS headers are claimed, so the MMR roots must be present
+    if (input_len < kSolutionOffset + kSolnHeaderSize) {
+        return false;
+    }
+
+    unsigned char preHeader[32 + 32 + 32 + 32 + 4 + 32 + 32] = { 0, };
+
+    // copy non-canonical items from block header
+    memcpy(&preHeader[0], header + kPrevBlockOffset, 32);             // hashPrevBlock
+    memcpy(&preHeader[32], header + kPrevBlockOffset + 32, 32);       // hashMerkleRoot
+    memcpy(&preHeader[64], header + kPrevBlockOffset + 32 + 32, 32);  // hashFinalSaplingRoot
+    memcpy(&preHeader[96], header + kNonceOffset, 32);                // nNonce (if nonce changes must update preHeaderHash in solution)
+    memcpy(&preHeader[128], header + kNBitsOffset, 4);                // nBits
+    memcpy(&preHeader[132], solution + kSolnDescrSize, 32 + 32);      // hashPrevMMRRoot, hashBlockMMRRoot
+
+    // if the data has already been cleared of non-canonical data, just continue along
+    bool alreadyCleared = true;
+    for (size_t i = 0; i < sizeof(preHeader); i++) {
+        if (preHeader[i] != 0) {
+            alreadyCleared = false;
+            break;
+        }
+    }
+    if (alreadyCleared) {
+        return true;
+    }
+
+    // the first pbaas chain definition must be present to compare against
+    if (input_len < kSolutionOffset + kSolnHeaderSize + kPbaasCidSize + kPbaasPreHashSize) {
+        return false;
+    }
+
+    // detect merged mining by looking for preHeaderHash (blake2b) in first pbaas chain definition
+    uint256 preHeaderHash = blake2b_hash(&preHeader[0], sizeof(preHeader));
+    if (preHeaderHash.IsNull() ||
+        memcmp(reinterpret_cast<const unsigned char*>(&preHeaderHash),
+               solution + kSolnHeaderSize + kPbaasCidSize,
+               kPbaasPreHashSize) != 0) {
+        // invalid share, pbaas activated must be pbaas mining compatible
+        return false;
+    }
+
+    // clear non-canonical data for pbaas merge mining
+    memset(header + kPrevBlockOffset, 0, 32 + 32 + 32);   // hashPrevBlock, hashMerkleRoot, hashFinalSaplingRoot
+    memset(header + kNBitsOffset, 0, 4);                  // nBits
+    memset(header + kNonceOffset, 0, 32);                 // nNonce
+    memset(solution + kSolnDescrSize, 0, 32 + 32);        // hashPrevMMRRoot, hashBlockMMRRoot
+    return true;
+}
+
+// Streams input through a CVerusHashV2 of the given solution version and
+// writes the keyed (Finalize2b) digest to output.
+//
+// The hasher lives on the stack: it is released on return (the previous
+// `new` without `delete` leaked one object per call) and its alignas(32)
+// buffers get the alignment they ask for.
+static void hash_v2b(int solutionVersion, const char* input, char* output, int input_len)
+{
+    CVerusHashV2 hasher(solutionVersion);
+    hasher.Reset();
+    hasher.Write(reinterpret_cast<const unsigned char*>(input), input_len);
+    hasher.Finalize2b(reinterpret_cast<unsigned char*>(output));
+}
+
+// VerusHash 2.2 of input_len bytes at input; 32 bytes are written to output.
+// For PBaaS merged-mining headers the non-canonical fields of input are
+// zeroed in place first; if the header claims PBaaS data that cannot be
+// verified, output is filled with 0xff and nothing is hashed.
+void verushash2b2(char* input, char* output, int input_len)
+{
+    ensure_verus_initialized();
+
+    // detect pbaas, validate and clear non-canonical data if needed
+    if (!clear_pbaas_noncanonical(input, input_len)) {
+        memset(output, 0xff, 32);
+        return;
+    }
+
+    hash_v2b(SOLUTION_VERUSHHASH_V2_2, input, output, input_len);
+}
+
+// VerusHash 2.1 of input_len bytes at input; 32 bytes are written to output.
+void verushash2b1(char* input, char* output, int input_len)
+{
+    ensure_verus_initialized();
+    hash_v2b(SOLUTION_VERUSHHASH_V2_1, input, output, input_len);
+}
+
+// VerusHash 2.0 (keyed finalisation) of input_len bytes at input; 32 bytes are written to output.
+void verushash2b(char* input, char* output, int input_len)
+{
+    ensure_verus_initialized();
+    hash_v2b(SOLUTION_VERUSHHASH_V2, input, output, input_len);
+}
+
+// One-shot CVerusHashV2::Hash of input_len bytes at input; 32 bytes are written to output.
+void verushash2(char* input, char* output, int input_len)
+{
+    ensure_verus_initialized();
+    CVerusHashV2::Hash(output, input, input_len);
+}
+
+// One-shot CVerusHash::Hash of input_len bytes at input; 32 bytes are written to output.
+void verushash(char* input, char* output, int input_len)
+{
+    ensure_verus_initialized();
+    CVerusHash::Hash(output, input, input_len);
+}
\ No newline at end of file
diff --git a/src/Native/libverushash/verushashverify.h b/src/Native/libverushash/verushashverify.h
new file mode 100644
index 000000000..47267fc25
--- /dev/null
+++ b/src/Native/libverushash/verushashverify.h
@@ -0,0 +1,20 @@
+#ifndef VERUSHASHVERIFY_H
+#define VERUSHASHVERIFY_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void verushash2b2(char* input, char* output, int input_len);
+void verushash2b1(char* input, char* output, int input_len);
+void verushash2b(char* input, char* output, int input_len);
+void verushash2(char* input, char* output, int input_len);
+void verushash(char* input, char* output, int input_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file