Skip to content

Commit

Permalink
ARM: implement multiplication carry flag algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
calc84maniac authored and fleroviux committed Dec 12, 2024
1 parent aa9005b commit 61b0c90
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 13 deletions.
106 changes: 105 additions & 1 deletion src/nba/src/arm/handlers/arithmetic.inl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,18 @@
*
* Licensed under GPLv3 or any later version.
* Refer to the included LICENSE file.
*
* Multiplication carry flag algorithm has been altered from its original form according to its GPL-compatible license, as follows:
*
* Copyright (C) 2024 zaydlang, calc84maniac
*
* This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/

void SetZeroAndSignFlag(u32 value) {
Expand All @@ -11,7 +23,7 @@ void SetZeroAndSignFlag(u32 value) {
}

template<bool is_signed = true>
void TickMultiply(u32 multiplier) {
bool TickMultiply(u32 multiplier) {
u32 mask = 0xFFFFFF00;

bus.Idle();
Expand All @@ -28,6 +40,98 @@ void TickMultiply(u32 multiplier) {
mask <<= 8;
bus.Idle();
}

// Return true if full ticks used.
return mask == 0;
}

/**
 * Carry flag for a multiplication that used every multiplier cycle.
 *
 * In that case the carry is taken straight from the carry bit injected by the
 * final booth step, and that step's addend is negative only when the two
 * most significant multiplier bits are `10`.
 *
 * @param multiplier  The multiplier operand (the value passed to TickMultiply).
 * @return true when bit 31 is set and bit 30 is clear.
 */
bool MultiplyCarrySimple(u32 multiplier) {
  // Isolate bits 31..30 in place and compare them against the pattern `10`.
  const u32 top_two_bits = multiplier & 0xC0000000;
  return top_two_bits == 0x80000000;
}

/**
 * Carry flag for a multiplication that terminated early, derived from the low
 * 32 bits of the operands by replaying the booth additions through a
 * carry-save adder.
 *
 * The multiplier is consumed eight bits (four booth steps) per pass, and
 * passes repeat until the booth value built so far equals the multiplier.
 *
 * NOTE(review): the shift count starts at 29 and drops by 2 per step, so it
 * would become negative on the last step of a fourth pass. The callers in
 * this file only use this function when TickMultiply reports that it did not
 * use all of its cycles; presumably that guarantees the loop stops after at
 * most three passes - confirm before calling it from anywhere else.
 *
 * @param multiplicand  Operand that gets scaled by each booth factor.
 * @param multiplier    Operand that is booth-recoded.
 * @param accum         Low 32 bits of the accumulate value (0 if none).
 * @return Bit 31 of the carry-save adder's final carry word.
 */
bool MultiplyCarryLo(u32 multiplicand, u32 multiplier, u32 accum = 0) {
  // With bit 0 forced on, negating the multiplicand inverts its upper bits.
  // The forced bit itself cannot propagate into the resulting carry bit.
  const u32 operand = multiplicand | 1;

  // Booth value after dropping the top `drop` multiplier bits: the remaining
  // low bits, sign-extended back to 32 bits.
  const auto booth_value = [multiplier](int drop) -> u32 {
    return static_cast<u32>(static_cast<s32>(multiplier << drop) >> drop);
  };

  // First step, special-cased: only multiplier bit 0 contributes.
  u32 prev_booth = booth_value(31);
  u32 csa_carry = operand * prev_booth;
  u32 csa_sum = accum;
  u32 total = csa_carry + csa_sum;

  int drop_count = 29;
  for (;;) {
    // One pass: 8 multiplier bits handled as 4 booth steps.
    for (int step = 0; step != 4; ++step) {
      // Difference between consecutive booth values is this step's factor
      // (-2 to 2, already scaled to its bit position).
      const u32 cur_booth = booth_value(drop_count);
      const u32 addend = operand * (cur_booth - prev_booth);
      prev_booth = cur_booth;

      // Carry-save add: the sum word is the XOR of all three inputs, and the
      // carry word is whatever is left of the true running total.
      csa_sum ^= csa_carry ^ addend;
      total += addend;
      csa_carry = total - csa_sum;

      drop_count -= 2;
    }

    // Done once the whole multiplier has been accounted for.
    if (prev_booth == multiplier) {
      break;
    }
  }

  // The flag is the top bit of the final carry word.
  return (csa_carry >> 31) != 0;
}

/**
 * Carry flag for a long (64-bit) multiplication that used every multiplier
 * cycle.
 *
 * Only the last three booth steps can influence the output carry, so both
 * operands are scaled down until the upper bits of the 64-bit booth addends
 * land in the upper bits of 32-bit values. Bit numbers in the comments refer
 * to the 64-bit result; bit k of a 32-bit value here stands for bit k + 32.
 *
 * @tparam sign_extend  true to scale the operands with arithmetic shifts
 *                      (signed multiply), false for logical shifts.
 * @param multiplicand  Operand that gets scaled by each booth factor.
 * @param multiplier    Operand that is booth-recoded.
 * @param accum_hi      High 32 bits of the accumulate value (0 if none).
 * @return The carry out of bit 63.
 */
template<bool sign_extend>
bool MultiplyCarryHi(u32 multiplicand, u32 multiplier, u32 accum_hi = 0) {
  // Scale both inputs down, extending the sign only for signed multiplies.
  u32 operand = sign_extend
    ? static_cast<u32>(static_cast<s32>(multiplicand) >> 6)
    : multiplicand >> 6;
  const u32 top_bits = sign_extend
    ? static_cast<u32>(static_cast<s32>(multiplier) >> 26)
    : multiplier >> 26;

  // With bit 0 forced on, negating the multiplicand inverts its upper bits.
  // The forced bit itself cannot propagate into the resulting carry bit.
  operand |= 1;

  // Booth values seen by the last three steps (sign-extended low bits).
  const u32 booth_first = static_cast<u32>(static_cast<s32>(top_bits << 31) >> 31);
  const u32 booth_mid   = static_cast<u32>(static_cast<s32>(top_bits << 29) >> 29);
  const u32 booth_last  = static_cast<u32>(static_cast<s32>(top_bits << 27) >> 27);

  // Scaled addends of the 3rd-last, 2nd-last and last booth steps.
  const u32 addend_first = operand * (booth_mid - booth_first);
  const u32 addend_mid   = operand * (booth_last - booth_mid);
  const u32 addend_last  = operand * (top_bits - booth_last);

  // Magic bit 61 pre-populated for the carry word.
  const u32 carry_magic = ~accum_hi & 0x20000000;

  // Magic bits 63-60 for the accumulator, with the carry magic pre-added.
  u32 accum_magic = accum_hi - 0x08000000;
  // Bits 61-60 are finalized by the sign of the 3rd-last addend...
  accum_magic -= addend_first & 0x10000000;
  // ...and bits 63-62 by the sign of the 2nd-last addend.
  accum_magic -= addend_mid & 0x40000000;

  // Carry-save add at bit 61 (propagating into bit 62), then add the last
  // addend at bit 62 and let its carry propagate as well. This must use the
  // accumulator magic from before the carry magic is taken back out.
  const u32 sum = accum_magic + (addend_mid & 0x20000000) + (addend_last & 0x40000000);

  // Remove the carry magic to obtain the actual accumulator magic.
  accum_magic -= carry_magic;

  // Cancelling accumulator magic bit 63 leaves carry bit 63.
  return ((sum ^ accum_magic) >> 31) != 0;
}

u32 ADD(u32 op1, u32 op2, bool set_flags) {
Expand Down
12 changes: 9 additions & 3 deletions src/nba/src/arm/handlers/handler16.inl
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,18 @@ void Thumb_ALU(u16 instruction) {
break;
}
case ThumbDataOp::MUL: {
TickMultiply(state.reg[dst]);
u32 lhs = state.reg[src];
u32 rhs = state.reg[dst];
bool full = TickMultiply(rhs);
pipe.access = Access::Code | Access::Nonsequential;

state.reg[dst] *= state.reg[src];
state.reg[dst] = lhs * rhs;
SetZeroAndSignFlag(state.reg[dst]);
state.cpsr.f.c = 0;
if (full) {
state.cpsr.f.c = MultiplyCarrySimple(rhs);
} else {
state.cpsr.f.c = MultiplyCarryLo(lhs, rhs);
}
break;
}
case ThumbDataOp::BIC: {
Expand Down
33 changes: 24 additions & 9 deletions src/nba/src/arm/handlers/handler32.inl
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,22 @@ void ARM_Multiply(u32 instruction) {
auto rhs = GetReg(op2);
auto result = lhs * rhs;

TickMultiply(rhs);
bool full = TickMultiply(rhs);

u32 accum = 0;
if (accumulate) {
result += GetReg(op3);
accum = GetReg(op3);
result += accum;
bus.Idle();
}

if (set_flags) {
SetZeroAndSignFlag(result);
if (full) {
state.cpsr.f.c = MultiplyCarrySimple(rhs);
} else {
state.cpsr.f.c = MultiplyCarryLo(lhs, rhs, accum);
}
}

SetReg(dst, result);
Expand All @@ -273,7 +280,7 @@ void ARM_MultiplyLong(u32 instruction) {
int dst_lo = (instruction >> 12) & 0xF;
int dst_hi = (instruction >> 16) & 0xF;

s64 result;
u64 result;

pipe.access = Access::Code | Access::Nonsequential;
state.r15 += 4;
Expand All @@ -284,18 +291,21 @@ void ARM_MultiplyLong(u32 instruction) {
if (sign_extend) {
result = s64(s32(lhs)) * s64(s32(rhs));
} else {
result = s64(u64(lhs) * u64(rhs));
result = u64(lhs) * u64(rhs);
}

TickMultiply<sign_extend>(rhs);
bool full = TickMultiply<sign_extend>(rhs);
bus.Idle();

u32 accum_lo = 0;
u32 accum_hi = 0;
if (accumulate) {
s64 value = GetReg(dst_hi);
accum_lo = GetReg(dst_lo);
accum_hi = GetReg(dst_hi);

value <<= 16;
value <<= 16;
value |= GetReg(dst_lo);
u64 value = accum_hi;
value <<= 32;
value |= accum_lo;

result += value;
bus.Idle();
Expand All @@ -306,6 +316,11 @@ void ARM_MultiplyLong(u32 instruction) {
if (set_flags) {
state.cpsr.f.n = result_hi >> 31;
state.cpsr.f.z = result == 0;
if (full) {
state.cpsr.f.c = MultiplyCarryHi<sign_extend>(lhs, rhs, accum_hi);
} else {
state.cpsr.f.c = MultiplyCarryLo(lhs, rhs, accum_lo);
}
}

SetReg(dst_lo, result & 0xFFFFFFFF);
Expand Down

0 comments on commit 61b0c90

Please sign in to comment.