diff --git a/README.md b/README.md index e7f504e..8752593 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,30 @@ echo dOut # @[(4.5, 0.0), (2.081559480312316, -1.651098762732523), (-1.831559480312316, 1.608220406444071), (0.0, 0.0), (0.0, 0.0)] ``` +## Primes + +The `primes` module implements several procedures related to prime numbers. + +Prime numbers are an essential building block of many algorithms in diverse +areas such as cryptography, digital communications and many others. +This module adds a function to generate rank-1 tensors of primes upto a +certain value; as well as a function to calculate the prime factors of a +number. + +### Prime examples + +```nim +# Generate the list of primes smaller or equal to 20 +echo primes(20) +# Tensor[system.int] of shape "[8]" on backend "Cpu" +# 2 3 5 7 11 13 17 19 + +# Get the list of prime factors of the number 60 +echo factor(60) +# Tensor[system.int] of shape "[4]" on backend "Cpu" +# 2 2 3 5 +``` + ## LFSR LFSR module which implements a Linear Feedback Shift Register that can be diff --git a/impulse/primes.nim b/impulse/primes.nim new file mode 100644 index 0000000..ff04979 --- /dev/null +++ b/impulse/primes.nim @@ -0,0 +1,201 @@ +## Module that implements several procedures related to prime numbers +## +## Prime numbers are an essential building block of many algorithms in diverse +## areas such as cryptography, digital communications and many others. +## This module adds a function to generate rank-1 tensors of primes upto a +## certain value; as well as a function to calculate the prime factors of a +## number. + +import arraymancer + +proc primes*[T: SomeInteger | SomeFloat](upto: T): Tensor[T] = + ## Generate a Tensor of prime numbers up to a certain value + ## + ## Return a Tensor of the prime numbers less than or equal to `upto`. + ## A prime number is one that has no factors other than 1 and itself. + ## + ## Input: + ## - upto: Integer up to which primes will be generated + ## + ## Result: + ## - Integer Tensor of prime values less than or equal to `upto` + ## + ## Note: + ## - This function implements a "half" Sieve of Erathostenes algorithm + ## which is a classical Sieve of Erathostenes in which only odd numbers + ## are checked. Many examples of this algorithm can be found online. + ## It also stops checking after sqrt(upto) + ## - The memory required by this procedure is proportional to the input + ## number. + when T is SomeFloat: + if upto != round(upto): + raise newException(ValueError, + "`upto` value (" & $upto & ") must be a whole number") + + if upto < 11: + # Handle the primes below 11 to simplify the general code below + # (by removing the need to handle the few cases in which the index to + # `isprime`, calculated based on `factor` is negative) + # This is the minimum set of primes that we must handle, but we could + # extend this list to make the calculation faster for more of the + # smallest primes + let prime_candidates = [2.T, 3, 5, 7].toTensor() + return prime_candidates[prime_candidates <=. upto] + + # General algorithm (valid for numbers higher than 10) + let prime_candidates = arange(3.T, T(upto + 1), 2.T) + var isprime = ones[bool]((upto.int - 1) div 2) + let max_possible_factor_idx = int(sqrt(upto.float)) div 2 + for factor in prime_candidates[_ ..< max_possible_factor_idx]: + if isprime[(factor.int - 2) div 2]: + isprime[(factor.int * 3 - 2) div 2 .. _ | factor.int] = false + + # Note that 2 is missing from the result, so it must be manually added to + # the front of the result tensor + return [2.T].toTensor().append(prime_candidates[isprime]) + +# The maximum float64 that can be represented as an integer that is followed by a +# another integer that is representable as a float64 as well +const maximumConsecutiveFloat64Int = pow(2.0, 53) - 1.0 + +proc factor*[T: SomeInteger | SomeFloat](n: T): Tensor[T] = + ## Return a Tensor containing the prime factors of the input + ## + ## Input: + ## - n: A value that will be factorized. + ## If its type is floating-point it must be a whole number. Otherwise + ## a ValueError will be raised. + ## Result: + ## - A sorted Tensor containing the prime factors of the input. + ## + ## Example: + ## ```nim + ## echo factor(60) + ## # Tensor[system.int] of shape "[4]" on backend "Cpu" + ## # 2 2 3 5 + ## ``` + if n < 0: + raise newException(ValueError, + "Negative values (" & $n & ") cannot be factorized") + when T is int64: + if n > T(maximumConsecutiveFloat64Int): + raise newException(ValueError, + "Value (" & $n & ") is too large to be factorized") + elif T is SomeFloat: + if floor(n) != n: + raise newException(ValueError, + "Non whole numbers (" & $n & ") cannot be factorized") + + if n < 4: + return [n].toTensor + + # The algorithm works by keeping track of the list of unique potential, + # candidate prime factors of the input, and iteratively adding those + # that are confirmed to be factors into a list of confirmed factors + # (which is stored in the `result` tensor variable). + + # First we must initialize the `candidate_factor` Tensor + # The factors of the input can be found among the list of primes + # that are smaller than or equal to input. However we can significantly + # reduce the candidate list by taking into account the fact that only a + # single factor can be greater than the square root of the input. + # The algorithm is such that if that is the case we will add the input number + # at the very end of the loop below + var candidate_factors = primes(T(ceil(sqrt(float(n))))) + + # This list of prime candidate_factors is refined by finding those of them + # that divide the input value (i.e. those whose `input mod prime` == 0). + # Those candiates that don't divide the input are known to not be valid + # factors and can be removed from the candidate_factors list. Those that do + # divide the input are confirmed as valid factors and as such are added to + # the result list. Then the input is divided by all of the remaining + # candidates (by dividing the input by the product of all the remaining + # candidates). The result is a number that is the product of all the factors + # that are still unknown (which must be among the remaining candidates!) and + # which we can call `unknown_factor_product`. + # Then we can simply repeat the same process over and over, replacing the + # original input with the remaining `unknown_factor_product` after each + # iteration, until the `unknown_factors_product` (which is reduced by each + # division at the end of each iteration) reaches 1. Alternatively, we might + # run out of candidates, which will only happen when there is only one factor + # left (which must be greater than the square root of the input) and is stored + # in the `unknown_factors_product`. In that case we add it to the confirmed + # factors (result) list and the process can stop. + var unknown_factor_product = n + while unknown_factor_product > 1: + # Find the primes that are divisors of the remaining unknown_factors_product + # Note that this tells us which of the remaining candidate_factors are + # factors of the input _at least once_ (but they could divide it more + # than once) + let is_factor = (unknown_factor_product mod candidate_factors) ==. 0 + # Keep only the factors that divide the remainder and remove the rest + # from the list of candidates + candidate_factors = candidate_factors[is_factor] + # after this step, all the items incandidate_factors are _known_ to be + # factors of `unknown_factor_product` _at least once_! + if candidate_factors.len == 0: + # If there are no more prime candidates left, it means that the remainder + # is a prime (and that it must be greater than the sqrt of the input), + # and that we are done (after adding it to the result list) + result = result.append([unknown_factor_product].toTensor) + break + # If we didn't stop it means that there are still candidates which we + # _know_ are factors of the remainder, so we must add them to the result + result = result.append(candidate_factors) + # Now we can prepare for the next iteration by dividing the remainder, + # by the factors we just added to the result. This reminder is the product + # of the factors we don't know yet + unknown_factor_product = T(unknown_factor_product / product(candidate_factors)) + # After this division the items in `candidate_factors` become candidates again + # and we can start a new iteration + result = sorted(result) + +proc isprimeImpl[T: SomeInteger | SomeFloat](n: T, candidate_factors: Tensor[int]): bool {.inline.} = + ## Actual implementation of the isprime check + # This function is optimized for speed in 2 ways: + # 1. By first rejecting all non-whole float numbers and then performing the + # actual isprime check using integers (which is faster than using floats) + # 2. By receving a pre-calculated tensor of candidate_factors. This does not + # speed up the check of a single value, but it does speed up the check of + # a tensor of values. Note that because of #1 the candidate_factors must + # be a tensor of ints (even if the number being checked is a float) + when T is SomeFloat: + if floor(n) != n: + return false + let n = int(n) + result = (n > 1) and all(n mod candidate_factors[candidate_factors <. n]) + +proc isprime*[T: SomeInteger | SomeFloat](n: T): bool = + ## Check whether the input is a prime number + ## + ## Only positive values higher than 1 can be primes (i.e. we exclude 1 and -1 + ## which are sometimes considered primes). + ## + ## Note that this function also works with floats, which are considered + ## non-prime when they are not whole numbers. + # Note that we do here some checks that are repeated later inside of + # `isprimeImpl`. This is done to avoid the unnecessary calculation of + # the `candidate_factors` tensor in those cases + if n <= 1: + return false + when T is int64: + if n > T(maximumConsecutiveFloat64Int): + raise newException(ValueError, + "Value (" & $n & ") is too large to be factorized") + elif T is SomeFloat: + if floor(n) != n: + return false + var candidate_factors = primes(int(ceil(sqrt(float(n))))) + isprimeImpl(n, candidate_factors) + +proc isprime*[T: SomeInteger | SomeFloat](t: Tensor[T]): Tensor[bool] = + ## Element-wise check if the input values are prime numbers + result = zeros[bool](t.len) + # Pre-calculate the list of primes that will be used for the element-wise + # isprime check and then call isprimeImpl on each element + # Note that the candidate_factors must be a tensor of ints (for performance + # reasons) + var candidate_factors = primes(int(ceil(sqrt(float(max(t.flatten())))))) + for idx, val in t.enumerate(): + result[idx] = isprimeImpl(val, candidate_factors) + return result.reshape(t.shape) diff --git a/tests/test_primes.nim b/tests/test_primes.nim new file mode 100644 index 0000000..6daa5f2 --- /dev/null +++ b/tests/test_primes.nim @@ -0,0 +1,134 @@ +import ../impulse/primes +import arraymancer +import std / unittest +import std / random + +proc test_primes() = + ## Test the `primes` function + test "Prime number generation (integer values)": + check: primes(0).len == 0 + check: primes(1).len == 0 + check: primes(2) == [2].toTensor + check: primes(3) == [2, 3].toTensor + check: primes(4) == [2, 3].toTensor + check: primes(11) == [2, 3, 5, 7, 11].toTensor + check: primes(12) == [2, 3, 5, 7, 11].toTensor + check: primes(19) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor + check: primes(20) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor + check: primes(22) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor + check: primes(100000).len == 9592 + check: primes(100003).len == 9593 + check: primes(100000)[^1].item == 99991 + + test "Prime number generation (floating-point values)": + check: primes(100000.0).len == 9592 + check: primes(100000.0)[^1].item == 99991.0 + + # An exception must be raised if the `upto` value is not a whole number + try: + discard primes(100.5) + check: false + except ValueError: + # This is what should happen! + discard + +proc generate_random_factor_tensor[T]( + max_value: T, max_factors: int, prime_list: Tensor[T]): Tensor[T] = + ## Generate a tensor of random prime factors taken from a tensor of primes + ## The tensor length will not exceed the `max_factors` and the product of + ## the factors will not exceed `max_value` either. + ## This is not just a random list of values taken from the `prime_list` + ## Instead we artificially introduce a random level of repetition of the + ## chosen factors to emulate the fact that many numbers have repeated + ## prime factors + let max_value = rand(4 .. 2 ^ 53) + let max_factors = rand(1 .. 20) + result = newTensor[int](max_factors) + var value = 1 + var factor = prime_list[rand(prime_list.len - 1)] + for idx in 0 ..< max_factors: + # Randomly repeat the previous factor + # Higher number of repetitions are less likely + let repeat_factor = rand(5) < 1 + if not repeat_factor: + factor = prime_list[rand(prime_list.len - 1)] + let new_value = factor * value + if new_value >= max_value: + break + result[idx] = factor + value = new_value + result = sorted(result) + result = result[result >. 0] + +proc test_factor() = + test "Prime factorization of integer values (factor)": + check: factor(60) == [2, 2, 3, 5].toTensor + check: factor(100001) == [11, 9091].toTensor + + # Check that the product of the factorization of a few random values + # equals the original numbers + for n in 0 ..< 10: + let value = rand(10000) + check: value == product(factor(value)) + + # Repeat the previous test in a more sophisticated manner + # Instead of generating random values and checking that the + # product of their factorization is the same as the original values + # (which could work for many incorrect implementations of factor), + # generate a few random factor tensors, multiply them to get + # the number that has them as prime factors, factorize those numbers + # and check that their factorizations matches the original tensors + let prime_list = primes(100) + for n in 0 ..< 10: + let max_value = rand(4 .. 2 ^ 53) + let max_factors = rand(1 .. 20) + var factors = generate_random_factor_tensor( + max_value, max_factors, prime_list) + let value = product(factors) + check: factor(value) == factors + + test "Prime factorization of floating-point values (factor)": + # Floating-point + check: factor(60.0) == [2.0, 2, 3, 5].toTensor + check: factor(100001.0) == [11.0, 9091].toTensor + + # Check that the product of the factorization of a few random values + # equals the original numbers + # Note that here we do not also do the reverse check (as we do for ints) + # in order to make the test faster + for n in 0 ..< 10: + let value = floor(rand(10000.0)) + check: value == product(factor(value)) + + # An exception must be raised if we try to factorize a non-whole number + try: + discard factor(100.5) + check: false + except ValueError: + # This is what should happen! + discard + +proc test_isprime() = + test "isprime": + check: isprime(7) == true + check: isprime(7.0) == true + check: isprime(7.5) == false + check: isprime(1) == false + check: isprime(0) == false + check: isprime(-1) == false + let t = [ + [-1, 0, 2, 4], + [ 5, 6, 7, 11] + ].toTensor + let expected = [ + [false, false, true, false], + [ true, false, true, true] + ].toTensor + check: isprime(t) == expected + check: isprime(t.asType(float)) == expected + +# Run the tests +suite "Primes": + test_primes() + test_factor() + test_isprime()