Skip to content

Commit

Permalink
Merge pull request #949 from Mathics3/int-str-conversion-limit
Browse files Browse the repository at this point in the history
fix int-str conversion in Python 3.11
  • Loading branch information
rocky authored Dec 18, 2023
2 parents 4df1273 + f58e170 commit 06a2f26
Show file tree
Hide file tree
Showing 5 changed files with 260 additions and 20 deletions.
4 changes: 3 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ CHANGES
New Builtins
++++++++++++


* ``$MaxLengthIntStringConversion``
* ``Elements``
* ``ConjugateTranspose``
* ``LeviCivitaTensor``
Expand All @@ -29,6 +29,8 @@ Internals
* Maximum number of digits allowed in a string set to 7000 and can be adjusted using environment variable
``MATHICS_MAX_STR_DIGITS`` on Python versions that don't adjust automatically (like pyston).
* Real number comparisons are now based on the internal implementation of `RealSign`.
* For Python 3.11 and later, the variable ``$MaxLengthIntStringConversion`` controls the maximum
number of digits allowed when converting between large integers and Strings.

Bugs
----
Expand Down
129 changes: 113 additions & 16 deletions mathics/builtin/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from mathics import version_string
from mathics.core.atoms import Integer, Integer0, IntegerM1, Real, String
from mathics.core.attributes import A_CONSTANT
from mathics.core.builtin import Builtin, Predefined
from mathics.core.convert.expression import to_mathics_list
from mathics.core.expression import Expression
Expand All @@ -29,18 +30,100 @@
have_psutil = True


class MaxLengthIntStringConversion(Predefined):
    """
    <url>:Python 3.11 Integer string conversion length limitation:
    https://docs.python.org/3.11/library/stdtypes.html#int-max-str-digits</url>

    <dl>
      <dt>'$MaxLengthIntStringConversion'
      <dd>A system constant that fixes the largest size of the string that can \
          result when converting an 'Integer' value into a 'String'. When the \
          'String' is too large, then the middle of the integer contains \
          an indication of the number of digits elided.

          If '$MaxLengthIntStringConversion' is set to 0, there is no \
          bound. Aside from 0, 640 is the smallest value allowed.
    </dl>

    Although Mathics3 can represent integers of arbitrary size, when it formats \
    the value for display, there can be nonlinear behavior in converting the number to \
    decimal.

    Python, in version 3.11 and up, puts a default limit on the size of \
    the number of digits it will allow when converting a big-num integer into \
    a string.

    Show the default value of '$MaxLengthIntStringConversion':
    >> $MaxLengthIntStringConversion
     = 7000

    Set '$MaxLengthIntStringConversion' to the smallest value allowed:
    >> $MaxLengthIntStringConversion = 640
     = 640

    >> 500! //ToString//StringLength
     = ...

    >> $MaxLengthIntStringConversion = 0; 500! //ToString//StringLength
     = 1135

    The below has an effect only on Python 3.11 and later:
    >> $MaxLengthIntStringConversion = 650; 500! //ToString
     = ...

    Other than 0, Python 3.11 does not accept a value less than 640:
    >> $MaxLengthIntStringConversion = 10
     : 10 is not 0 or an Integer value greater than 640.
     = ...
    """

    attributes = A_CONSTANT
    messages = {"inv": "`1` is not 0 or an Integer value greater than 640."}
    name = "$MaxLengthIntStringConversion"
    summary_text = "the maximum length for which an integer is converted to a String"

    def evaluate(self, evaluation) -> Integer:
        # sys.get_int_max_str_digits() does not exist on every interpreter;
        # when it is missing, report 0, which this builtin documents as
        # "no bound".
        try:
            return Integer(sys.get_int_max_str_digits())
        except AttributeError:
            return Integer0

    def eval_set(self, expr, evaluation):
        """Set[$MaxLengthIntStringConversion, expr_]"""
        if isinstance(expr, Integer):
            try:
                sys.set_int_max_str_digits(expr.value)
                return self.evaluate(evaluation)
            except AttributeError:
                # The interpreter has no adjustable limit. Still report
                # values that would have been rejected, so that sessions
                # behave the same way everywhere; the effective limit
                # stays "no bound" (0).
                if expr.value != 0 and expr.value < 640:
                    evaluation.message("$MaxLengthIntStringConversion", "inv", expr)
                return Integer0
            except ValueError:
                # The interpreter rejected the value; fall through to the
                # common "invalid value" report below.
                pass

        evaluation.message("$MaxLengthIntStringConversion", "inv", expr)
        return self.evaluate(evaluation)

    def eval_setdelayed(self, expr, evaluation):
        """SetDelayed[$MaxLengthIntStringConversion, expr_]"""
        # Delayed assignment behaves exactly like immediate assignment here.
        # "evaluation" must be forwarded: eval_set() requires it both to
        # emit messages and to compute its return value.
        return self.eval_set(expr, evaluation)


class CommandLine(Predefined):
"""
<url>:WMA link:https://reference.wolfram.com/language/ref/$CommandLine.html</url>
<dl>
<dt>'$CommandLine'
<dd>is a list of strings passed on the command line to launch the Mathics session.
<dd>is a list of strings passed on the command line to launch the Mathics3 session.
</dl>
>> $CommandLine
= {...}
"""

summary_text = "the command line arguments passed when the current Mathics session was launched"
summary_text = (
"the command line arguments passed when the current Mathics3 "
"session was launched"
)
name = "$CommandLine"

def evaluate(self, evaluation) -> Expression:
Expand Down Expand Up @@ -113,7 +196,8 @@ class Machine(Predefined):
<dl>
<dt>'$Machine'
<dd>returns a string describing the type of computer system on which the Mathics is being run.
<dd>returns a string describing the type of computer system on which \
Mathics3 is being run.
</dl>
X> $Machine
= linux
Expand All @@ -132,7 +216,8 @@ class MachineName(Predefined):
<dl>
<dt>'$MachineName'
<dd>is a string that gives the assigned name of the computer on which Mathics is being run, if such a name is defined.
<dd>is a string that gives the assigned name of the computer on which Mathics3 \
is being run, if such a name is defined.
</dl>
X> $MachineName
= buster
Expand Down Expand Up @@ -169,7 +254,8 @@ class Packages(Predefined):
<dl>
<dt>'$Packages'
<dd>returns a list of the contexts corresponding to all packages which have been loaded into Mathics.
<dd>returns a list of the contexts corresponding to all packages which have \
been loaded into Mathics.
</dl>
X> $Packages
Expand All @@ -189,7 +275,8 @@ class ParentProcessID(Predefined):
<dl>
<dt>'$ParentProcessID'
<dd>gives the ID assigned to the process which invokes the \Mathics by the operating system under which it is run.
<dd>gives the ID assigned to the process which invokes Mathics3 by the operating \
system under which it is run.
</dl>
>> $ParentProcessID
Expand All @@ -209,7 +296,8 @@ class ProcessID(Predefined):
<dl>
<dt>'$ProcessID'
<dd>gives the ID assigned to the \Mathics process by the operating system under which it is run.
<dd>gives the ID assigned to the Mathics3 process by the operating system under \
which it is run.
</dl>
>> $ProcessID
Expand All @@ -223,23 +311,25 @@ def evaluate(self, evaluation) -> Integer:


class ProcessorType(Predefined):
r"""
"""
<url>
:WMA link:
https://reference.wolfram.com/language/ref/ProcessorType.html</url>
<dl>
<dt>'$ProcessorType'
<dd>gives a string giving the architecture of the processor on which the \Mathics is being run.
<dd>gives a string giving the architecture of the processor on which \
Mathics3 is being run.
</dl>
>> $ProcessorType
= ...
"""

name = "$ProcessorType"

summary_text = (
"name of the architecture of the processor over which Mathics is running"
"name of the architecture of the processor over which Mathics3 is running"
)

def evaluate(self, evaluation):
Expand All @@ -252,14 +342,14 @@ class PythonImplementation(Predefined):
<dl>
<dt>'$PythonImplementation'
<dd>gives a string indication the Python implementation used to run \Mathics.
<dd>gives a string indicating the Python implementation used to run Mathics3.
</dl>
>> $PythonImplementation
= ...
"""
name = "$PythonImplementation"

summary_text = "name of the Python implementation running Mathics"
summary_text = "name of the Python implementation running Mathics3"

def evaluate(self, evaluation):
from mathics.system_info import python_implementation
Expand Down Expand Up @@ -299,7 +389,8 @@ class Run(Builtin):
<dl>
<dt>'Run[$command$]'
<dd>runs command as an external operating system command, returning the exit code obtained.
<dd>runs command as an external operating system command, returning the exit \
code returned from running the system command.
</dl>
X> Run["date"]
= ...
Expand Down Expand Up @@ -337,7 +428,8 @@ class SystemWordLength(Predefined):
<dl>
<dt>'$SystemWordLength'
<dd>gives the effective number of bits in raw machine words on the computer system where \Mathics is running.
<dd>gives the effective number of bits in raw machine words on the computer \
system where Mathics3 is running.
</dl>
X> $SystemWordLength
= 64
Expand Down Expand Up @@ -568,9 +660,14 @@ class Share(Builtin):
<dl>
<dt>'Share[]'
<dd>release memory forcing Python to do garbage collection. If Python package is 'psutil' installed is the amount of released memoryis returned. Otherwise returns $0$. This function differs from WMA which tries to reduce the amount of memory required to store definitions, by reducing duplicated definitions.
<dd>releases memory by forcing Python to do garbage collection. If the Python package \
'psutil' is installed, the amount of released memory is returned. Otherwise \
returns $0$. This function differs from WMA which tries to reduce the amount \
of memory required to store definitions, by reducing duplicated definitions.
<dt>'Share[Symbol]'
<dd>Does the same thing as 'Share[]'; Note: this function differs from WMA which tries to reduce the amount of memory required to store definitions associated to $Symbol$.
<dd>Does the same thing as 'Share[]'; Note: this function differs from WMA which \
tries to reduce the amount of memory required to store definitions associated \
to $Symbol$.
</dl>
Expand Down
18 changes: 15 additions & 3 deletions mathics/core/atoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,21 @@ def default_format(self, evaluation, form) -> str:
def make_boxes(self, form) -> "String":
    """Return the boxed representation of this integer for ``form``.

    For ``System`InputForm`` and ``System`FullForm`` the decimal string
    is wrapped by ``_boxed_string`` with ``number_as_text=True``; for
    any other form it is returned as a plain ``String``.

    If Python refuses the int-to-str conversion (``ValueError``), a
    shortened representation produced by ``int_to_string_shorter_repr``
    is returned instead.
    """
    from mathics.eval.makeboxes import _boxed_string, int_to_string_shorter_repr

    # Every str() conversion must happen inside the try block; a
    # conversion placed before it would return or raise first and make
    # the fallback below unreachable.
    try:
        if form in ("System`InputForm", "System`FullForm"):
            return _boxed_string(str(self.value), number_as_text=True)

        return String(str(self._value))
    except ValueError:
        # In Python 3.11, the size of the string obtained from an
        # integer is limited, and for longer numbers this exception is
        # raised. The idea is to represent the number by its most
        # significant digits, its least significant digits, and a
        # placeholder giving the number of omitted digits.
        return int_to_string_shorter_repr(self._value, form)

def to_sympy(self, **kwargs):
return sympy.Integer(self._value)
Expand Down
78 changes: 78 additions & 0 deletions mathics/eval/makeboxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,84 @@ def _boxed_string(string: str, **options):
return StyleBox(String(string), **options)


# Number of decimal digits contributed by each binary digit: log10(2).
_LOG10_2 = 0.30102999566398120


def _count_decimal_digits(value: int) -> int:
    """Return the exact number of decimal digits of the non-negative int ``value``.

    The count is computed arithmetically, without converting ``value``
    to a string.
    """
    if value == 0:
        return 1

    # 2**(b - 1) <= value < 2**b, so this is a lower bound on the number
    # of digits that is too small by at most one (up to float rounding).
    digits = int((value.bit_length() - 1) * _LOG10_2) + 1
    power = 10 ** (digits - 1)
    # Correct the estimate in whichever direction is needed, so that
    # 10**(digits - 1) <= value < 10**digits holds on exit.
    while value < power:
        power //= 10
        digits -= 1
    while value >= power * 10:
        power *= 10
        digits += 1
    return digits


# 640 = sys.int_info.str_digits_check_threshold.
# Someday when 3.11 is the minimum version of Python supported,
# we can replace the magic value 640 below with
# sys.int_info.str_digits_check_threshold.
def int_to_string_shorter_repr(value: int, form: Symbol, max_digits=640):
    """Convert value to a String, restricted to max_digits characters.

    If value has an n-digit decimal representation,

      value = d_1 * 10^{n-1} + d_2 * 10^{n-2} + d_3 * 10^{n-3} + ... +
              d_{n-2} * 100 + d_{n-1} * 10 + d_{n}

    it is represented as the string

      "d_1d_2d_3...d_{k} <<n-2k>> d_{n-k+1}...d_{n-2}d_{n-1}d_{n}"

    where the n-2k middle digits are replaced by a placeholder that
    states how many digits were elided.

    ``value`` is a Python int. ``form`` is accepted for interface
    compatibility and is not used. ``max_digits == 0`` means "no
    limit": the full decimal string is returned. A leading minus sign
    counts as one of the ``max_digits`` characters.
    """
    if max_digits == 0:
        return String(str(value))

    # Normalize to positive quantities; the sign uses up one character.
    is_negative = value < 0
    if is_negative:
        value = -value
        max_digits = max_digits - 1

    # The exact digit count is needed: an estimate would make the
    # number reported in the placeholder wrong.
    num_digits = _count_decimal_digits(value)

    # If the number fits, return it as it is.
    if num_digits <= max_digits:
        if is_negative:
            return String("-" + str(value))
        return String(str(value))

    # Upper bound for the size of the placeholder " <<N>> ": six fixed
    # characters plus the digits of N, where N < num_digits.
    size_placeholder = len(str(num_digits)) + 6
    # Number of decimal places left for actual digits.
    available_digits = max(max_digits - size_placeholder, 0)
    # How many most significant digits to include ...
    len_msd = (available_digits + 1) // 2
    # ... and how many least significant digits to include.
    len_lsd = available_digits - len_msd

    msd = ""
    if len_msd > 0:
        # num_digits is exact, so this quotient has exactly len_msd digits.
        msd = str(value // 10 ** (num_digits - len_msd))

    lsd = ""
    if len_lsd > 0:
        # Left-pad with zeros: the low-order digits may start with 0.
        lsd = str(value % 10**len_lsd).zfill(len_lsd)

    # Number of hidden decimal places, and the placeholder reporting it.
    remaining = num_digits - len_lsd - len_msd
    placeholder = f" <<{remaining}>> "
    # Only use the shortened string if it is actually shorter than the
    # full representation.
    if len(placeholder) < remaining:
        value_str = f"{msd}{placeholder}{lsd}"
    else:
        value_str = str(value)

    if is_negative:
        value_str = "-" + value_str
    return String(value_str)


def eval_fullform_makeboxes(
self, expr, evaluation: Evaluation, form=SymbolStandardForm
) -> Expression:
Expand Down
Loading

0 comments on commit 06a2f26

Please sign in to comment.