Skip to content

Commit

Permalink
parse_e_notation function minor rework
Browse files Browse the repository at this point in the history
added additional tests for e/scientific notation
  • Loading branch information
Nekit2217 committed Apr 10, 2024
1 parent 2eb2706 commit 5e07973
Showing 1 changed file with 24 additions and 15 deletions.
39 changes: 24 additions & 15 deletions arrow-cast/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,7 @@ fn parse_e_notation<T: DecimalType>(
mut digits: u16,
mut fractionals: i16,
mut result: T::Native,
index: usize,
precision: u16,
scale: i16,
) -> Result<T::Native, ArrowError> {
Expand All @@ -685,18 +686,15 @@ fn parse_e_notation<T: DecimalType>(
let mut exp_start: bool = false;
// e has a plus sign
let mut pos_shift_direction: bool = true;
let mut bs;

// no fractional part
if fractionals == 0 {
bs = s.as_bytes().iter().skip(digits as usize)
// skip to point or exponent index
let mut bs;
if fractionals > 0 {
// it's a fraction, so the point index needs to be skipped, so +1
bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
} else {
// digits + 1 because of the period in the string
bs = s.as_bytes().iter().skip((digits + 1) as usize);
// check for leading 0, because in parse_decimal function zeros before the period are not counted
if digits == fractionals as u16 {
bs.next();
};
// it's actually an integer that is already written into the result, so let's skip on to e
bs = s.as_bytes().iter().skip(index);
}

while let Some(b) = bs.next() {
Expand Down Expand Up @@ -769,7 +767,7 @@ fn parse_e_notation<T: DecimalType>(
exp *= -1;
}

// period offset
// point offset
exp = fractionals - exp;
// We have zeros on the left, we need to count them
if !pos_shift_direction && exp > digits as i16 {
Expand Down Expand Up @@ -818,13 +816,13 @@ pub fn parse_decimal<T: DecimalType>(
)));
}

let mut is_e_notation = false;
let mut bs = bs.iter().enumerate();

let mut bs = bs.iter();
let mut is_e_notation = false;

// Overflow checks are not required if 10^(precision - 1) <= T::MAX holds.
// Thus, if we validate the precision correctly, we can skip overflow checks.
while let Some(b) = bs.next() {
while let Some((index, b)) = bs.next() {
match b {
b'0'..=b'9' => {
if digits == 0 && *b == b'0' {
Expand All @@ -836,14 +834,17 @@ pub fn parse_decimal<T: DecimalType>(
result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
}
b'.' => {
for b in bs.by_ref() {
let point_index = index;

for (_, b) in bs.by_ref() {
if !b.is_ascii_digit() {
if *b == b'e' || *b == b'E' {
result = match parse_e_notation::<T>(
s,
digits as u16,
fractionals as i16,
result,
point_index,
precision as u16,
scale as i16,
) {
Expand Down Expand Up @@ -888,6 +889,7 @@ pub fn parse_decimal<T: DecimalType>(
digits as u16,
fractionals as i16,
result,
index,
precision as u16,
scale as i16,
) {
Expand Down Expand Up @@ -2371,6 +2373,7 @@ mod tests {
let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
assert_eq!(i256::from_i128(i), result_256.unwrap());
}

let e_notation_tests = [
("1.23e3", "1230.0", 2),
("5.6714e+2", "567.14", 4),
Expand All @@ -2397,7 +2400,13 @@ mod tests {
("0E+6", "0", 10),
("1E-8", "0.00000001", 10),
("12E+6", "12000000", 10),
("12E-6", "0.000012", 10),
("0.1e-6", "0.0000001", 10),
("0.1e+6", "100000", 10),
("0.12e-6", "0.00000012", 10),
("0.12e+6", "120000", 10),
("000000000001e0", "000000000001", 3),
("000001.1034567002e0", "000001.1034567002", 3),
];
for (e, d, scale) in e_notation_tests {
let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
Expand Down

0 comments on commit 5e07973

Please sign in to comment.