From 8ef097705d88810654ba8c3639d1c257b54b863f Mon Sep 17 00:00:00 2001
From: Ted Conbeer <tconbeer@users.noreply.github.com>
Date: Fri, 24 Jan 2025 16:26:06 -0700
Subject: [PATCH] fix: lowercase numbers

---
 CHANGELOG.md                                     |  1 +
 src/sqlfmt/tokens.py                             |  1 +
 .../unformatted/132_spark_number_literals.sql    | 16 ++++++++--------
 tests/unit_tests/test_node_manager.py            | 16 ++++++++++++++++
 4 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 70433551..22e5b4fd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file.
 - sqlfmt no longer adds a space between the `*` and `columns` in DuckDB `*columns` expressions ([#657](https://github.com/tconbeer/sqlfmt/issues/657) - thank you [@aersam](https://github.com/aersam)!)
 - sqlfmt no longer adds a space between the `:` and `['name']` in a DataBricks escaped variant expression like `foo:['bar.baz']` ([#637](https://github.com/tconbeer/sqlfmt/issues/637) - thank you [@aersam](https://github.com/aersam)!)
 - sqlfmt no longer lexes the Postgres operators `#>`, `#>>`, `#-`, and `##` as comments ([#461](https://github.com/tconbeer/sqlfmt/issues/461) - thank you [@pauljz](https://github.com/pauljz) and many others!)
+- sqlfmt will now lowercase the letters in a number literal like `-.1234567E+2BD` ([#645](https://github.com/tconbeer/sqlfmt/issues/645)).
 
 ### Testing
 - sqlfmt is now tested against and fully supports Python 3.13
diff --git a/src/sqlfmt/tokens.py b/src/sqlfmt/tokens.py
index 9ab621aa..c5ab8b89 100644
--- a/src/sqlfmt/tokens.py
+++ b/src/sqlfmt/tokens.py
@@ -131,6 +131,7 @@ def is_always_lowercased(self) -> bool:
             TokenType.ON,
             TokenType.BOOLEAN_OPERATOR,
             TokenType.SET_OPERATOR,
+            TokenType.NUMBER,
         ]
 
     @cached_property
diff --git a/tests/data/unformatted/132_spark_number_literals.sql b/tests/data/unformatted/132_spark_number_literals.sql
index cf1e4576..05dfc923 100644
--- a/tests/data/unformatted/132_spark_number_literals.sql
+++ b/tests/data/unformatted/132_spark_number_literals.sql
@@ -24,9 +24,9 @@ select -2147483648 as col
 ;
 select 9223372036854775807l as col
 ;
-select -32Y as col
+select -32y as col
 ;
-select 482S as col
+select 482s as col
 ;
 select 12.578 as col
 ;
@@ -36,19 +36,19 @@ select -.1234567 as col
 ;
 select 123. as col
 ;
-select 123.BD as col
+select 123.bd as col
 ;
-select 5E2 as col
+select 5e2 as col
 ;
-select 5D as col
+select 5d as col
 ;
-select -5BD as col
+select -5bd as col
 ;
 select 12.578e-2d as col
 ;
-select -.1234567E+2BD as col
+select -.1234567e+2bd as col
 ;
 select +3.e+3 as col
 ;
-select -3.E-3D as col
+select -3.e-3d as col
 ;
diff --git a/tests/unit_tests/test_node_manager.py b/tests/unit_tests/test_node_manager.py
index b7f8a7f0..875720fa 100644
--- a/tests/unit_tests/test_node_manager.py
+++ b/tests/unit_tests/test_node_manager.py
@@ -264,6 +264,22 @@ def test_capitalization_operators(default_mode: Mode, source_string: str) -> Non
     assert parsed_string.rstrip("\n") == source_string.lower()
 
 
+@pytest.mark.parametrize(
+    "source_string",
+    [
+        "1e4",  # already lowercase; must round-trip unchanged
+        "1E4",  # uppercase exponent marker
+        "-.1234567E+2BD",  # sign, leading dot, exponent, and trailing letters
+    ],
+)
+def test_capitalization_numbers(default_mode: Mode, source_string: str) -> None:
+    q = default_mode.dialect.initialize_analyzer(
+        line_length=default_mode.line_length
+    ).parse_query(source_string=source_string)
+    parsed_string = "".join(str(line) for line in q.lines)  # re-render parsed lines
+    assert parsed_string.rstrip("\n") == source_string.lower()  # expect lowercased
+
+
 @pytest.mark.parametrize(
     "source_string",
     [