apache · chengw-netflix · Apr 26, 2024 · May 1, 2024 · Apr 29, 2024 · Dec 12, 2024
diff --git a/.circleci/config.yml b/.circleci/config.yml
diff --git a/doc/modules/cassandra/pages/developing/cql/functions.adoc b/doc/modules/cassandra/pages/developing/cql/functions.adoc
@@ -288,6 +288,189 @@ A number of functions allow to obtain the similarity score between vectors of fl
 
 include::cassandra:partial$vector-search/vector_functions.adoc[]
 
+[[human-helper-functions]]
+==== Human helper functions
+
+For users' convenience, there are currently two functions that convert values to more human-friendly representations.
+
+[cols=",,",options="header",]
+|===
+| Function name | Input type | Description
+| `format_bytes` |`int`, `tinyint`, `smallint`, `bigint`, `varint`, `ascii`, `text` | Converts values in bytes to a more human-friendly representation.
+
+| `format_time` |`int`, `tinyint`, `smallint`, `bigint`, `varint`, `ascii`, `text` | Converts values in milliseconds to a more human-friendly representation.
+
+|===
+
+
+===== format_bytes
+
+This function interprets the values in a column as sizes in bytes and converts them to whichever unit the user chooses. Supported units are: `B`, `KiB`, `MiB` and `GiB`. The result will be rounded to two decimal places.
+
+Supported column types on which this function is possible to be applied:
+`INT`, `TINYINT`, `SMALLINT`, `BIGINT`, `VARINT`, `ASCII`, `TEXT`.
+For `ASCII` and `TEXT` types, the text in such a column has to be a non-negative number.
+
+The maximum return value is `Long.MAX_VALUE`. If the conversion overflows, `Long.MAX_VALUE` will be returned.
+
+[NOTE]
+====
+The actual return value of the `Long.MAX_VALUE` will be 9223372036854776000 due to the limitations of floating-point precision.
+====
+
+There are three ways to call this function.
+Let's have this table:
+
+[source,cql]
+----
+cqlsh> select * from ks.tb;
+
+ id | val
+----+----------------
+  5 |          60000
+  1 |        1234234
+  2 | 12342341234234
+  4 |          60001
+  7 |           null
+  6 |             43
+  3 |         123423
+
+----
+
+with schema
+
+[source,cql]
+----
+CREATE TABLE ks.tb (
+    id int PRIMARY KEY,
+    val bigint
+)
+----
+
+Imagine that we want to look at the `val` values in larger units, such as mebibytes. We would like to have more human-friendly output so that we do not have to mentally divide the values by 1024 to get them in the respective bigger units. The function may take just the column itself as an argument, and it will
+automatically convert it.
+
+[NOTE]
+====
+The default source unit for the `format_bytes` function is _bytes_ (`B`).
+====
+
+[source,cql]
+----
+cqlsh> select format_bytes(val) from ks.tb;
+
+ system.format_bytes(val)
+--------------------------
+                58.59 KiB
+                 1.18 MiB
+              11494.7 GiB
+                58.59 KiB
+                     null
+                     43 B
+               120.53 KiB
+----
+
+The second way to call the `format_bytes` function is to specify the size unit that all
+values should be converted to. For example, if we want all sizes to be represented in mebibytes, we do:
+
+[source,cql]
+----
+cqlsh> select format_bytes(val, 'MiB') from ks.tb;
+
+ system.format_bytes(val, 'MiB')
+----------------------------------
+                        0.06 MiB
+                        1.18 MiB
+                 11770573.84 MiB
+                        0.06 MiB
+                            null
+                           0 MiB
+                        0.12 MiB
+----
+
+Lastly, we can specify a source unit and a target unit. The source unit tells which unit the column's values are logically in; the target unit tells which unit we want these values to be converted to. For example,
+if we know that our column is logically in kibibytes and we want its values to be converted into mebibytes, we would do:
+
+[source,cql]
+----
+cqlsh> select format_bytes(val, 'KiB', 'MiB') from ks.tb;
+
+ system.format_bytes(val, 'KiB', 'MiB')
+----------------------------------------
+                              58.59 MiB
+                            1205.31 MiB
+                     12053067611.56 MiB
+                              58.59 MiB
+                                   null
+                               0.04 MiB
+                             120.53 MiB
+----
+
+===== format_time
+
+Similarly to `format_bytes`, we can do transformations on duration-like columns.
+
+Supported units are: `d`, `h`, `m`, `s`, `ms`, `us`, `µs`, `ns`.
+
+Supported column types on which this function is possible to be applied:
+`INT`, `TINYINT`, `SMALLINT`, `BIGINT`, `VARINT`, `ASCII`, `TEXT`. For `ASCII` and `TEXT` types, the text in such a column has to be a non-negative number.
+
+The maximum return value is `Double.MAX_VALUE`. If the conversion overflows, `Double.MAX_VALUE` will be returned.
+
+[NOTE]
+====
+The default source unit for the `format_time` function is _milliseconds_ (`ms`).
+====
+
+[source,cql]
+----
+cqlsh> select format_time(val) from ks.tb;
+
+ system.format_time(val)
+-------------------------
+                     1 m
+                 20.57 m
+             142851.17 d
+                     1 m
+                    null
+                   43 ms
+                  2.06 m
+----
+
+We may specify what unit we want the values to be converted to, given that the column's values are in milliseconds:
+
+[source,cql]
+----
+cqlsh> select format_time(val, 'm') from ks.tb;
+
+ system.format_time(val, 'm')
+------------------------------
+                          1 m
+                      20.57 m
+               205705687.24 m
+                          1 m
+                         null
+                          0 m
+                       2.06 m
+----
+
+Lastly, we can specify both source and target units:
+
+[source,cql]
+----
+cqlsh> select format_time(val, 's', 'h') from ks.tb;
+
+ system.format_time(val, 's', 'h')
+-----------------------------------
+                           16.67 h
+                          342.84 h
+                   3428428120.62 h
+                           16.67 h
+                              null
+                            0.01 h
+                           34.28 h
+----
+
 [[user-defined-scalar-functions]]
 === User-defined functions
 

diff --git a/src/java/org/apache/cassandra/config/DataStorageSpec.java b/src/java/org/apache/cassandra/config/DataStorageSpec.java
@@ -479,25 +479,21 @@ public enum DataStorageUnit
     {
         BYTES("B")
         {
-            public long toBytes(long d)
-            {
-                return d;
-            }
+            public long toBytes(long d) { return d; }
 
-            public long toKibibytes(long d)
-            {
-                return (d / 1024L);
-            }
+            public long toKibibytes(long d) { return (d / 1024L); }
 
-            public long toMebibytes(long d)
-            {
-                return (d / (1024L * 1024));
-            }
+            public long toMebibytes(long d) { return (d / (1024L * 1024)); }
 
-            public long toGibibytes(long d)
-            {
-                return (d / (1024L * 1024 * 1024));
-            }
+            public long toGibibytes(long d)  { return (d / (1024L * 1024 * 1024)); }
+
+            public double toBytesDouble(long d) { return (double) d; }
+
+            public double toKibibytesDouble(long d) { return d / 1024.0; }
+
+            public double toMebibytesDouble(long d) { return d / (1024.0 * 1024); }
+
+            public double toGibibytesDouble(long d) { return d / (1024.0 * 1024 * 1024); }
 
             public long convert(long source, DataStorageUnit sourceUnit)
             {
@@ -526,6 +522,14 @@ public long toGibibytes(long d)
                 return (d / (1024L * 1024));
             }
 
+            public double toBytesDouble(long d) { return (double) toBytes(d); }
+
+            public double toKibibytesDouble(long d) { return (double) d; }
+
+            public double toMebibytesDouble(long d) { return d / 1024.0; }
+
+            public double toGibibytesDouble(long d) { return d / (1024.0 * 1024); }
+
             public long convert(long source, DataStorageUnit sourceUnit)
             {
                 return sourceUnit.toKibibytes(source);
@@ -543,16 +547,21 @@ public long toKibibytes(long d)
                 return x(d, 1024L, (MAX / 1024L));
             }
 
-            public long toMebibytes(long d)
-            {
-                return d;
-            }
+            public long toMebibytes(long d) { return d; }
 
             public long toGibibytes(long d)
             {
                 return (d / 1024L);
             }
 
+            public double toBytesDouble(long d) { return (double) toBytes(d); }
+
+            public double toKibibytesDouble(long d) { return (double) toKibibytes(d); }
+
+            public double toMebibytesDouble(long d) { return (double) d; }
+
+            public double toGibibytesDouble(long d) { return d / 1024.0; }
+
             public long convert(long source, DataStorageUnit sourceUnit)
             {
                 return sourceUnit.toMebibytes(source);
@@ -575,10 +584,15 @@ public long toMebibytes(long d)
                 return x(d, 1024L, (MAX / 1024L));
             }
 
-            public long toGibibytes(long d)
-            {
-                return d;
-            }
+            public long toGibibytes(long d) { return d; }
+
+            public double toBytesDouble(long d) { return (double) toBytes(d); }
+
+            public double toKibibytesDouble(long d) { return (double) toKibibytes(d); }
+
+            public double toMebibytesDouble(long d) { return (double) toMebibytes(d); }
+
+            public double toGibibytesDouble(long d) { return (double) d; }
 
             public long convert(long source, DataStorageUnit sourceUnit)
             {
@@ -627,26 +641,51 @@ public static DataStorageUnit fromSymbol(String symbol)
             this.symbol = symbol;
         }
 
+        public String getSymbol()
+        {
+            return symbol;
+        }
+
         public long toBytes(long d)
         {
             throw new AbstractMethodError();
         }
 
+        public double toBytesDouble(long d)
+        {
+            throw new AbstractMethodError();
+        }
+
         public long toKibibytes(long d)
         {
             throw new AbstractMethodError();
         }
 
+        public double toKibibytesDouble(long d)
+        {
+            throw new AbstractMethodError();
+        }
+
         public long toMebibytes(long d)
         {
             throw new AbstractMethodError();
         }
 
+        public double toMebibytesDouble(long d)
+        {
+            throw new AbstractMethodError();
+        }
+
         public long toGibibytes(long d)
         {
             throw new AbstractMethodError();
         }
 
+        public double toGibibytesDouble(long d)
+        {
+            throw new AbstractMethodError();
+        }
+
         public long convert(long source, DataStorageUnit sourceUnit)
         {
             throw new AbstractMethodError();

diff --git a/src/java/org/apache/cassandra/config/DurationSpec.java b/src/java/org/apache/cassandra/config/DurationSpec.java
@@ -139,7 +139,7 @@ public TimeUnit unit()
      * @param symbol the time unit symbol
      * @return the time unit associated to the specified symbol
      */
-    static TimeUnit fromSymbol(String symbol)
+    public static TimeUnit fromSymbol(String symbol)
     {
         switch (symbol.toLowerCase())
         {