Skip to content

Commit

Permalink
Add alias for rand functions
Browse files Browse the repository at this point in the history
  • Loading branch information
zeotuan committed Oct 23, 2024
1 parent cff8c12 commit 3fb2174
Showing 1 changed file with 152 additions and 15 deletions.
167 changes: 152 additions & 15 deletions unsafe/src/main/scala/org/apache/spark/sql/daria/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package org.apache.spark.sql.daria

import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.expressions.{Expression, RandGamma}
import org.apache.spark.sql.functions.{lit, log, signum, when}
import org.apache.spark.sql.functions.{lit, log, signum}
import org.apache.spark.sql.{functions => F}
import org.apache.spark.util.Utils

Expand All @@ -15,49 +15,96 @@ object functions {
*
* @note The function is non-deterministic in general case.
*/
def rand_gamma(seed: Long, shape: Double, scale: Double): Column = withExpr(RandGamma(seed, shape, scale)).alias("gamma_random")
def randGamma(seed: Long, shape: Double, scale: Double): Column = withExpr(RandGamma(seed, shape, scale)).alias("gamma_random")

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Gamma distribution with the specified shape and scale parameters.
 *
 * The underlying expressions of the three arguments are handed to `RandGamma`, and the
 * resulting column is aliased as `gamma_random`.
 *
 * @param seed  column supplying the seed
 * @param shape column supplying the shape parameter
 * @param scale column supplying the scale parameter
 * @return a column aliased as `gamma_random`
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_gamma(seed: Column, shape: Column, scale: Column): Column = withExpr(RandGamma(seed.expr, shape.expr, scale.expr)).alias("gamma_random")
def randGamma(seed: Column, shape: Column, scale: Column): Column = withExpr(RandGamma(seed.expr, shape.expr, scale.expr)).alias("gamma_random")

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Gamma distribution with the specified shape and scale parameters.
 *
 * No seed is supplied by the caller: a seed is taken from `Utils.random.nextLong`
 * each time this method is called, and the seeded overload is invoked with it.
 *
 * @param shape shape parameter of the distribution
 * @param scale scale parameter of the distribution
 * @return the column produced by the seeded overload
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_gamma(shape: Double, scale: Double): Column = rand_gamma(Utils.random.nextLong, shape, scale)
def randGamma(shape: Double, scale: Double): Column = randGamma(Utils.random.nextLong, shape, scale)

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Gamma distribution with the specified shape and scale parameters.
 *
 * No seed is supplied by the caller: a seed is taken from `Utils.random.nextLong`
 * each time this method is called, wrapped with `lit`, and passed to the
 * all-`Column` overload.
 *
 * @param shape column supplying the shape parameter
 * @param scale column supplying the scale parameter
 * @return the column produced by the seeded overload
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_gamma(shape: Column, scale: Column): Column = rand_gamma(lit(Utils.random.nextLong), shape, scale)
def randGamma(shape: Column, scale: Column): Column = randGamma(lit(Utils.random.nextLong), shape, scale)

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Gamma distribution with default parameters (shape = 1.0, scale = 1.0).
 *
 * @return the column produced by `randGamma(1.0, 1.0)`
 *
 * @note The function is non-deterministic in the general case.
 */
def randGamma(): Column = {
  val defaultShape = 1.0
  val defaultScale = 1.0
  randGamma(defaultShape, defaultScale)
}

/**
 * Snake-case alias of `randGamma(seed: Long, shape: Double, scale: Double)`.
 *
 * Generates a column with independent and identically distributed (i.i.d.) samples
 * from the Gamma distribution with the specified shape and scale parameters.
 *
 * @param seed  seed forwarded to `randGamma`
 * @param shape shape parameter of the distribution
 * @param scale scale parameter of the distribution
 * @return whatever `randGamma` returns for the same arguments
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_gamma(seed: Long, shape: Double, scale: Double): Column = {
  randGamma(seed, shape, scale)
}

/**
 * Snake-case alias of `randGamma(seed: Column, shape: Column, scale: Column)`.
 *
 * Generates a column with independent and identically distributed (i.i.d.) samples
 * from the Gamma distribution with the specified shape and scale parameters.
 *
 * @param seed  column supplying the seed
 * @param shape column supplying the shape parameter
 * @param scale column supplying the scale parameter
 * @return whatever `randGamma` returns for the same arguments
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_gamma(seed: Column, shape: Column, scale: Column): Column = {
  randGamma(seed, shape, scale)
}

/**
 * Snake-case alias of `randGamma(shape: Double, scale: Double)`.
 *
 * Generates a column with independent and identically distributed (i.i.d.) samples
 * from the Gamma distribution with the specified shape and scale parameters.
 *
 * @param shape shape parameter of the distribution
 * @param scale scale parameter of the distribution
 * @return whatever `randGamma` returns for the same arguments
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_gamma(shape: Double, scale: Double): Column = {
  randGamma(shape, scale)
}

/**
 * Snake-case alias of `randGamma(shape: Column, scale: Column)`.
 *
 * Generates a column with independent and identically distributed (i.i.d.) samples
 * from the Gamma distribution with the specified shape and scale parameters.
 *
 * @param shape column supplying the shape parameter
 * @param scale column supplying the scale parameter
 * @return whatever `randGamma` returns for the same arguments
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_gamma(shape: Column, scale: Column): Column = {
  randGamma(shape, scale)
}

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Gamma distribution with default parameters (shape = 1.0, scale = 1.0).
 *
 * An alias of `randGamma`. Takes no arguments, so the caller supplies neither a seed
 * nor distribution parameters.
 *
 * @return A column with i.i.d. samples from the default Gamma distribution.
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_gamma(): Column = rand_gamma(1.0, 1.0)
def rand_gamma(): Column = randGamma()

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Laplace distribution with the specified location parameter `mu` and scale parameter `beta`.
 *
 * The sample is built from a single `F.rand(seed)` draw shifted by -0.5 (call it `u`) as
 * `mu - beta * signum(u) * log(1 - 2 * abs(u))`.
 *
 * @param seed seed passed to `F.rand`
 * @param mu   column supplying the location parameter
 * @param beta column supplying the scale parameter
 * @return a column holding the computed expression
 *
 * @note The function is non-deterministic in the general case.
 */
def randLaplace(seed: Long, mu: Column, beta: Column): Column = {
  // Centre the uniform draw around zero so that its sign selects the tail.
  val u = F.rand(seed) - lit(0.5)
  mu - beta * signum(u) * log(lit(1) - (lit(2) * F.abs(u)))
}

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Laplace distribution with the specified location parameter `mu` and scale parameter `beta`.
 *
 * An alias of `randLaplace`, kept so that callers of the snake_case name with a seed and
 * `Column` parameters keep compiling.
 *
 * @param seed seed forwarded to `randLaplace`
 * @param mu   column supplying the location parameter
 * @param beta column supplying the scale parameter
 * @return whatever `randLaplace` returns for the same arguments
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_laplace(seed: Long, mu: Column, beta: Column): Column = randLaplace(seed, mu, beta)
Expand All @@ -68,41 +115,83 @@ object functions {
*
* @note The function is non-deterministic in general case.
*/
def randLaplace(seed: Long, mu: Double, beta: Double): Column = {
  // Lift the numeric parameters into literal columns, then delegate to the Column overload.
  val location = lit(mu)
  val spread = lit(beta)
  randLaplace(seed, location, spread)
}

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Laplace distribution with the specified location parameter `mu` and scale parameter `beta`.
 *
 * The seed is taken from `Utils.random.nextLong` each time this method is called.
 *
 * @param mu   column supplying the location parameter
 * @param beta column supplying the scale parameter
 * @return the column produced by the seeded overload
 *
 * @note The function is non-deterministic in the general case.
 */
def randLaplace(mu: Column, beta: Column): Column = {
  val seed = Utils.random.nextLong
  randLaplace(seed, mu, beta)
}

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Laplace distribution with the specified location parameter `mu` and scale parameter `beta`.
 *
 * The seed is taken from `Utils.random.nextLong` each time this method is called.
 *
 * @param mu   location parameter
 * @param beta scale parameter
 * @return the column produced by the seeded overload
 *
 * @note The function is non-deterministic in the general case.
 */
def randLaplace(mu: Double, beta: Double): Column = {
  val seed = Utils.random.nextLong
  randLaplace(seed, mu, beta)
}

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Laplace distribution with default parameters (mu = 0.0, beta = 1.0).
 *
 * @return the column produced by `randLaplace(0.0, 1.0)`
 *
 * @note The function is non-deterministic in the general case.
 */
def randLaplace(): Column = {
  val defaultMu = 0.0
  val defaultBeta = 1.0
  randLaplace(defaultMu, defaultBeta)
}

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Laplace distribution with the specified location parameter `mu` and scale parameter `beta`.
 *
 * An alias of `randLaplace`
 *
 * @param seed seed for the random draw
 * @param mu   location parameter
 * @param beta scale parameter
 * @return a column of Laplace-distributed samples
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_laplace(seed: Long, mu: Double, beta: Double): Column = {
rand_laplace(seed, lit(mu), lit(beta))
randLaplace(seed, mu, beta)
}

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Laplace distribution with the specified location parameter `mu` and scale parameter `beta`.
 *
 * An alias of `randLaplace`. No seed is supplied by the caller.
 *
 * @param mu   column supplying the location parameter
 * @param beta column supplying the scale parameter
 * @return a column of Laplace-distributed samples
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_laplace(mu: Column, beta: Column): Column = rand_laplace(Utils.random.nextLong, mu, beta)
def rand_laplace(mu: Column, beta: Column): Column = randLaplace(mu, beta)

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Laplace distribution with the specified location parameter `mu` and scale parameter `beta`.
 *
 * An alias of `randLaplace`. No seed is supplied by the caller.
 *
 * @param mu   location parameter
 * @param beta scale parameter
 * @return a column of Laplace-distributed samples
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_laplace(mu: Double, beta: Double): Column = rand_laplace(Utils.random.nextLong, mu, beta)
def rand_laplace(mu: Double, beta: Double): Column = randLaplace(mu, beta)

/**
 * Generate a column with independent and identically distributed (i.i.d.) samples
 * from the Laplace distribution with default parameters (mu = 0.0, beta = 1.0).
 *
 * An alias of `randLaplace`. Takes no arguments, so the caller supplies neither a seed
 * nor distribution parameters.
 *
 * @return a column of samples from the default Laplace distribution
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_laplace(): Column = rand_laplace(0.0, 1.0)
def rand_laplace(): Column = randLaplace()

/**
 * Generate a random column with independent and identically distributed (i.i.d.) samples
 * uniformly distributed in [`min`, `max`).
 *
 * The value is computed as `min + (max - min) * F.rand(seed)`.
 *
 * @param seed seed passed to `F.rand`
 * @param min  column supplying the lower bound
 * @param max  column supplying the upper bound
 * @return a column holding the computed expression
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_range(seed: Long, min: Column, max: Column): Column = {
def randRange(seed: Long, min: Column, max: Column): Column = {
min + (max - min) * F.rand(seed)
}

Expand All @@ -112,28 +201,76 @@ object functions {
*
* @note The function is non-deterministic in general case.
*/
def randRange(seed: Long, min: Int, max: Int): Column = {
  // Lift the integer bounds into literal columns, then delegate to the Column overload.
  val lower = lit(min)
  val upper = lit(max)
  randRange(seed, lower, upper)
}

/**
 * Generate a random column with independent and identically distributed (i.i.d.) samples
 * uniformly distributed in [`min`, `max`).
 *
 * The seed is taken from `Utils.random.nextLong` each time this method is called.
 *
 * @param min lower bound
 * @param max upper bound
 * @return the column produced by the seeded overload
 *
 * @note The function is non-deterministic in the general case.
 */
def randRange(min: Int, max: Int): Column = {
  val seed = Utils.random.nextLong
  randRange(seed, min, max)
}

/**
 * Generate a random column with independent and identically distributed (i.i.d.) samples
 * uniformly distributed in [`min`, `max`).
 *
 * The seed is taken from `Utils.random.nextLong` each time this method is called.
 *
 * @param min column supplying the lower bound
 * @param max column supplying the upper bound
 * @return the column produced by the seeded overload
 *
 * @note The function is non-deterministic in the general case.
 */
def randRange(min: Column, max: Column): Column = {
  val seed = Utils.random.nextLong
  randRange(seed, min, max)
}

/**
 * Snake-case alias of `randRange(seed: Long, min: Column, max: Column)`.
 *
 * Generates a random column with independent and identically distributed (i.i.d.) samples
 * uniformly distributed in [`min`, `max`).
 *
 * @param seed seed forwarded to `randRange`
 * @param min  column supplying the lower bound
 * @param max  column supplying the upper bound
 * @return whatever `randRange` returns for the same arguments
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_range(seed: Long, min: Column, max: Column): Column = randRange(seed, min, max)

/**
 * Generate a random column with independent and identically distributed (i.i.d.) samples
 * uniformly distributed in [`min`, `max`).
 *
 * An alias of `randRange`
 *
 * @param seed seed for the random draw
 * @param min  lower bound
 * @param max  upper bound
 * @return a column of uniformly distributed samples
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_range(seed: Long, min: Int, max: Int): Column = {
rand_range(seed, lit(min), lit(max))
randRange(seed, min, max)
}

/**
 * Generate a random column with independent and identically distributed (i.i.d.) samples
 * uniformly distributed in [`min`, `max`).
 *
 * An alias of `randRange`. No seed is supplied by the caller.
 *
 * @param min lower bound
 * @param max upper bound
 * @return a column of uniformly distributed samples
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_range(min: Int, max: Int): Column = {
rand_range(Utils.random.nextLong, min, max)
randRange(min, max)
}

/**
 * Generate a random column with independent and identically distributed (i.i.d.) samples
 * uniformly distributed in [`min`, `max`).
 *
 * An alias of `randRange`. No seed is supplied by the caller.
 *
 * @param min column supplying the lower bound
 * @param max column supplying the upper bound
 * @return a column of uniformly distributed samples
 *
 * @note The function is non-deterministic in the general case.
 */
def rand_range(min: Column, max: Column): Column = {
rand_range(Utils.random.nextLong, min, max)
randRange(min, max)
}

/**
Expand Down

0 comments on commit 3fb2174

Please sign in to comment.