standardised t distribution

Primebrook · Primebrook · commit e3720309cede · 2025-01-31T11:21:59.000Z
diff --git a/.gitignore b/.gitignore
@@ -24,3 +24,6 @@ exstatic-*.tar
 
 # Temporary files, for example, from tests.
 /tmp/
+
+# Shared binary artifacts (compiled NIFs).
+priv/native/*.so
diff --git a/lib/exstatic/distribution.ex b/lib/exstatic/distribution.ex
@@ -2,7 +2,7 @@ defmodule Exstatic.Distribution do
   @type t() :: struct()
   @type error() :: {:error, atom()}
 
-  @callback mean(distribution :: t()) :: float() | :undefined
+  @callback mean(distribution :: t()) :: float()
   @callback variance(distribution :: t()) :: float() | :undefined | :infinity
   @callback std_dev(distribution :: t()) :: float()
   @callback entropy(distribution :: t()) :: float()
diff --git a/lib/exstatic/distribution/t.ex b/lib/exstatic/distribution/t.ex
@@ -1,53 +1,133 @@
-defmodule Exstatic.Distribution.T do
+defmodule Exstatic.Distribution.StandardizedT do
   @moduledoc """
-  Student's t-distribution implementation.
+  The standardized Student's t-distribution, used in statistical hypothesis testing.
+
+  This implementation ensures that:
+  - The mean is always `0.0`.
+  - The variance is infinite for `1 < df ≤ 2` and equals `df / (df - 2)` for `df > 2`.
+  - Only `df > 1` is accepted, which guarantees that the mean exists.
+
+  ## Examples
+
+      iex> alias Exstatic.Distribution.StandardizedT
+      iex> {:ok, t} = StandardizedT.new(5.0)
+      iex> StandardizedT.mean(t)
+      0.0
+      iex> pdf = StandardizedT.pdf(t, 0.0)
+      iex> TestHelper.assert_in_delta(pdf, 0.37960669, 1.0e-6)
+      true
+      iex> result = StandardizedT.cdf(t, 0.0)
+      iex> TestHelper.assert_in_delta(result, 0.5)
+      true
   """
 
   @behaviour Exstatic.Distribution
   @behaviour Exstatic.Continuous
   @behaviour Exstatic.ContinuousCDF
 
-  defstruct [:mean, :std_dev, :df]
+  defstruct [:df]
 
-  @type t :: %__MODULE__{
-          mean: float(),
-          std_dev: float(),
-          df: float()
-        }
+  @type t :: %__MODULE__{df: float()}
 
-  def new(mean, std_dev, df) when is_number(mean) and is_number(std_dev) and is_number(df) do
-    cond do
-      std_dev <= 0 -> {:error, :invalid_std_dev}
-      df <= 0 -> {:error, :invalid_df}
-      true -> {:ok, %__MODULE__{mean: mean, std_dev: std_dev, df: df}}
-    end
-  end
+  @doc """
+  Creates a new standardized Student's t-distribution with the given degrees of freedom.
 
-  @impl Exstatic.Distribution
-  def mean(%__MODULE__{mean: mean, df: df}) do
-    if df > 1, do: mean, else: :undefined
+  ## Parameters
+  - `df` - The degrees of freedom (`df > 1` required); integers are stored as floats.
+
+  ## Examples
+
+      iex> alias Exstatic.Distribution.StandardizedT
+      iex> StandardizedT.new(5)
+      {:ok, %StandardizedT{df: 5.0}}
+
+      iex> StandardizedT.new(1.0)
+      {:error, :invalid_df}
+
+      iex> StandardizedT.new(-5.0)
+      {:error, :invalid_df}
+  """
+  def new(df) when is_number(df) and df > 1 do
+    {:ok, %__MODULE__{df: df * 1.0}}
   end
 
+  def new(_df), do: {:error, :invalid_df}
+
+  @doc """
+  Returns the mean of the t-distribution.
+
+  The mean is always `0.0` for standardized t-distributions since `df > 1`.
+
+  ## Examples
+
+      iex> {:ok, t} = StandardizedT.new(5.0)
+      iex> StandardizedT.mean(t)
+      0.0
+  """
   @impl Exstatic.Distribution
-  def std_dev(%__MODULE__{std_dev: std_dev}), do: std_dev
+  def mean(_t), do: 0.0
+
+  @doc """
+  Returns the variance of the t-distribution.
 
+  - If `1 < df ≤ 2`, the variance is `:infinity`.
+  - Otherwise, the variance is computed using `Exstatic.Native.standardized_t_variance/1`.
+
+  ## Examples
+
+      iex> {:ok, t} = StandardizedT.new(5.0)
+      iex> TestHelper.assert_in_delta(StandardizedT.variance(t), 5.0 / (5.0 - 2.0), 1.0e-10)
+      true
+
+      iex> {:ok, t} = StandardizedT.new(1.5)
+      iex> StandardizedT.variance(t)
+      :infinity
+  """
   @impl Exstatic.Distribution
-  @spec variance(t) :: float() | :infinity | :undefined
-  def variance(%__MODULE__{std_dev: std_dev, df: df}) do
-    cond do
-      df <= 1.0 -> :undefined
-      df > 1.0 and df <= 2.0 -> :infinity
-      true -> Exstatic.Native.t_variance(std_dev, df)
-    end
+  @spec variance(t) :: float() | :infinity
+  def variance(%__MODULE__{df: df}) do
+    if df <= 2.0, do: :infinity, else: Exstatic.Native.standardized_t_variance(df)
   end
 
+  @doc """
+  Computes the probability density function (PDF) at `x`.
+
+  ## Examples
+
+      iex> {:ok, t} = StandardizedT.new(5.0)
+      iex> TestHelper.assert_in_delta(StandardizedT.pdf(t, 0.0), 0.37960669, 1.0e-6)
+      true
+  """
   @impl Exstatic.Continuous
   def pdf(%__MODULE__{} = dist, x) when is_number(x) do
-    Exstatic.Native.t_pdf(dist.mean, dist.std_dev, dist.df, x)
+    Exstatic.Native.standardized_t_pdf(dist.df, x * 1.0)
   end
 
+  @doc """
+  Computes the cumulative distribution function (CDF) at `x`.
+
+  ## Examples
+
+      iex> {:ok, t} = StandardizedT.new(5.0)
+      iex> TestHelper.assert_in_delta(StandardizedT.cdf(t, 0.0), 0.5, 1.0e-10)
+      true
+  """
   @impl Exstatic.ContinuousCDF
   def cdf(%__MODULE__{} = dist, x) when is_number(x) do
-    Exstatic.Native.t_cdf(dist.mean, dist.std_dev, dist.df, x)
+    Exstatic.Native.standardized_t_cdf(dist.df, x * 1.0)
+  end
+
+  @doc """
+  Computes the survival function (SF) at `x`, which is `1 - CDF(x)`.
+
+  ## Examples
+
+      iex> {:ok, t} = StandardizedT.new(5.0)
+      iex> TestHelper.assert_in_delta(StandardizedT.sf(t, 0.0), 0.5, 1.0e-10)
+      true
+  """
+  @impl Exstatic.ContinuousCDF
+  def sf(%__MODULE__{} = dist, x) when is_number(x) do
+    Exstatic.Native.standardized_t_sf(dist.df, x * 1.0)
   end
 end
diff --git a/lib/exstatic/native.ex b/lib/exstatic/native.ex
@@ -23,12 +23,16 @@ defmodule Exstatic.Native do
   @spec normal_variance(float()) :: float()
   def normal_variance(_std_dev), do: :erlang.nif_error(:nif_not_loaded)
 
-  @spec t_pdf(float(), float(), float(), float()) :: float()
-  def t_pdf(_mean, _std_dev, _df, _x), do: :erlang.nif_error(:nif_not_loaded)
+  @spec standardized_t_pdf(float(), float()) :: float()
+  def standardized_t_pdf(_df, _x), do: :erlang.nif_error(:nif_not_loaded)
 
-  @spec t_cdf(float(), float(), float(), float()) :: float()
-  def t_cdf(_mean, _std_dev, _df, _x), do: :erlang.nif_error(:nif_not_loaded)
+  @spec standardized_t_cdf(float(), float()) :: float()
+  def standardized_t_cdf(_df, _x), do: :erlang.nif_error(:nif_not_loaded)
 
-  @spec t_variance(float(), float()) :: {:ok, float()} | {:error, String.t()}
-  def t_variance(_std_dev, _df), do: :erlang.nif_error(:nif_not_loaded)
+  @spec standardized_t_sf(float(), float()) :: float()
+  def standardized_t_sf(_df, _x), do: :erlang.nif_error(:nif_not_loaded)
+
+  @spec standardized_t_variance(float()) :: float()
+  def standardized_t_variance(_df), do: :erlang.nif_error(:nif_not_loaded)
 end
+
diff --git a/mix.lock b/mix.lock
@@ -1,6 +1,6 @@
 %{
   "finch": {:hex, :finch, "0.19.0", "c644641491ea854fc5c1bbaef36bfc764e3f08e7185e1f084e35e0672241b76d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "fc5324ce209125d1e2fa0fcd2634601c52a787aff1cd33ee833664a5af4ea2b6"},
-  "hpax": {:hex, :hpax, "1.0.1", "c857057f89e8bd71d97d9042e009df2a42705d6d690d54eca84c8b29af0787b0", [:mix], [], "hexpm", "4e2d5a4f76ae1e3048f35ae7adb1641c36265510a2d4638157fbcb53dda38445"},
+  "hpax": {:hex, :hpax, "1.0.2", "762df951b0c399ff67cc57c3995ec3cf46d696e41f0bba17da0518d94acd4aac", [:mix], [], "hexpm", "2f09b4c1074e0abd846747329eaa26d535be0eb3d189fa69d812bfb8bfefd32f"},
   "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"},
   "mime": {:hex, :mime, "2.0.6", "8f18486773d9b15f95f4f4f1e39b710045fa1de891fada4516559967276e4dc2", [:mix], [], "hexpm", "c9945363a6b26d747389aac3643f8e0e09d30499a138ad64fe8fd1d13d9b153e"},
   "mint": {:hex, :mint, "1.6.2", "af6d97a4051eee4f05b5500671d47c3a67dac7386045d87a904126fd4bbcea2e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "5ee441dffc1892f1ae59127f74afe8fd82fda6587794278d924e4d90ea3d63f9"},
diff --git a/native/exstatic/src/lib.rs b/native/exstatic/src/lib.rs
@@ -52,26 +52,44 @@ fn normal_inverse_cdf(mean: f64, std_dev: f64, p: f64) -> NifResult<f64> {
 }
 
 #[rustler::nif]
-fn t_pdf(mean: f64, std_dev: f64, df: f64, x: f64) -> NifResult<f64> {
-    let t = StudentsT::new(mean, std_dev, df).map_err(|e| Error::Term(Box::new(e.to_string())))?;
+fn standardized_t_pdf(df: f64, x: f64) -> NifResult<f64> {
+    if df <= 1.0 {
+        return Err(Error::Term(Box::new("Degrees of freedom must be greater than 1")));
+    }
+
+    let t = StudentsT::new(0.0, 1.0, df).map_err(|e| Error::Term(Box::new(e.to_string())))?;
     Ok(t.pdf(x))
 }
 
 #[rustler::nif]
-fn t_cdf(mean: f64, std_dev: f64, df: f64, x: f64) -> NifResult<f64> {
-    let t = StudentsT::new(mean, std_dev, df).map_err(|e| Error::Term(Box::new(e.to_string())))?;
+fn standardized_t_cdf(df: f64, x: f64) -> NifResult<f64> {
+    if df <= 1.0 {
+        return Err(Error::Term(Box::new("Degrees of freedom must be greater than 1")));
+    }
+
+    let t = StudentsT::new(0.0, 1.0, df).map_err(|e| Error::Term(Box::new(e.to_string())))?;
     Ok(t.cdf(x))
 }
 
 #[rustler::nif]
-fn t_variance(std_dev: f64, df: f64) -> NifResult<f64> {
+fn standardized_t_sf(df: f64, x: f64) -> NifResult<f64> {
+    if df <= 1.0 {
+        return Err(Error::Term(Box::new("Degrees of freedom must be greater than 1")));
+    }
+    let t = StudentsT::new(0.0, 1.0, df).map_err(|e| Error::Term(Box::new(e.to_string())))?;
+    // Location is 0, so the density is symmetric: SF(x) = CDF(-x), with no `1.0 - cdf` cancellation.
+    Ok(t.cdf(-x))
+}
+
+#[rustler::nif]
+fn standardized_t_variance(df: f64) -> NifResult<f64> {
     if df <= 1.0 {
         return Err(Error::Term(Box::new("Variance is undefined for df ≤ 1")));
     } else if df > 1.0 && df <= 2.0 {
         return Err(Error::Term(Box::new("Variance is infinite for 1 < df ≤ 2")));
     }
 
-    let t = StudentsT::new(0.0, std_dev, df).map_err(|e| Error::Term(Box::new(e.to_string())))?;
+    let t = StudentsT::new(0.0, 1.0, df).map_err(|e| Error::Term(Box::new(e.to_string())))?;
     t.variance().ok_or_else(|| Error::Term(Box::new("Failed to calculate variance")))
 }
 
diff --git a/priv/native/libexstatic.so b/priv/native/libexstatic.so
diff --git a/test/exstatic/distribution/t_test.exs b/test/exstatic/distribution/t_test.exs
@@ -1,75 +1,71 @@
-defmodule Exstatic.Distribution.TTest do
+defmodule Exstatic.Distribution.StandardizedTTest do
   use ExUnit.Case, async: true
 
-  alias Exstatic.Distribution.T
+  alias Exstatic.Distribution.StandardizedT
 
-  doctest Exstatic.Distribution.T
+  doctest Exstatic.Distribution.StandardizedT
 
-  describe "new/3" do
-    test "creates a valid t-distribution" do
-      assert {:ok, _t} = T.new(0.0, 1.0, 5.0)
-    end
-
-    test "returns error for invalid std_dev" do
-      assert {:error, :invalid_std_dev} = T.new(0.0, 0.0, 5.0)
+  describe "new/1" do
+    test "creates a valid standardized t-distribution" do
+      assert {:ok, _t} = StandardizedT.new(5.0)
     end
 
     test "returns error for invalid degrees of freedom" do
-      assert {:error, :invalid_df} = T.new(0.0, 1.0, 0.0)
-      assert {:error, :invalid_df} = T.new(0.0, 1.0, -5.0)
+      assert {:error, :invalid_df} = StandardizedT.new(1.0)
+      assert {:error, :invalid_df} = StandardizedT.new(0.0)
+      assert {:error, :invalid_df} = StandardizedT.new(-5.0)
     end
   end
 
   describe "mean/1" do
-    test "returns the mean when df > 1" do
-      {:ok, t} = T.new(5.0, 1.0, 3.0)
-      assert T.mean(t) == 5.0
-    end
+    test "returns 0.0 for any valid standardized t-distribution" do
+      {:ok, t} = StandardizedT.new(3.0)
+      assert StandardizedT.mean(t) == 0.0
 
-    test "returns :undefined when df = 1" do
-      {:ok, t} = T.new(5.0, 1.0, 1.0)
-      assert T.mean(t) == :undefined
-    end
-
-    test "returns :undefined when df < 1" do
-      {:ok, t} = T.new(5.0, 1.0, 0.5)
-      assert T.mean(t) == :undefined
+      {:ok, t} = StandardizedT.new(10.0)
+      assert StandardizedT.mean(t) == 0.0
     end
   end
 
   describe "variance/1" do
     test "returns a finite variance when df > 2" do
-      {:ok, t} = T.new(0.0, 2.0, 5.0)
-      expected_variance = 5.0 / (5.0 - 2.0) * (2.0 * 2.0)
-      assert TestHelper.assert_in_delta(T.variance(t), expected_variance)
+      {:ok, t} = StandardizedT.new(5.0)
+      expected_variance = 5.0 / (5.0 - 2.0)
+      assert TestHelper.assert_in_delta(StandardizedT.variance(t), expected_variance)
     end
 
     test "returns :infinity when 1 < df ≤ 2" do
-      {:ok, t} = T.new(0.0, 1.0, 1.5)
-      assert T.variance(t) == :infinity
-    end
-
-    test "returns :undefined when df ≤ 1" do
-      {:ok, t} = T.new(0.0, 1.0, 1.0)
-      assert T.variance(t) == :undefined
+      {:ok, t} = StandardizedT.new(1.5)
+      assert StandardizedT.variance(t) == :infinity
     end
   end
 
   describe "pdf/2" do
     test "computes valid PDF values" do
-      {:ok, t} = T.new(0.0, 1.0, 5.0)
-      assert TestHelper.assert_in_delta(T.pdf(t, 0.0), 0.37960669, 1.0e-6)
-      assert TestHelper.assert_in_delta(T.pdf(t, 1.0), 0.219679797, 1.0e-6)
+      {:ok, t} = StandardizedT.new(5.0)
+      assert TestHelper.assert_in_delta(StandardizedT.pdf(t, 0.0), 0.37960669, 1.0e-6)
+      assert TestHelper.assert_in_delta(StandardizedT.pdf(t, 1.0), 0.219679797, 1.0e-6)
     end
   end
 
   describe "cdf/2" do
     test "computes valid CDF values" do
-      {:ok, t} = T.new(0.0, 1.0, 5.0)
+      {:ok, t} = StandardizedT.new(5.0)
+
+      assert TestHelper.assert_in_delta(StandardizedT.cdf(t, 0.0), 0.5, 1.0e-9)
+      assert TestHelper.assert_in_delta(StandardizedT.cdf(t, -100.0), 0.0, 1.0e-9)
+      assert TestHelper.assert_in_delta(StandardizedT.cdf(t, 100.0), 1.0, 1.0e-9)
+    end
+  end
+
+  describe "sf/2" do
+    test "computes valid SF values" do
+      {:ok, t} = StandardizedT.new(5.0)
 
-      assert TestHelper.assert_in_delta(T.cdf(t, 0.0), 0.5, 1.0e-9)
-      assert TestHelper.assert_in_delta(T.cdf(t, -100.0), 0.0, 1.0e-9)
-      assert TestHelper.assert_in_delta(T.cdf(t, 100.0), 1.0, 1.0e-9)
+      assert TestHelper.assert_in_delta(StandardizedT.sf(t, 0.0), 0.5, 1.0e-9)
+      assert TestHelper.assert_in_delta(StandardizedT.sf(t, -100.0), 1.0, 1.0e-9)
+      assert TestHelper.assert_in_delta(StandardizedT.sf(t, 100.0), 0.0, 1.0e-9)
     end
   end
 end
+