From adef7ceae1086cd8f46a877f1b908716343fac8f Mon Sep 17 00:00:00 2001 From: hmhummel Date: Wed, 14 Aug 2024 16:44:04 -0700 Subject: [PATCH 1/3] Added sphinx documentation for simplify and created an html page for causaleffectpy on which to store documentation. Swapped causaleffect function names for y0 function names --- documentation/.DS_Store | Bin 0 -> 6148 bytes documentation/Makefile | 20 ++++++++++ documentation/make.bat | 35 +++++++++++++++++ documentation/source/conf.py | 28 ++++++++++++++ documentation/source/functions.rst | 8 ++++ documentation/source/index.rst | 29 ++++++++++++++ documentation/source/simplify.rst | 59 +++++++++++++++++++++++++++++ 7 files changed, 179 insertions(+) create mode 100644 documentation/.DS_Store create mode 100644 documentation/Makefile create mode 100644 documentation/make.bat create mode 100644 documentation/source/conf.py create mode 100644 documentation/source/functions.rst create mode 100644 documentation/source/index.rst create mode 100644 documentation/source/simplify.rst diff --git a/documentation/.DS_Store b/documentation/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..841fb83b947b762a0467298d9ea3be570d956bba GIT binary patch literal 6148 zcmeH~&ui2`6vyAR+jiC>>_MRyCE&HxZC4cW5@S_72x5vJRASPk8k)^4N!C3q3pwi_ zLuYT{NN`WKYAxJnbi|J)4wH? 
zBCng_@VnU9>}_rD$Q`*W--RF5ENtRtS_g6UMz>CsDw2zOoV?1jSv0uzNR>^Tms!OL zd72^Q?d!Zu)htlcGOal`Gy}3P`_W);K0h1{_r1}*qs6{Azkh`KozdOJqA#!Cy8Yl~ zbY6^0^_icU1lA_Dt5z@M3o4yA`V>}Wp~?^F)ftk;6w^6nbO1gvHJ!B$Y{U)3blFIH zZsX6GWwsKp@79gqrkWJES9D6Nu|NJ6dr8PIz5fdPn{+}?=qW|aw4w$?BYJ^vk;2E| zR-!LMUx2bg3*QIdX@c>yW$yHFg(aHKn1G!Za@X;21RMcJV1FD=toNu)?YSKyA!-ueU7WRZ9`0lvA z$KfP=qpoxW9D!v5o2EI?`~TxVpZ}MWT+0z~1TGZ;(K`;01H6*mTi0Gr@3kKMJzSfP nYmIUW3cDS1MQ_D>a5jv2x&rmBwnotdb3X!%!4;0cKPB)Bw6f41 literal 0 HcmV?d00001 diff --git a/documentation/Makefile b/documentation/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/documentation/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/documentation/make.bat b/documentation/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/documentation/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/documentation/source/conf.py b/documentation/source/conf.py new file mode 100644 index 0000000..5f72bf5 --- /dev/null +++ b/documentation/source/conf.py @@ -0,0 +1,28 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'Causaleffectpy documentation' +author = 'Haley Hummel' +release = '0.0.1' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +master_doc = 'index' +extensions = [] + +templates_path = ['_templates'] +exclude_patterns = [] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ['_static'] diff --git a/documentation/source/functions.rst b/documentation/source/functions.rst new file mode 100644 index 0000000..c5a0b01 --- /dev/null +++ b/documentation/source/functions.rst @@ -0,0 +1,8 @@ +CausalEffect Functions +======================= + +.. toctree:: + :maxdepth: 1 + :titlesonly: + + simplify \ No newline at end of file diff --git a/documentation/source/index.rst b/documentation/source/index.rst new file mode 100644 index 0000000..8ba22ec --- /dev/null +++ b/documentation/source/index.rst @@ -0,0 +1,29 @@ +.. 
Causaleffectpy documentation documentation master file, created by + sphinx-quickstart on Tue Aug 13 12:31:43 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Causaleffect Documentation +========================== + +This documentation provides an overview of `causaleffectpy`, which is derived from Santu Tikka's `causaleffect` R package. This documentation will focus on `simplify` and related functions in order to integrate them into the open source `y0` (Why Not?) Python package. For further information, see Tikka & Karvanen (2017) "Simplifying Probabilistic Expressions in Causal Inference". + +.. toctree:: + :maxdepth: 2 + + functions + + +References +=============== + +Hoyt, C.T., Zucker, J., & Parent, M-A. (2021). Y0 “Why Not?” for Causal Inference in Python (1.0) [Python package]. 10.5281/zenodo.4950768. https://github.com/y0-causal-inference/y0. +Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` \ No newline at end of file diff --git a/documentation/source/simplify.rst b/documentation/source/simplify.rst new file mode 100644 index 0000000..54c5cab --- /dev/null +++ b/documentation/source/simplify.rst @@ -0,0 +1,59 @@ +Simplify +======== + +This function algebraically simplifies probabilistic expressions given by the ID algorithm from :func:`identify`. It always attempts to perform maximal simplification, meaning that as many variables of the set are removed as possible. If the simplification in terms of the entire set cannot be completed, the intermediate result with as many variables simplified as possible should be returned. + +Run :func:`identify` with the graph information first, then use the output of :func:`identify` as the `P` in :func:`parse_causaleffect`. 
Use the output from :func:`parse_causaleffect` as the `P` in :func:`simplify`. + +For further information, see Tikka & Karvanen (2017) "Simplifying Probabilistic Expressions in Causal Inference" Algorithm 1. + + +Parameters +---------- +P : `sympy` expression or `y0` `Probability` object + The probabilistic expression that will be simplified, typically created using symbolic expressions in `sympy` or using `y0`'s Probability class. +topo : list of nodes + The topological ordering of the vertices in graph `G`, which can be obtained using `networkx.topological_sort`. +G_unobs : networkx.DiGraph object + A separate directed acyclic graph (DAG) that includes explicit nodes for unobserved confounders, created using `networkx.DiGraph`. +G : networkx.DiGraph object + Main graph G, which includes bidirected edges, and is created with :func:`igraph.graph_formula`. +G_obs : networkx.DiGraph object + A DAG that only includes directed edges, representing observed variables, created using `networkx.DiGraph`. + +Details +------- +This function depends on several other functions and classes, including: :func:`parents`, :func:`ancestors`, :func:`parse_causaleffect`, :func:`is_d_separated`, and :class:`probability`. + +Returns +------- +list + Section in-progress + +References +---------- +Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. + + +See Also +-------- +:func:`identify`, :func:`parse_causaleffect`, :func:`get.expression`, :class:`probability` + +Examples +-------- +Section in-progress + +.. 
code-block:: python + + +Keywords +-------- +models, manip, math, utilities +Concepts +-------- +probabilistic expressions, graph theory, causal inference + +Author +------ +Haley Hummel, +Psychology PhD student at Oregon State University From ecf838b9f299bee0231b738b25c2e8b43c3ab9a5 Mon Sep 17 00:00:00 2001 From: hmhummel Date: Thu, 15 Aug 2024 16:51:17 -0700 Subject: [PATCH 2/3] Tidied documentation; added sphinx documentation for join, insert, and powerset and integrated into existing html files --- documentation/source/functions.rst | 7 ++- documentation/source/index.rst | 2 +- documentation/source/insert.rst | 69 +++++++++++++++++++++++++++ documentation/source/join.rst | 75 ++++++++++++++++++++++++++++++ documentation/source/powerset.rst | 44 ++++++++++++++++++ documentation/source/simplify.rst | 18 ++++--- 6 files changed, 205 insertions(+), 10 deletions(-) create mode 100644 documentation/source/insert.rst create mode 100644 documentation/source/join.rst create mode 100644 documentation/source/powerset.rst diff --git a/documentation/source/functions.rst b/documentation/source/functions.rst index c5a0b01..4930738 100644 --- a/documentation/source/functions.rst +++ b/documentation/source/functions.rst @@ -2,7 +2,10 @@ CausalEffect Functions ======================= .. toctree:: - :maxdepth: 1 + :maxdepth: 4 :titlesonly: - simplify \ No newline at end of file + simplify + join + insert + powerset \ No newline at end of file diff --git a/documentation/source/index.rst b/documentation/source/index.rst index 8ba22ec..63742fa 100644 --- a/documentation/source/index.rst +++ b/documentation/source/index.rst @@ -3,7 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Causaleffect Documentation +`Causaleffectpy` Documentation ========================== This documentation provides an overview of `causaleffectpy`, which is derived from Santu Tikka's `causaleffect` R package. 
This documentation will focus on `simplify` and related functions in order to integrate them into the open source `y0` (Why Not?) Python package. For further information, see Tikka & Karvanen (2017) "Simplifying Probabilistic Expressions in Causal Inference". diff --git a/documentation/source/insert.rst b/documentation/source/insert.rst new file mode 100644 index 0000000..af70504 --- /dev/null +++ b/documentation/source/insert.rst @@ -0,0 +1,69 @@ +Insert +====== + +The `insert` function inserts a missing variable into a joint distribution `P(J|D)` using d-separation criteria in a given graph `G`. It is called when there are variables without corresponding terms in the expression. + +Parameters +---------- +J : list of str + The set of variables representing the joint distribution. +D : list of str + The set of variables representing the conditioning set of the joint distribution. +M : str + The variable to be inserted. +cond : list of str + The set of conditioning variables. +S : list of str + The current summation variable. +O : list of str + The set of observed variables. +G_unobs : y0.Graph + Separate graph that turns bidirected edges into explicit nodes for unobserved confounders. +G : y0.Graph + Main graph `G`. Includes bidirected edges. +G_obs : y0.Graph + Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes). +topo : list of str + The topological ordering of the vertices in graph `G`. + +Returns +------- +dict + A dictionary with the following keys: + - `J_new`: list of str. An updated set of joint distribution variables. + - `D_new`: list of str. An updated set of conditioning variables. + - `M`: str. The inserted variable. + - `ds_i`: list of str. The subset from the power set used in the insertion. + + If no conditions were met, `insert` will return the original `J` and `D`. + + +Examples +-------- +Section in-progress +.. 
code-block:: python + + +See Also +-------- +- :func:`join` +- :func:`simplify` +- :func:`wrap_dSep` +- :func:`powerset` + +Keywords +-------- +models, manip, math, utilities, graphs, methods, multivariate, distribution, probability + +Concepts +-------- +probabilistic expressions, graph theory, joint distribution, causal inference, d-separation + +References +---------- +Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. *Journal of Machine Learning Research*, 18(36), 1-30. + +Author +------ +Haley Hummel, +Psychology PhD student at Oregon State University \ No newline at end of file diff --git a/documentation/source/join.rst b/documentation/source/join.rst new file mode 100644 index 0000000..6412bb2 --- /dev/null +++ b/documentation/source/join.rst @@ -0,0 +1,75 @@ +Join +==== + +The `join` function determines whether the terms of the atomic expression actually represent a joint distribution. +It attempts to combine two terms: the joint term `P(J|D)` obtained from `simplify()` and the +term `P(V|C) := P(Vk|Ck)` of the current iteration step. The goal is to +determine if these terms can be combined based on the d-separation criteria in the graph `G`. + +Parameters +---------- +J : list of str + Joint set `P(J|D)`; already processed and included in the joint distribution + from previous `simplify` iteration. Initially, may be empty for the starting point of + the joint distribution. `vari` is added to expand it if d-separation conditions are met. +D : list of str + Term `P(V|C) := P(Vk|Ck)`; set of variables that condition the joint distribution. + `join` checks and updates `D` as necessary to maintain the validity of the joint distribution + when combined with `vari`. +vari : str + Current variable being considered for inclusion in the joint distribution. +cond : list of str + Set of variables that condition the current variable `vari`. 
`join` uses `cond` + to evaluate conditional independence and determine if `vari` can be added to `J`. +S : list of str + Not used directly in `join`. Current summation variable. +M : list of str + Missing variables (variables not contained within the expression). +O : list of str + Observed variables (variables contained within the expression). +G_unobs : y0.Graph + Separate graph that turns bidirected edges into explicit nodes for unobserved confounders. +G : y0.Graph + Main graph `G`. Includes bidirected edges. +G_obs : y0.Graph + Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes). +topo : list of str + The topological ordering of the vertices in graph `G`. + +Returns +------- +list of str + The joint result, or the original result if none of the conditions for joining were met. + +Dependencies +------------ +This function depends on several functions from the causaleffect package, including: :func:`powerset`, :func:`is_d_separated`, and :func:`insert`. + +See Also +-------- +- :func:`simplify` +- :func:`is_d_separated` +- :func:`insert` + +Examples +-------- +Section in-progress +.. code-block:: python + +Keywords +-------- +models, manip, math, utilities + +Concepts +-------- +probabilistic expressions, graph theory, causal inference + +References +---------- +Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. + +Author +------ +Haley Hummel, +Psychology PhD student at Oregon State University + diff --git a/documentation/source/powerset.rst b/documentation/source/powerset.rst new file mode 100644 index 0000000..f168f8e --- /dev/null +++ b/documentation/source/powerset.rst @@ -0,0 +1,44 @@ +Powerset +======== + +The `powerset` function generates the power set of a given set. The power set is the set of all possible subsets of the original set, including the empty set and the set itself. 
+ +Parameters +---------- +set : list + A list representing the original set for which the power set will be generated. The set can contain any type of elements (e.g., numeric, string, or boolean). + +Details +------- +The function computes all possible combinations of the elements of the input set. This includes the empty subset, individual elements, and all larger subsets up to and including the full set. The number of subsets in the power set of a set of size `n` is `2^n`. + +Returns +------- +list of lists + A list of lists, where each inner list is a subset of the original input set. The list contains `2^n` subsets, where `n` is the length of the input set. If the input set is empty, the function returns a list containing only the empty set. + +Examples +-------- +Section in-progress +.. code-block:: python + +See Also +-------- +- `join`: for using `powerset` with conditional independence in probabilistic graphical models. + +Keywords +-------- +set theory, combinatorics + +Concepts +-------- +power set, subsets + +References +---------- +Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. *Journal of Machine Learning Research*, 18(36), 1-30. + +Author +------ +Haley Hummel, +Psychology PhD student at Oregon State University \ No newline at end of file diff --git a/documentation/source/simplify.rst b/documentation/source/simplify.rst index 54c5cab..860b476 100644 --- a/documentation/source/simplify.rst +++ b/documentation/source/simplify.rst @@ -21,23 +21,23 @@ G : networkx.DiGraph object G_obs : networkx.DiGraph object A DAG that only includes directed edges, representing observed variables, created using `networkx.DiGraph`. -Details -------- -This function depends on several other functions and classes, including: :func:`parents`, :func:`ancestors`, :func:`parse_causaleffect`, :func:`is_d_separated`, and :class:`probability`. Returns ------- list Section in-progress -References ----------- -Tikka, S., & Karvanen, J. 
(2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. +Dependencies +------------ +This function depends on several other functions and classes, including: :func:`parents`, :func:`ancestors`, :func:`parse_causaleffect`, :func:`is_d_separated`, and :class:`probability`. See Also -------- -:func:`identify`, :func:`parse_causaleffect`, :func:`get.expression`, :class:`probability` +- :func:`identify` +- :func:`parse_causaleffect` +- :func:`get.expression` +- :class:`probability` Examples -------- @@ -53,6 +53,10 @@ Concepts -------- probabilistic expressions, graph theory, causal inference +References +---------- +Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. + Author ------ Haley Hummel, From 73dc6a630fbe643fd2c4ab394d777dec50c51587 Mon Sep 17 00:00:00 2001 From: hmhummel Date: Thu, 15 Aug 2024 16:55:54 -0700 Subject: [PATCH 3/3] Fixed typo in index.rst --- documentation/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/source/index.rst b/documentation/source/index.rst index 63742fa..aaac172 100644 --- a/documentation/source/index.rst +++ b/documentation/source/index.rst @@ -1,4 +1,4 @@ -.. Causaleffectpy documentation documentation master file, created by +.. Causaleffectpy documentation master file, created by sphinx-quickstart on Tue Aug 13 12:31:43 2024. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive.