diff --git a/.github/workflows/update_wiki.yml b/.github/workflows/update_wiki.yml index 2396421..4c235f7 100644 --- a/.github/workflows/update_wiki.yml +++ b/.github/workflows/update_wiki.yml @@ -5,7 +5,7 @@ on: push: # Trigger only when wiki directory changes paths: - - 'doc/**' + - 'wiki/**' # Trigger only on main/beta branches: [ main ] @@ -19,7 +19,7 @@ jobs: - name: Push Wiki Changes uses: Andrew-Chen-Wang/github-wiki-action@v2 env: - WIKI_DIR: doc/ + WIKI_DIR: wiki/ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_MAIL: ${{ secrets.EMAIL }} GH_NAME: ${{ github.repository_owner }} diff --git a/README.md b/README.md index db39144..18fd54f 100755 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@
-
-
+
+
-
-
+
+
@@ -20,11 +20,11 @@
## Resources
-* **Documentation / Wiki**: [github.com/bradendubois/probability-code/wiki](https://github.com/bradendubois/probability-code/wiki)
-* **Source Code**: [github.com/bradendubois/probability-code](https://github.com/bradendubois/probability-code)
+* **Documentation / Wiki**: [github.com/bradendubois/do-calculus/wiki](https://github.com/bradendubois/do-calculus/wiki)
+* **Source Code**: [github.com/bradendubois/do-calculus](https://github.com/bradendubois/do-calculus)
* **PyPI**: [pypi.org/project/do-calculus/](https://pypi.org/project/do-calculus/)
-* **Releases**: [github.com/bradendubois/probability-code/releases](https://github.com/bradendubois/probability-code/releases)
-* **Bug reports**: [github.com/bradendubois/probability-code/issues](https://github.com/bradendubois/probability-code/issues)
+* **Releases**: [github.com/bradendubois/do-calculus/releases](https://github.com/bradendubois/do-calculus/releases)
+* **Bug reports**: [github.com/bradendubois/do-calculus/issues](https://github.com/bradendubois/do-calculus/issues)
* **Contact**: [braden.dubois@usask.ca](mailto:braden.dubois@usask.ca)
-See the [wiki](https://github.com/bradendubois/probability-code/wiki) to get started.
+See the [wiki](https://github.com/bradendubois/do-calculus/wiki) to get started.
diff --git a/doc/REPL.md b/debug/REPL.md
similarity index 100%
rename from doc/REPL.md
rename to debug/REPL.md
diff --git a/do/graphs/dataset_generator/distribution_generation.py b/debug/dataset_generator/distribution_generation.py
similarity index 100%
rename from do/graphs/dataset_generator/distribution_generation.py
rename to debug/dataset_generator/distribution_generation.py
diff --git a/do/graphs/dataset_generator/graph_generator.py b/debug/dataset_generator/graph_generator.py
similarity index 100%
rename from do/graphs/dataset_generator/graph_generator.py
rename to debug/dataset_generator/graph_generator.py
diff --git a/do/graphs/dataset_generator/model_generator.py b/debug/dataset_generator/model_generator.py
similarity index 98%
rename from do/graphs/dataset_generator/model_generator.py
rename to debug/dataset_generator/model_generator.py
index eba211c..a0978e4 100755
--- a/do/graphs/dataset_generator/model_generator.py
+++ b/debug/dataset_generator/model_generator.py
@@ -26,7 +26,7 @@
except ValueError:
print("Could not convert", argv[1], "to int; defaulting to", N)
-destination_directory = Path(".", argv[2])
+destination_directory = Path("", argv[2])
if not destination_directory.is_dir():
print("Cannot resolve", destination_directory)
diff --git a/do/graphs/minimal/fumigants.txt b/debug/minimal/fumigants.txt
similarity index 100%
rename from do/graphs/minimal/fumigants.txt
rename to debug/minimal/fumigants.txt
diff --git a/do/graphs/minimal/sunscreen.txt b/debug/minimal/sunscreen.txt
similarity index 100%
rename from do/graphs/minimal/sunscreen.txt
rename to debug/minimal/sunscreen.txt
diff --git a/do/API.py b/do/API.py
index c79a536..2e83284 100644
--- a/do/API.py
+++ b/do/API.py
@@ -2,7 +2,7 @@
# probability-code API #
###########################################################
-from typing import Union
+from typing import Collection, List, Optional, Set, Union
from pathlib import Path
from .api.backdoor_paths import api_backdoor_paths
@@ -10,10 +10,11 @@
from .api.joint_distribution_table import api_joint_distribution_table
from .api.probability_query import api_probability_query
-from .probability.structures.BackdoorController import BackdoorController
-from .probability.structures.CausalGraph import CausalGraph
-from .probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable
-from .probability.structures.VariableStructures import Variable
+from .structures.BackdoorController import BackdoorController
+from .structures.CausalGraph import CausalGraph
+from .structures.ConditionalProbabilityTable import ConditionalProbabilityTable
+from .structures.Types import Vertex, Vertices
+from .structures.VariableStructures import Outcome, Intervention
from .util.ModelLoader import parse_model
from .util.OutputLogger import OutputLogger
@@ -32,6 +33,9 @@ def __init__(self, model: dict or None, print_detail=False, print_result=False,
an argument to log_fd, or can be done later with a call to set_log_fd.
@param log_fd: An open file descriptor to write to, if log_details is enabled.
"""
+ self._print_result = print_result
+ self._output = OutputLogger(print_result, print_detail, log, log_fd)
+
if model:
self.load_model(model)
@@ -40,9 +44,6 @@ def __init__(self, model: dict or None, print_detail=False, print_result=False,
self._g = None
self._bc = None
- self._print_result = print_result
- self._output = OutputLogger(print_result, print_detail, log, log_fd)
-
################################################################
# API Modifications #
################################################################
@@ -93,7 +94,7 @@ def set_log_fd(self, log_fd):
# Distributions #
################################################################
- def p(self, y: set, x: set) -> float:
+ def p(self, y: Collection[Outcome], x: Collection[Union[Outcome, Intervention]]) -> Optional[float]:
"""
Compute a probability query of Y, given X.
@param y: Head of query; a set of Outcome objects
@@ -101,11 +102,15 @@ def p(self, y: set, x: set) -> float:
@return: The probability of P(Y | X), in the range [0.0, 1.0]
@raise ProbabilityException when the given probability cannot be computed, such as an invalid Outcome
"""
- # All deconfounding is handled by the CG
- result = api_probability_query(self._cg, y, x)
- self._output.result(result)
+ try:
+ # All deconfounding is handled by the CG
+ result = api_probability_query(self._cg, y, x)
+ self._output.result(result)
+ return result
- return result
+ except AssertionError as e:
+ self._output.detail(e)
+ return None
def joint_distribution_table(self) -> ConditionalProbabilityTable:
"""
@@ -113,7 +118,7 @@ def joint_distribution_table(self) -> ConditionalProbabilityTable:
@return: A list of tuples, (Outcomes, P), where Outcomes is a unique set of Outcome objects for the model, and
P is the corresponding probability.
"""
- result = api_joint_distribution_table(self._cg)
+ result: ConditionalProbabilityTable = api_joint_distribution_table(self._cg)
if self._print_result:
keys = sorted(self._cg.variables.keys())
@@ -126,7 +131,7 @@ def joint_distribution_table(self) -> ConditionalProbabilityTable:
# Pathfinding (Backdoor Controller) #
################################################################
- def backdoor_paths(self, src: set, dst: set, dcf: set) -> list:
+ def backdoor_paths(self, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> List[Path]:
"""
Find all the "backdoor paths" between two sets of variables.
@param src: A set of (string) vertices defined in the loaded model, which will be the source to begin searching
@@ -145,9 +150,10 @@ def backdoor_paths(self, src: set, dst: set, dcf: set) -> list:
for left, right in zip(path[:-1], path[1:]):
print(left, "<-" if right in self._g.parents(left) else "->", end=" ")
print(path[-1])
+
return result
- def deconfounding_sets(self, src: set, dst: set) -> list:
+ def deconfounding_sets(self, src: Vertices, dst: Vertices) -> List[Set[str]]:
"""
Find the sets of vertices in the loaded model that are sufficient at blocking all backdoor paths from all
vertices in src to any vertices in dst
diff --git a/do/__main__.py b/do/__main__.py
index 3df490b..583df22 100644
--- a/do/__main__.py
+++ b/do/__main__.py
@@ -12,7 +12,7 @@
# there being path issues depending on the working directory
-def main(graph_location=Path(".", "src", "graphs", "full")):
+def main(graph_location=Path(".", "graphs")):
"""
Run an interactive IO prompt allowing full use of the causality software.
@param graph_location: A string of the path from the working directory to a directory of graphs
diff --git a/do/api/backdoor_paths.py b/do/api/backdoor_paths.py
index c4ef959..2071c8e 100644
--- a/do/api/backdoor_paths.py
+++ b/do/api/backdoor_paths.py
@@ -1,9 +1,10 @@
-from itertools import product
+from typing import Collection, Dict, List, Optional
-from ..probability.structures.BackdoorController import BackdoorController
+from ..structures.BackdoorController import BackdoorController
+from ..structures.Types import Path, Vertices
-def api_backdoor_paths_parse(query: str) -> (set, set):
+def api_backdoor_paths_parse(query: str) -> Dict[str, Collection[str]]:
"""
Convert a given query string into a pair of sets to compute all backdoor paths between
@param query: A string of the form "X, Y, Z -> A, B, C" or "X, Y, Z -> A, B, C | I, J, K"
@@ -28,7 +29,7 @@ def clean(x):
}
-def api_backdoor_paths(bc: BackdoorController, src: set, dst: set, dcf: set) -> list:
+def api_backdoor_paths(bc: BackdoorController, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> List[Path]:
"""
Compute and return all the backdoor paths from any vertex in src to any vertex in dst
@param bc: A Backdoor Controller with a graph conforming to the given source and destination sets.
@@ -42,8 +43,4 @@ def api_backdoor_paths(bc: BackdoorController, src: set, dst: set, dcf: set) ->
list containing each vertex (as a string) from the source vertex to the destination vertex, with dcf acting as
a deconfounding set.
"""
- # TODO Add a method in Backdoor Controller that can return all paths immediately
- paths = []
- for s, t in product(src, dst):
- paths += bc.backdoor_paths_pair(s, t, dcf)
- return paths
+ return bc.backdoor_paths(src, dst, dcf)
diff --git a/do/api/deconfounding_sets.py b/do/api/deconfounding_sets.py
index 8d6eff5..fa79843 100644
--- a/do/api/deconfounding_sets.py
+++ b/do/api/deconfounding_sets.py
@@ -1,7 +1,10 @@
-from ..probability.structures.BackdoorController import BackdoorController
+from typing import Collection, Dict, List, Set
+from ..structures.BackdoorController import BackdoorController
+from ..structures.Types import Vertices
-def api_deconfounding_sets_parse(query: str) -> (set, set):
+
+def api_deconfounding_sets_parse(query: str) -> Dict[str, Collection[str]]:
"""
Convert a given query string into a pair of sets to find all sufficient deconfounding sets between.
@param query: A string of the form "X, Y, Z -> A, B, C"
@@ -19,7 +22,7 @@ def clean(x):
}
-def api_deconfounding_sets(bc: BackdoorController, src: set, dst: set) -> list:
+def api_deconfounding_sets(bc: BackdoorController, src: Vertices, dst: Vertices) -> List[Set[str]]:
"""
Compute and return all the backdoor paths from any vertex in src to any vertex is dst
@param bc: A Backdoor Controller with a graph conforming to the given source and destination sets.
diff --git a/do/api/joint_distribution_table.py b/do/api/joint_distribution_table.py
index b4dfcba..5afb8fa 100644
--- a/do/api/joint_distribution_table.py
+++ b/do/api/joint_distribution_table.py
@@ -1,8 +1,8 @@
from itertools import product
-from ..probability.structures.CausalGraph import CausalGraph
-from ..probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable
-from ..probability.structures.VariableStructures import Outcome, Variable
+from ..structures.CausalGraph import CausalGraph
+from ..structures.ConditionalProbabilityTable import ConditionalProbabilityTable
+from ..structures.VariableStructures import Outcome, Variable
def api_joint_distribution_table(cg: CausalGraph) -> ConditionalProbabilityTable:
diff --git a/do/api/probability_query.py b/do/api/probability_query.py
index 58c6803..499a9cd 100644
--- a/do/api/probability_query.py
+++ b/do/api/probability_query.py
@@ -1,8 +1,10 @@
-from ..probability.structures.CausalGraph import CausalGraph
-from ..probability.structures.VariableStructures import parse_outcomes_and_interventions
+from typing import Collection, Dict, Union
+from ..structures.CausalGraph import CausalGraph
+from ..structures.VariableStructures import Outcome, Intervention, parse_outcomes_and_interventions
-def api_probability_query_parse(query: str) -> (tuple, tuple):
+
+def api_probability_query_parse(query: str) -> Dict[str, Collection[str]]:
"""
Parse a query string into Outcome and Intervention structures.
@param query: A string of the form "Y=y, X=x | W=w", or just "Y=y, X=x"
@@ -21,7 +23,7 @@ def api_probability_query_parse(query: str) -> (tuple, tuple):
}
-def api_probability_query(cg: CausalGraph, y: set, x: set) -> float:
+def api_probability_query(cg: CausalGraph, y: Collection[Outcome], x: Collection[Union[Outcome, Intervention]]) -> float:
"""
Compute a probability query for the currently loaded causal graph.
@param cg: A Causal Graph containing variables, distributions, etc.
diff --git a/do/config/config_manager.py b/do/config/config_manager.py
index c798f1f..7660e99 100755
--- a/do/config/config_manager.py
+++ b/do/config/config_manager.py
@@ -1,7 +1,7 @@
from pathlib import Path
from yaml import safe_load as load, dump
-from ..config.primary_configuration import *
+from ..config.primary_configuration import primary_config_file
path = Path(".", "config.yml")
diff --git a/do/config/generate_config_docs.py b/do/config/generate_config_docs.py
index 8c10886..bd01cbe 100755
--- a/do/config/generate_config_docs.py
+++ b/do/config/generate_config_docs.py
@@ -4,7 +4,7 @@
from pathlib import Path
-from .primary_configuration import *
+from .primary_configuration import primary_config_file
documentation_file = Path(".", "doc", "Configuration.md")
diff --git a/do/config/primary_configuration.py b/do/config/primary_configuration.py
index 0d95feb..fd02f6e 100755
--- a/do/config/primary_configuration.py
+++ b/do/config/primary_configuration.py
@@ -61,4 +61,3 @@
}]
}
]
-
diff --git a/do/graphs/full/abcd.yml b/do/graphs/abcd.yml
similarity index 100%
rename from do/graphs/full/abcd.yml
rename to do/graphs/abcd.yml
diff --git a/do/graphs/full/fumigants_eelworms.yml b/do/graphs/fumigants_eelworms.yml
similarity index 100%
rename from do/graphs/full/fumigants_eelworms.yml
rename to do/graphs/fumigants_eelworms.yml
diff --git a/do/graphs/full/m-game.yml b/do/graphs/m-game.yml
similarity index 100%
rename from do/graphs/full/m-game.yml
rename to do/graphs/m-game.yml
diff --git a/do/graphs/full/melanoma.yml b/do/graphs/melanoma.yml
similarity index 100%
rename from do/graphs/full/melanoma.yml
rename to do/graphs/melanoma.yml
diff --git a/do/graphs/full/pearl-3.4.yml b/do/graphs/pearl-3.4.yml
similarity index 100%
rename from do/graphs/full/pearl-3.4.yml
rename to do/graphs/pearl-3.4.yml
diff --git a/do/graphs/full/pearl-3.6.yml b/do/graphs/pearl-3.6.yml
similarity index 100%
rename from do/graphs/full/pearl-3.6.yml
rename to do/graphs/pearl-3.6.yml
diff --git a/do/graphs/full/pearl-3.7c.yml b/do/graphs/pearl-3.7c.yml
similarity index 100%
rename from do/graphs/full/pearl-3.7c.yml
rename to do/graphs/pearl-3.7c.yml
diff --git a/do/graphs/full/pearl-7.5.yml b/do/graphs/pearl-7.5.yml
similarity index 100%
rename from do/graphs/full/pearl-7.5.yml
rename to do/graphs/pearl-7.5.yml
diff --git a/do/graphs/full/simulation.json.yml b/do/graphs/simulation.json.yml
similarity index 100%
rename from do/graphs/full/simulation.json.yml
rename to do/graphs/simulation.json.yml
diff --git a/do/graphs/full/square-game.yml b/do/graphs/square-game.yml
similarity index 100%
rename from do/graphs/full/square-game.yml
rename to do/graphs/square-game.yml
diff --git a/do/graphs/full/test.json b/do/graphs/test.json
similarity index 100%
rename from do/graphs/full/test.json
rename to do/graphs/test.json
diff --git a/do/probability/structures/BackdoorController.py b/do/structures/BackdoorController.py
similarity index 85%
rename from do/probability/structures/BackdoorController.py
rename to do/structures/BackdoorController.py
index 08de3b6..6f676bb 100755
--- a/do/probability/structures/BackdoorController.py
+++ b/do/structures/BackdoorController.py
@@ -8,12 +8,13 @@
#########################################################
from itertools import product
+from typing import List, Optional
from .Graph import Graph
+from .Types import Collection, Path, Vertices, Vertex, V_Type
-from ...config.settings import Settings
-from ...util.helpers import minimal_sets
-from ...util.helpers import power_set
+from ..config.settings import Settings
+from ..util.helpers import minimal_sets, power_set, str_map
class BackdoorController:
@@ -31,28 +32,33 @@ def __init__(self, graph: Graph):
self.graph = graph.copy()
self.graph.reset_disabled()
- def backdoor_paths(self, src: set, dst: set, dcf: set) -> list:
+ def backdoor_paths(self, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> List[Path]:
"""
Get all possible backdoor paths between some source set of vertices in the internal graph to any vertices in
some destination set of vertices. A given (possibly empty) set of deconfounding vertices may serve to block, or
even open, some backdoor paths.
@param src: The source set of (string) vertices to search for paths from
@param dst: The destination set of (string) vertices to search from src towards.
- @param dcf: A set of (string) vertices that may serve as a sufficient deconfounding set to block or open
+ @param dcf: An optional set of (string) vertices that may serve as a sufficient deconfounding set to block or open
backdoor paths.
@return: A list of lists, where each sublist contains a backdoor path, the first and last element being a
vertex from src and dst, respectively, with all vertices between representing the path. All elements are
string vertices.
"""
+
paths = []
+ src_str = str_map(src)
+ dst_str = str_map(dst)
+ dcf_str = str_map(dcf) if dcf else set()
+
# Use the product of src, dst to try each possible pairing
- for s, t in product(src, dst):
- paths += self.backdoor_paths_pair(s, t, dcf)
+ for s, t in product(src_str, dst_str):
+ paths += self._backdoor_paths_pair(s, t, dcf_str)
return paths
- def backdoor_paths_pair(self, s: str, t: str, dcf: set) -> list:
+ def _backdoor_paths_pair(self, s: str, t: str, dcf: Collection[str]) -> List[Path]:
"""
Find all backdoor paths between any particular pair of vertices in the loaded graph
@param s: A source (string) vertex in the graph
@@ -114,7 +120,7 @@ def get_backdoor_paths(cur: str, path: list, path_list: list, previous="up") ->
# Filter out the paths that don't "enter" x; see the definition of a backdoor path
return list(filter(lambda l: l[0] in self.graph.children(l[1]) and l[1] != t, backdoor_paths))
- def all_dcf_sets(self, src: set, dst: set) -> list:
+ def all_dcf_sets(self, src: Vertices, dst: Vertices) -> List[Collection[str]]:
"""
Finds all Z subsets that serve as deconfounding sets between two sets of vertices, such as for the purpose of
measuring interventional distributions.
@@ -123,8 +129,11 @@ def all_dcf_sets(self, src: set, dst: set) -> list:
@return: A list of sets, each set representing a set of variables that are a sufficient Z set
"""
+ src_str = str_map(src)
+ dst_str = str_map(dst)
+
# Can't use anything in src, dst, or any descendant of any vertex in src as a deconfounding/blocking vertex
- disallowed_vertices = src | dst | set().union(*[self.graph.reach(s) for s in src])
+ disallowed_vertices = src_str | dst_str | set().union(*[self.graph.reach(s) for s in src_str])
valid_deconfounding_sets = list()
@@ -135,11 +144,11 @@ def all_dcf_sets(self, src: set, dst: set) -> list:
any_backdoor_paths = False
# Cross represents one (x in X, y in Y) tuple
- for s, t in product(src, dst):
+ for s, t in product(src_str, dst_str):
# Get any/all backdoor paths for this particular pair of vertices in src,dst with given potential
# deconfounding set
- backdoor_paths = self.backdoor_paths_pair(s, t, set(tentative_dcf))
+ backdoor_paths = self._backdoor_paths_pair(s, t, set(tentative_dcf))
if len(backdoor_paths) > 0:
any_backdoor_paths = True
@@ -155,7 +164,7 @@ def all_dcf_sets(self, src: set, dst: set) -> list:
return list(valid_deconfounding_sets)
- def all_paths_cumulative(self, s: str, t: str, path: list, path_list: list) -> list:
+ def all_paths_cumulative(self, s: str, t: str, path: list, path_list: list) -> List[Path]:
"""
Return a list of lists of all paths from a source to a target, with conditional movement from child to parent,
or parent to child.
@@ -173,7 +182,7 @@ def all_paths_cumulative(self, s: str, t: str, path: list, path_list: list) -> l
path_list = self.all_paths_cumulative(child, t, path + [s], path_list)
return path_list
- def independent(self, src: set, dst: set, dcf: set) -> bool:
+ def independent(self, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> bool:
"""
Helper function that makes some do_calculus logic more readable; determine if two sets are independent, given
some third set.
@@ -182,12 +191,17 @@ def independent(self, src: set, dst: set, dcf: set) -> bool:
@param dcf: A deconfounding set (of strings) Z, to block paths between X and Y
@return: True if there are no backdoor paths and no straight-line paths, False otherwise
"""
+
+ src_str = str_map(src)
+ dst_str = str_map(dst)
+ dcf_str = str_map(dcf) if dcf else set()
+
# Not independent if there are any unblocked backdoor paths
- if len(self.backdoor_paths(src, dst, dcf)) > 0:
+ if len(self.backdoor_paths(src_str, dst_str, dcf_str)) > 0:
return False
# Ensure no straight-line variables from any X -> Y or Y -> X
- for s, t in product(src, dst):
+ for s, t in product(src_str, dst_str):
if len(self.all_paths_cumulative(s, t, [], [])) != 0:
return False # x -> y
if len(self.all_paths_cumulative(t, s, [], [])) != 0:
diff --git a/do/probability/structures/CausalGraph.py b/do/structures/CausalGraph.py
similarity index 89%
rename from do/probability/structures/CausalGraph.py
rename to do/structures/CausalGraph.py
index 24028ff..9f10d18 100755
--- a/do/probability/structures/CausalGraph.py
+++ b/do/structures/CausalGraph.py
@@ -8,15 +8,16 @@
#########################################################
from itertools import product
+from typing import Collection, Union
from .BackdoorController import BackdoorController
from .Graph import Graph
from .Probability_Engine import ProbabilityEngine
from .VariableStructures import Outcome, Intervention
-from ...config.settings import Settings
-from ...util.OutputLogger import OutputLogger
-from ...util.helpers import p_str
+from ..config.settings import Settings
+from ..util.OutputLogger import OutputLogger
+from ..util.helpers import p_str
class CausalGraph:
@@ -44,16 +45,19 @@ def __init__(self, graph: Graph, variables: dict, outcomes: dict, tables: dict,
self.latent = latent.copy()
self.output = kwargs["output"] if "output" in kwargs else OutputLogger()
- def probability_query(self, head: set, body: set) -> float or None:
+ def probability_query(self, head: Collection[Outcome], body: Collection[Union[Outcome, Intervention]]) -> float:
"""
Compute a probability in the given model.
@param head: A set of Outcome objects
@param body: A set of Outcome and/or Intervention objects.
@return: A value in the range [0.0, 1.0] if the probability can be computed, None otherwise.
"""
- def strings(s: set):
+ def strings(s: Collection[Union[Outcome, Intervention]]):
return set(map(lambda x: x.name, s))
+ head = set(head)
+ body = set(body)
+
self.graph.reset_disabled()
# String representation of the given query
@@ -95,15 +99,13 @@ def strings(s: set):
# Filter down the deconfounding sets not overlapping with our query body
vertex_dcf = list(filter(lambda s: len(set(s) & strings(body)) == 0, deconfounding_sets))
- if len(vertex_dcf) == 0:
- self.output.result("No deconfounding set Z can exist for the given data.")
- return
+ assert len(vertex_dcf) != 0, "No deconfounding set Z can exist for the given data."
# Compute with every possible deconfounding set as a safety measure; ensuring they all match
probability = None # Sentinel value
for z_set in vertex_dcf:
- result = self._marginalize_query(head, body, interventions, z_set)
+ result = self._marginalize_query(head, body, z_set)
if probability is None: # Storing first result
probability = result
@@ -115,19 +117,21 @@ def strings(s: set):
self.graph.reset_disabled()
return probability
- def _marginalize_query(self, head: set, body: set, interventions: set, dcf: set) -> float:
+ def _marginalize_query(self, head: Collection[Outcome], body: Collection[Union[Outcome, Intervention]], dcf: Collection[str]) -> float:
"""
Handle the modified query where we require a deconfounding set due to Interventions / treatments.
@param head: The head of the query, a set containing Outcome objects
@param body: The body of the query, a set containing Outcome and Intervention objects
- @param interventions: A set containing Intervention objects; this should be a subset within body, of all
- Intervention objects in the query, since this should already have been found whenever this function is
- called.
@param dcf: A set of (string) names of variables to serve as a deconfounding set, blocking all backdoor paths
between the head and body
@return:
"""
+ head = set(head)
+ body = set(body)
+
+ interventions = set(filter(lambda x: isinstance(x, Intervention), body))
+
# Augment graph (isolating interventions as roots) and create engine
self.graph.disable_incoming(*interventions)
engine = ProbabilityEngine(self.graph, self.outcomes, self.tables)
diff --git a/do/probability/structures/ConditionalProbabilityTable.py b/do/structures/ConditionalProbabilityTable.py
similarity index 95%
rename from do/probability/structures/ConditionalProbabilityTable.py
rename to do/structures/ConditionalProbabilityTable.py
index e42c712..9fe24a0 100755
--- a/do/probability/structures/ConditionalProbabilityTable.py
+++ b/do/structures/ConditionalProbabilityTable.py
@@ -6,13 +6,14 @@
# #
#########################################################
-from numpy import empty
from math import floor, ceil
+from numpy import empty
+from typing import List
from .VariableStructures import Variable, Outcome, Intervention
-from ...config.settings import Settings
-from ...util.ProbabilityExceptions import MissingTableRow
+from ..config.settings import Settings
+from ..util.ProbabilityExceptions import MissingTableRow
class ConditionalProbabilityTable:
@@ -27,7 +28,7 @@ class ConditionalProbabilityTable:
# Padding units on the left/right sides of each cell
padding = 1
- def __init__(self, variable: Variable, given: list, table_rows: list):
+ def __init__(self, variable: Variable, given: List[str], table_rows: List):
self.variable = variable # The LHS of the table, single-variable only
self.given = given # The RHS/body of the table
diff --git a/do/probability/structures/Graph.py b/do/structures/Graph.py
similarity index 89%
rename from do/probability/structures/Graph.py
rename to do/structures/Graph.py
index 89140c5..0ffe0ea 100755
--- a/do/probability/structures/Graph.py
+++ b/do/structures/Graph.py
@@ -11,19 +11,16 @@
# We can isolate more generalized graph code here, as well as create a better way to "erase" incoming or outgoing
# edges, but only temporarily; this will improve "reach", "parents", etc.
-from typing import Union
+from typing import Collection, Set, Tuple, Union
-from .VariableStructures import Variable, Outcome, Intervention
-
-# These functions should work with any sort of Variable type, or the name itself
-CG_Types = Union[str, Variable, Outcome, Intervention]
+from .Types import V_Type
class Graph:
"""A basic graph, with edge control."""
- def __init__(self, v: set, e: set):
+ def __init__(self, v: Set[str], e: Set[Tuple[str, str]]):
"""
Initializer for a basic Graph.
@param v: A set of vertices
@@ -46,7 +43,7 @@ def __init__(self, v: set, e: set):
self.topology_map = {vertex: 0 for vertex in v}
- def initialize_topology(vertex: CG_Types, depth=0):
+ def initialize_topology(vertex: V_Type, depth=0):
"""
Helper function to initialize the ordering of the Variables in the graph
@param vertex: A Variable to set the ordering of, and then all its children
@@ -70,14 +67,14 @@ def __str__(self) -> str:
msg += "Edges:\n" + "\n".join(" -> ".join(i for i in edge) for edge in self.e)
return msg
- def roots(self) -> set:
+ def roots(self) -> Collection[str]:
"""
Get the roots of the the graph G.
@return: A set of vertices (strings) in G that have no parents.
"""
return set([x for x in self.v if len(self.parents(x)) == 0])
- def parents(self, v: CG_Types) -> set:
+ def parents(self, v: V_Type) -> Collection[Union[str, V_Type]]:
"""
Get the parents of v, which may actually be currently controlled
@param v: A variable in our graph
@@ -89,7 +86,7 @@ def parents(self, v: CG_Types) -> set:
return {p for p in self.incoming[label] if p not in self.outgoing_disabled and p not in self.outgoing[label]}
- def children(self, v: CG_Types) -> set:
+ def children(self, v: V_Type) -> Collection[Union[str, V_Type]]:
"""
Get the children of v, which may actually be currently controlled
@param v: A variable in our graph
@@ -101,7 +98,7 @@ def children(self, v: CG_Types) -> set:
return {c for c in self.outgoing[label] if c not in self.incoming_disabled and c not in self.incoming[label]}
- def ancestors(self, v: CG_Types) -> set:
+ def ancestors(self, v: V_Type) -> Collection[Union[str, V_Type]]:
"""
Get the ancestors of v, accounting for disabled vertices
@param v: The vertex to find all ancestors of
@@ -119,7 +116,7 @@ def ancestors(self, v: CG_Types) -> set:
return ancestors
- def reach(self, v: CG_Types) -> set:
+ def reach(self, v: V_Type) -> Collection[Union[str, V_Type]]:
"""
Get the reach of v, accounting for disabled vertices
@param v: The vertex to find all descendants of
@@ -137,7 +134,7 @@ def reach(self, v: CG_Types) -> set:
return set(children)
- def disable_outgoing(self, *disable: CG_Types):
+ def disable_outgoing(self, *disable: V_Type):
"""
Disable the given vertices' outgoing edges
@param disable: Any number of vertices to disable
@@ -145,7 +142,7 @@ def disable_outgoing(self, *disable: CG_Types):
for v in disable:
self.outgoing_disabled.add(to_label(v))
- def disable_incoming(self, *disable: CG_Types):
+ def disable_incoming(self, *disable: V_Type):
"""
Disable the given vertices' incoming edges
@param disable: Any number of vertices to disable
@@ -160,7 +157,7 @@ def reset_disabled(self):
self.outgoing_disabled.clear()
self.incoming_disabled.clear()
- def get_topology(self, v: CG_Types) -> int:
+ def get_topology(self, v: V_Type) -> int:
"""
Determine the "depth" a given Variable is at in a topological sort of the graph
@param v: The variable to determine the depth of
@@ -185,7 +182,7 @@ def __copy__(self):
copied.outgoing_disabled = self.outgoing_disabled.copy()
return copied
- def topological_variable_sort(self, variables: list) -> list:
+ def topological_variable_sort(self, variables: Collection[Union[str, V_Type]]) -> Collection[Union[str, V_Type]]:
"""
A helper function to abstract what it means to "sort" a list of Variables/Outcomes/Interventions
@param variables: A list of any number of Variable/Outcome/Intervention instances
@@ -198,7 +195,7 @@ def topological_variable_sort(self, variables: list) -> list:
sorted_variables = [[v for v in variables if self.get_topology(v) == i] for i in range(largest_topology+1)]
return [item for topology_sublist in sorted_variables for item in topology_sublist]
- def descendant_first_sort(self, variables: list) -> list:
+ def descendant_first_sort(self, variables: Collection[Union[str, V_Type]]) -> Collection[Union[str, V_Type]]:
"""
A helper function to "sort" a list of Variables/Outcomes/Interventions such that no element has a
"parent"/"ancestor" to its left
@@ -209,7 +206,7 @@ def descendant_first_sort(self, variables: list) -> list:
return self.topological_variable_sort(variables)[::-1]
-def to_label(item: CG_Types) -> str:
+def to_label(item: V_Type) -> str:
"""
Convert a variable to its string name, if not already provided as such
@param item: The item to convert, either a string (done) or some Variable
diff --git a/do/probability/structures/Probability_Engine.py b/do/structures/Probability_Engine.py
similarity index 95%
rename from do/probability/structures/Probability_Engine.py
rename to do/structures/Probability_Engine.py
index 56e834e..bd2060d 100755
--- a/do/probability/structures/Probability_Engine.py
+++ b/do/structures/Probability_Engine.py
@@ -8,14 +8,15 @@
#########################################################
from itertools import product
+from typing import Collection, Union
from .Graph import Graph
from .VariableStructures import Outcome, Intervention
-from ...config.settings import Settings
-from ...util.OutputLogger import OutputLogger
-from ...util.helpers import p_str
-from ...util.ProbabilityExceptions import ProbabilityException, ProbabilityIndeterminableException
+from ..config.settings import Settings
+from ..util.OutputLogger import OutputLogger
+from ..util.helpers import p_str
+from ..util.ProbabilityExceptions import ProbabilityException, ProbabilityIndeterminableException
class ProbabilityEngine:
@@ -33,7 +34,7 @@ def __init__(self, graph: Graph, outcomes: dict, tables: dict, **kwargs):
self.output = kwargs["output"] if "output" in kwargs else OutputLogger()
self._stored_computations = dict()
- def probability(self, head: set, body: set) -> float:
+ def probability(self, head: Collection[Outcome], body: Collection[Union[Outcome, Intervention]]) -> float:
"""
@param head: A set of Outcome objects representing the head of a query
@param body: A set of Outcome/Intervention objects representing the body of a query
@@ -42,6 +43,9 @@ def probability(self, head: set, body: set) -> float:
@raise AssertionError if there is an Intervention in the head
"""
+ head = set(head)
+ body = set(body)
+
# Ensure there are no adjustments/interventions in the head
for out in head:
assert not isinstance(out, Intervention), f"Error: {out} is in head; no Interventions should be in head."
@@ -59,7 +63,7 @@ def probability(self, head: set, body: set) -> float:
self.graph.disable_incoming(*interventions)
return self._compute(list(head), list(body))
- def _compute(self, head: list, body: list, depth=0) -> float:
+ def _compute(self, head: Collection[Outcome], body: Collection[Union[Outcome, Intervention]], depth=0) -> float:
"""
Compute the probability of some head given some body
@param head: A list of some number of Outcome objects
@@ -270,7 +274,7 @@ def _store_computation(self, string_representation: str, result: float):
print("Uh-oh:", string_representation, "has already been cached, but with a different value...")
-def contradictory_outcome_set(outcomes: list) -> bool:
+def contradictory_outcome_set(outcomes: Collection[Union[Outcome, Intervention]]) -> bool:
"""
Check whether a list of outcomes contain any contradictory values, such as Y = y and Y = ~y
@param outcomes: A list of Outcome objects
diff --git a/do/structures/Types.py b/do/structures/Types.py
new file mode 100644
index 0000000..c905963
--- /dev/null
+++ b/do/structures/Types.py
@@ -0,0 +1,11 @@
+from typing import Collection, List, NewType, Union
+
+from .VariableStructures import Variable, Outcome, Intervention
+
+# General
+V_Type = NewType("V_Type", Union[Variable, Outcome, Intervention])
+
+# Graph-related
+Vertex = NewType("Vertex", Union[V_Type, str])
+Vertices = NewType("Vertices", Collection[Vertex])
+Path = NewType("Path", List[Vertex])
diff --git a/do/probability/structures/VariableStructures.py b/do/structures/VariableStructures.py
similarity index 100%
rename from do/probability/structures/VariableStructures.py
rename to do/structures/VariableStructures.py
diff --git a/do/probability/structures/__init__.py b/do/structures/__init__.py
similarity index 92%
rename from do/probability/structures/__init__.py
rename to do/structures/__init__.py
index 9e8d9db..6ba87be 100644
--- a/do/probability/structures/__init__.py
+++ b/do/structures/__init__.py
@@ -4,5 +4,6 @@
"ConditionalProbabilityTable",
"Graph",
"Probability_Engine",
+ "Types",
"VariableStructures"
]
diff --git a/do/util/ModelLoader.py b/do/util/ModelLoader.py
index 53850b3..1815f4b 100755
--- a/do/util/ModelLoader.py
+++ b/do/util/ModelLoader.py
@@ -3,9 +3,9 @@
from typing import Union
from yaml import safe_load as yaml_load
-from ..probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable
-from ..probability.structures.Graph import Graph
-from ..probability.structures.VariableStructures import Variable, Outcome, Intervention
+from ..structures.ConditionalProbabilityTable import ConditionalProbabilityTable
+from ..structures.Graph import Graph
+from ..structures.VariableStructures import Variable
def parse_model(file: Union[dict, str, Path]):
diff --git a/do/util/helpers.py b/do/util/helpers.py
index 5de6487..28d6ae6 100644
--- a/do/util/helpers.py
+++ b/do/util/helpers.py
@@ -1,7 +1,8 @@
from itertools import chain, combinations
-from typing import Iterator
+from typing import Collection, Iterator, Union
from ..config.settings import Settings
+from ..structures.Types import Intervention, Outcome, Vertices
def power_set(variable_list: list or set, allow_empty_set=True) -> Iterator[any]:
@@ -40,7 +41,7 @@ def disjoint(*sets) -> bool:
return len(set().union(*sets)) == sum(map(lambda iterable: len(iterable), sets))
-def p_str(lhs: list, rhs: list) -> str:
+def p_str(lhs: Collection[Outcome], rhs: Collection[Union[Outcome, Intervention]]) -> str:
"""
Convert a head&body to a properly-formatted string
@param lhs: The head/LHS of the query; a list of Outcome/Intervention objects
@@ -61,3 +62,7 @@ def within_precision(a: float, b: float) -> bool:
@return: True if the values are within the margin of error acceptable, False otherwise
"""
return abs(a - b) < 1 / (10 ** Settings.regression_levels_of_precision)
+
+
+def str_map(to_filter: Vertices):
+ return set(map(lambda v: v if isinstance(v, str) else v.name, to_filter))
diff --git a/doc/Getting Started.md b/doc/Getting Started.md
deleted file mode 100644
index dea636d..0000000
--- a/doc/Getting Started.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# Getting Started
-
-How to install and set up the software.
-
-#### Table of Contents
-
-* [Installation](#installation)
-* [Setup](#setup)
-* [Running](#running)
-
-## Installation
-
-There are multiple ways to install the software: [**clone the repository**](#clone), [**download a release**](#release), or use the [**GitHub CLI**](#cli).
-
-### Clone
-
-In order to clone the repository, you must have [git](https://git-scm.com/) installed; if you are on [macOS](https://www.apple.com/ca/macos/) or [Linux](https://www.linux.org/), you almost certainly already have this installed.
-
-You can clone the repository using either the [**HTTPS**](#https) URL, or the [**SSH**](#ssh) URL. If you do not know which to choose, or do not intend to commit to the project, use [**HTTPS**](#https).
-
-#### HTTPS
-
-To clone with the **HTTPS** URL:
-
-```shell
-git clone https://github.com/bradendubois/probability-code.git
-```
-
-#### SSH
-
-To clone with the **SSH** URL:
-```shell
-git clone git@github.com:bradendubois/probability-code.git
-```
-
-### Release
-
-The project's [releases page](https://github.com/bradendubois/probability-code/releases) shows all tagged version of the project, according to [semantic versioning](https://semver.org/). Both **.zip** and **.tar.gz** archives are available.
-
-Releases: [https://github.com/bradendubois/probability-code/releases](https://github.com/bradendubois/probability-code/releases)
-
-Releases are automatically created and tagged using [semantic-release](https://github.com/semantic-release/semantic-release).
-
-### CLI
-
-To clone with the [GitHub CLI](https://cli.github.com/).
-
-```shell
-gh repo clone bradendubois/probability-code
-```
-
-## Setup
-
-Setup requirements for the project are:
-- **[Python 3.8+](https://www.python.org/)**
-- [**pip**](https://pip.pypa.io/en/stable/) is used to install [required packages](#python-requirements).
-
-**Note**: `pip` will already be installed with any installation of **Python 3.4+**.
-
-### Python Requirements
-
-At present, the only package not part of a default Python installation is [NumPy](https://numpy.org/). To install *numpy* exclusively:
-
-```shell
-pip install numpy
-```
-However, in the event that more packages become used, the more generalized following command will install all necessary packages in ``requirements.txt``:
-
-```shell
-pip install -r requirements.txt
-```
-
-## Running
-
-A basic [REPL](https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop) is available, and [[details can be found here|REPL]].
-
-An [API](https://en.wikipedia.org/wiki/API) is also available, and [[details can be found here|API]].
diff --git a/doc/Home.md b/doc/Home.md
deleted file mode 100644
index 45f6a1b..0000000
--- a/doc/Home.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# probability-code wiki
-
-This wiki is under construction, and most documentation is still under revision or subject to change in the near future.
-
-At present, the pages listed below are for the newer versions of the project under the ``develop`` branch, while other pages accessible from the sidebar are for the first tagged version of the project.
-
-#### Table of Contents
-
-* [[Getting Started]]
-* [[REPL]]
-* [[Causal Models]]
diff --git a/setup.cfg b/setup.cfg
index eab5072..0fc7713 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -11,13 +11,15 @@ exclude =
[coverage:run]
relative_files = True
-source = do/
+source =
+ do/
+ tests/
omit =
do/API.py
do/__main__.py
- do/config/*
+ do/config/generate_config_docs.py
+ do/config/config_manager.py
do/util/OutputLogger.py
- do/graphs/dataset_generator
[coverage:report]
exclude_lines =
diff --git a/setup.py b/setup.py
index be7284f..e1537c8 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
from pathlib import Path
-from setuptools import find_packages, setup
+from setuptools import setup
from os import environ
cwd = Path(".")
diff --git a/tests/backdoors/backdoor_path_tests.py b/tests/backdoors/backdoor_path_tests.py
index dbf94e4..b96d340 100644
--- a/tests/backdoors/backdoor_path_tests.py
+++ b/tests/backdoors/backdoor_path_tests.py
@@ -3,11 +3,11 @@
from pathlib import Path
from yaml import safe_load as load
-from ..test_util import print_test_result
-
-from do.probability.structures.BackdoorController import BackdoorController
+from do.structures.BackdoorController import BackdoorController
from do.util.ModelLoader import parse_model
+from ..test_util import print_test_result
+
test_file_directory = Path(dirname(abspath(__file__))) / "test_files"
@@ -32,11 +32,11 @@ def model_backdoor_validation(bc: BackdoorController, test_data: dict) -> (bool,
expected_paths = list(map(sorted, test["expect"]))
- paths = []
- for s, t in itertools.product(test["src"], test["dst"]):
- paths.extend(bc.backdoor_paths_pair(s, t, test["dcf"] if "dcf" in test else {}))
+ src = test["src"]
+ dst = test["dst"]
+ dcf = test["dcf"] if "dcf" in test else set()
- # Sort each path to improve some sor
+ paths = bc.backdoor_paths(src, dst, dcf)
paths = list(map(sorted, paths))
if test["exhaustive"] and len(paths) != len(expected_paths): # coverage: skip
diff --git a/tests/inference/inference_tests.py b/tests/inference/inference_tests.py
index 46b182b..fa3464b 100755
--- a/tests/inference/inference_tests.py
+++ b/tests/inference/inference_tests.py
@@ -2,14 +2,14 @@
from pathlib import Path
from yaml import safe_load as load
-from ..test_util import print_test_result
-
-from do.probability.structures.CausalGraph import CausalGraph, Outcome
-from do.probability.structures.VariableStructures import parse_outcomes_and_interventions
+from do.structures.CausalGraph import CausalGraph, Outcome
+from do.structures.VariableStructures import parse_outcomes_and_interventions
from do.util.helpers import within_precision
from do.util.ModelLoader import parse_model
from do.util.ProbabilityExceptions import *
+from ..test_util import print_test_result
+
test_file_directory = Path(dirname(abspath(__file__))) / "test_files"
@@ -89,12 +89,25 @@ def inference_tests(graph_location: Path) -> (bool, str):
head = parse_outcomes_and_interventions(test["head"])
body = parse_outcomes_and_interventions(test["body"]) if "body" in test else set()
- result = cg.probability_query(head, body)
expected = test["expect"]
- if expected != "failure" and not within_precision(result, expected): # coverage: skip
- print_test_result(False, f"Got {result} but expected {expected} in {graph_filename}")
- test_file_success = False
+ try:
+
+ result = cg.probability_query(head, body)
+
+ # Should have raised assertion error...
+ if expected == "failure":
+ print_test_result(False, f"Expected test to fail, but it did not! {graph_filename}")
+ test_file_success = False
+
+ if expected != "failure" and not within_precision(result, expected): # coverage: skip
+ print_test_result(False, f"Got {result} but expected {expected} in {graph_filename}")
+ test_file_success = False
+
+ except AssertionError:
+ if expected != "failure":
+ print_test_result(False, f"Unexpected assertion error! {graph_filename}")
+ test_file_success = False
if test_file_success:
print_test_result(True, f"All tests in {test_file}|{graph_filename} passed")
diff --git a/tests/test_driver.py b/tests/test_driver.py
index 1c73cde..cbe4053 100644
--- a/tests/test_driver.py
+++ b/tests/test_driver.py
@@ -7,11 +7,11 @@
from do.api.joint_distribution_table import api_joint_distribution_table
from do.api.probability_query import api_probability_query, api_probability_query_parse
-from do.probability.structures.BackdoorController import BackdoorController
-from do.probability.structures.CausalGraph import CausalGraph
-from do.probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable
-from do.probability.structures.Graph import Graph, to_label
-from do.probability.structures.VariableStructures import Outcome, Variable, Intervention
+from do.structures.BackdoorController import BackdoorController
+from do.structures.CausalGraph import CausalGraph
+from do.structures.ConditionalProbabilityTable import ConditionalProbabilityTable
+from do.structures.Graph import Graph, to_label
+from do.structures.VariableStructures import Outcome, Variable, Intervention
from do.util.helpers import power_set, disjoint, minimal_sets, within_precision
from do.util.ModelLoader import parse_model
@@ -27,7 +27,7 @@
default_model_file = "pearl-3.4.yml"
# Default location for the graphs made by hand
-graphs = Path("do", "graphs", "full")
+graphs = Path("do", "graphs")
# Path to the Xi-Xj model
test_file = graphs / default_model_file
@@ -175,7 +175,7 @@ def test_probability_lookup():
try:
assert t.probability_lookup(Outcome("Xj", "foo"), priors) == 100
- raise Exception
+ raise Exception # coverage: skip
except MissingTableRow:
pass
@@ -426,14 +426,14 @@ def test_parse_model():
# nonexistent file
try:
parse_model(Path("fake", "path", "fake"))
- raise Exception
+ raise Exception # coverage: skip
except FileNotFoundError:
pass
# invalid file
try:
parse_model(Path("do", "util", "helpers.py"))
- raise Exception
+ raise Exception # coverage: skip
except FileNotFoundError:
pass
diff --git a/wiki/Backdoor Paths.md b/wiki/Backdoor Paths.md
new file mode 100644
index 0000000..ef2a768
--- /dev/null
+++ b/wiki/Backdoor Paths.md
@@ -0,0 +1,56 @@
+How to discover backdoor paths between two sets of variables in a given [[causal model|Causal Models]].
+
+## Basic Backdoor Paths
+
+Assume the following model uses the graph **G = (V, E)**, where:
+- **V** = ``{x, y, z}``
+- **E** = ``{(x, y), (z, x), (z, y)}``
+
+```python
+from do.API import Do
+
+# Assume this were a detailed model conforming to the above graph...
+model = dict()
+
+do_api = Do(model)
+
+backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}, set())
+
+for path in backdoor_paths:
+ print(f"Backdoor path from x->y!: {path}")
+```
+
+``backdoor_paths`` returns a list of lists, in which each sub-list consists of the vertices (end-points included) connecting some vertex in the ``src`` set to some vertex in the ``dst`` set.
+- In this example, the return value would be ``[["x", "z", "y"]]``, as this denotes the singular backdoor path ``x <- z -> y``.
+
+**Important**
+- The first parameter is the set of source variables from which the pathfinding begins.
+- The second parameter is the set of destination variables to which the pathfinding attempts to reach.
+- A third parameter is a set of *deconfounding* variables by which to "block" backdoor paths.
+- The deconfounding set currently must be given, even if empty.
+- Each sublist, a backdoor path, is ordered such that the path order is correctly maintained.
+
+## Deconfounding Variables
+
+Assuming the same graph as defined [above](#basic-backdoor-paths)...
+
+```python
+from do.API import Do
+
+# Assume this were a detailed model conforming to the above graph...
+model = dict()
+
+do_api = Do(model)
+
+backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}, set())
+
+for path in backdoor_paths:
+ print(f"Backdoor path from x->y!: {path}")
+
+blocked = do_api.backdoor_paths({"x"}, {"y"}, {"z"})
+
+assert len(blocked) == 0
+```
+
+**Important**
+- If all backdoor paths are successfully blocked, an **empty list** is returned.
diff --git a/doc/Causal Models.md b/wiki/Causal Models.md
similarity index 53%
rename from doc/Causal Models.md
rename to wiki/Causal Models.md
index 256ee0e..4409c8c 100755
--- a/doc/Causal Models.md
+++ b/wiki/Causal Models.md
@@ -1,16 +1,11 @@
-# Causal Models
+This document outlines the structure of how to create a causal model for use in the package, such as in the [[API|Do API]].
-This document outlines the structure of how to create a causal model.
-
-Models are inherently **DAGs**, where each variable in a model is also represented as a vertex in the DAG.
-
-Models can be stored in ``json`` and ``yml`` files, and must have either ``.json``, ``.yml``, or ``.yaml`` file extensions.
-- The default graph folder is ``src/graphs/full``.
+Models are inherently **DAGs** (Directed Acyclic Graph), where each variable in a model is also represented as a vertex in the DAG.
## Model Structure
-The graph file must be structured such that a key with value ``model`` is present, and corresponds to a list, where each item is itself a dictionary, representing one variable in the model.
-- Each variable in the model is represented by a unique key representing the variable's name, and corresponds to the following key-value pairs:
+A model is represented as a dictionary, mapping the name of one variable in the model to its detailed information.
+- A variable's detailed information consists of the following key-value pairs:
- ``outcomes``: all discrete outcomes the variable may take, represented as a list.
- ``parents``: parent variables (also defined in the model) of the current variable, represented as a list.
- If the variable is a root - that is, there are no parents - the list can be left empty, or this key can be absent from this variable entirely.
@@ -19,7 +14,16 @@ The graph file must be structured such that a key with value ``model`` is presen
- ``latent``: a boolean representing whether the variable is unobservable in the given model.
- If this key is absent, it will be assumed ``False`` - that is, assumed observable.
-Additionally, a key ``model`` can be given, corresponding to an arbitrary name for the model.
+Additionally, a key ``name`` can be given, corresponding to an arbitrary name for the model.
+
+## Files
+
+Models can be stored in ``json`` or ``yml`` files, and must have either ``.json``, ``.yml``, or ``.yaml`` file extensions.
+- A handful of models are stored in ``do/graphs``.
+
+## Dictionaries
+
+A model can also be stored as a Python dictionary directly, and loaded into an instance of the [[API|Do API]].
### Example
@@ -52,3 +56,34 @@ model:
This represents the basic graph of a single edge, (Y, X).
- In the absence of any ``latent`` attributes, both variables are observable.
- ``Y`` has no parents, it is a root.
+
+#### Dictionary
+
+Here is the [above example](#example), represented as a Python dictionary.
+
+```py
+m = {
+ "name": "Simple Model",
+ "model": {
+ "Y": {
+ "outcomes": ["y", "~y"],
+ "table": [
+ ["y", 0.7],
+ ["~y", 0.3]
+ ]
+ },
+ "X": {
+ "outcomes": ["x", "~x" ],
+ "parents": [ "Y" ],
+ "table": [
+ ["x", "y", 0.9],
+ ["x", "~y", 0.75],
+ ["~x", "y", 0.1],
+ ["~x", "~y", 0.25]
+ ]
+ }
+ }
+}
+```
+
+Both representations can be used in the [[API|Do API]].
diff --git a/doc/Configuration.md b/wiki/Configuration.md
similarity index 94%
rename from doc/Configuration.md
rename to wiki/Configuration.md
index e99f115..65f8cc3 100644
--- a/doc/Configuration.md
+++ b/wiki/Configuration.md
@@ -1,6 +1,4 @@
-# Configuration File Settings
-
-Settings for the project are stored in ``src/config/config.yml``.
+Settings for the project are stored in ``config.yml`` in the same directory as the Python file that imports ``Do``.
- **Note**: This file will be created if it does not exist, when the project is run.
## Output Control
diff --git a/wiki/Deconfounding Sets.md b/wiki/Deconfounding Sets.md
new file mode 100644
index 0000000..ab258fe
--- /dev/null
+++ b/wiki/Deconfounding Sets.md
@@ -0,0 +1,46 @@
+# Deconfounding Sets
+
+Finding all deconfounding sets between two sets of vertices.
+
+## Basic Example
+
+Assuming the basic 3-vertex graph from [[Backdoor Paths]], **G = (V, E)** where:
+- **V** = ``{x, y, z}``
+- **E** = ``{(x, y), (z, x), (z, y)}``
+
+```python
+from do.API import Do
+
+# Assume this were a detailed model conforming to the above graph...
+model = dict()
+
+do_api = Do(model)
+
+dcf = do_api.deconfounding_sets({"x"}, {"y"})
+
+for deconfounding_set in dcf:
+ print(f"Deconfounding set for x->y!: {deconfounding_set}")
+```
+
+**Important**:
+- ``deconfounding_sets`` takes a *source* set of variables, and a *destination/target* set of variables.
+- A list of sets is returned, where each set consists of one possible set by which to block all backdoor paths.
+
+## Usage of Deconfounding Sets
+
+Finding a deconfounding set can be helpful, but any [[probability queries involving interventions|Probability Queries]] automatically handle deconfounding. An easy check to verify each deconfounding set:
+
+
+```python
+from do.API import Do
+
+# Assume this were a more complicated model
+model = dict()
+
+do_api = Do(model)
+
+dcf = do_api.deconfounding_sets({"x"}, {"y"})
+
+for deconfounding_set in dcf:
+ assert len(do_api.backdoor_paths({"x"}, {"y"}, deconfounding_set)) == 0
+```
diff --git a/wiki/Do API.md b/wiki/Do API.md
new file mode 100644
index 0000000..00a6577
--- /dev/null
+++ b/wiki/Do API.md
@@ -0,0 +1,79 @@
+Details on the [API](https://en.wikipedia.org/wiki/API) provided in the project.
+
+This assumes the steps in the [[Installation]] section have been followed, and the project is set up.
+
+**Note**: For simplicity of import-statements, any examples will *assume* the project was installed as [PyPI](https://pypi.org/project/do-calculus/) package.
+
+## Table of Contents
+
+* [Importing the **Do** API](#importing)
+* [Loading a Model](#loading-a-model)
+
+## Importing
+
+To import the package:
+
+```python
+import do
+```
+
+**Important**:
+- The package name on [PyPI](https://pypi.org/) is [do-calculus](https://pypi.org/project/do-calculus/), but the module to import is called ``do``.
+
+
+
+To import *just* the API:
+
+```python
+from do.API import Do
+```
+
+**Important**:
+- The API, represented as a Python class, is called **Do**.
+- **Do** is stored in the file ``API``, so it can be imported from ``do.API``.
+
+## Loading a Model
+
+Let's create an instance of the API, using the model from [[Installation]]:
+
+```python
+from do.API import Do
+
+m = {
+ "name": "Simple Model",
+ "model": {
+ "Y": {
+ "outcomes": ["y", "~y"],
+ "table": [
+ ["y", 0.7],
+ ["~y", 0.3]
+ ]
+ },
+ "X": {
+ "outcomes": ["x", "~x" ],
+ "parents": [ "Y" ],
+ "table": [
+ ["x", "y", 0.9],
+ ["x", "~y", 0.75],
+ ["~x", "y", 0.1],
+ ["~x", "~y", 0.25]
+ ]
+ }
+ }
+}
+
+x = Do(m)
+```
+
+**Important**:
+- A regular Python dictionary representation of a [[causal model|Causal Models]] is valid input to **Do**.
+- Since **Do** is a class, multiple instances of **Do** - each with their own model - can be instantiated in one project at a time.
+
+## Further
+
+Now that a model is successfully loaded, one can begin [[querying distributions|Probability Queries]].
+
+See any of the more specific pages:
+* [[Probability Queries]]
+* [[Backdoor Paths]]
+* [[Deconfounding Sets]]
diff --git a/wiki/GitHub.md b/wiki/GitHub.md
new file mode 100644
index 0000000..7d78b16
--- /dev/null
+++ b/wiki/GitHub.md
@@ -0,0 +1,54 @@
+Instructions for installing the project from the [source code](https://github.com/bradendubois/do-calculus/wiki).
+
+## Acquiring a Copy
+
+To acquire a copy of the source code, one can [**clone the repository**](#clone), [**download a release**](#release), or use the [**GitHub CLI**](#cli).
+
+After a copy has been acquired, [install the extra dependencies](#extra-dependencies).
+
+## Clone
+
+In order to clone the repository, you must have [git](https://git-scm.com/) installed; if you are on [macOS](https://www.apple.com/ca/macos/) or [Linux](https://www.linux.org/), you almost certainly already have this installed.
+
+You can clone the repository using either the **HTTPS** or **SSH** URL. If you do not know which to choose, or do not intend to commit to the project, use **HTTPS**.
+
+To clone with the **HTTPS** URL:
+
+```shell
+git clone https://github.com/bradendubois/do-calculus.git
+```
+
+To clone with the **SSH** URL:
+```shell
+git clone git@github.com:bradendubois/do-calculus.git
+```
+
+## Release
+
+The project's [releases page](https://github.com/bradendubois/do-calculus/releases) shows all tagged versions of the project, according to [semantic versioning](https://semver.org/). Both **.zip** and **.tar.gz** archives are available.
+
+**Releases**: [https://github.com/bradendubois/do-calculus/releases](https://github.com/bradendubois/do-calculus/releases)
+
+Releases are automatically created, tagged, and versioned using [semantic-release](https://github.com/semantic-release/semantic-release).
+
+## CLI
+
+To clone with the [GitHub CLI](https://cli.github.com/).
+
+```shell
+gh repo clone bradendubois/do-calculus
+```
+
+## Extra Dependencies
+
+After acquiring a copy from any of the above steps:
+
+```shell
+pip install -r requirements.txt
+```
+
+The above command will install all dependencies listed in ``requirements.txt``.
+
+## Further
+
+An [API](https://en.wikipedia.org/wiki/API) is available and [[details can be found here|Do API]].
diff --git a/wiki/Home.md b/wiki/Home.md
new file mode 100644
index 0000000..084bc8c
--- /dev/null
+++ b/wiki/Home.md
@@ -0,0 +1,5 @@
+# do-calculus wiki
+
+This wiki is *under construction*, and most documentation is still a work in progress.
+
+See the Sidebar for relevant links.
diff --git a/wiki/Installation.md b/wiki/Installation.md
new file mode 100644
index 0000000..890eedb
--- /dev/null
+++ b/wiki/Installation.md
@@ -0,0 +1,20 @@
+How to install and set up the software.
+
+## Table of Contents
+
+* [Requirements](#requirements)
+* [Options](#options)
+
+## Requirements
+
+Setup requirements for the project are:
+- **[Python 3.8+](https://www.python.org/)**
+- [**pip**](https://pip.pypa.io/en/stable/) is used to install required packages.
+
+**Note**: `pip` will already be installed with any installation of **Python 3.4+**.
+
+## Options
+
+There are **two** main ways to install the package:
+- [[Install from PyPI|PyPI]]
+- [[Install from source|GitHub]]
diff --git a/wiki/Literature.md b/wiki/Literature.md
new file mode 100644
index 0000000..151d30b
--- /dev/null
+++ b/wiki/Literature.md
@@ -0,0 +1,11 @@
+TODO - References galore to backdoor paths, deconfounding, and more!
+
+## Books
+
+* Causality (2nd Edition) - Judea Pearl, 2009
+* The Book of Why: The New Science of Cause and Effect - Judea Pearl and Dana Mackenzie, 2018
+* Causal Inference in Statistics: A Primer - Judea Pearl, Madelyn Glymour, Nicholas P. Jewell, 2016
+
+## Papers
+
+TODO - Shpitser & Pearl 2004, Thesis, and a few more.
diff --git a/wiki/Probability Queries.md b/wiki/Probability Queries.md
new file mode 100644
index 0000000..a98d030
--- /dev/null
+++ b/wiki/Probability Queries.md
@@ -0,0 +1,89 @@
+How to measure probabilities using the **Do** API.
+
+## Making a Query
+
+For this, we will query a standard probability through the **Do** API.
+
+```python
+from do.API import Do
+from do.structures.VariableStructures import Outcome
+
+m = {
+ "name": "Simple Model",
+ "model": {
+ "Y": {
+ "outcomes": ["y", "~y"],
+ "table": [
+ ["y", 0.7],
+ ["~y", 0.3]
+ ]
+ },
+ "X": {
+ "outcomes": ["x", "~x" ],
+ "parents": [ "Y" ],
+ "table": [
+ ["x", "y", 0.9],
+ ["x", "~y", 0.75],
+ ["~x", "y", 0.1],
+ ["~x", "~y", 0.25]
+ ]
+ }
+ }
+}
+
+do_api = Do(m)
+
+x = Outcome("X", "x")
+y = Outcome("Y", "y")
+
+x_alone = do_api.p({x}, set())
+print(f"The probability of X=x, P(X=x) = {x_alone:5}")
+
+x_if_y = do_api.p({x}, {y})
+print(f"The probability of P(X=x | Y=y) = {x_if_y:5}")
+
+x_and_y = do_api.p({x, y}, set())
+print(f"The probability of P(X=x, Y=y) = {x_and_y:5}")
+```
+
+**Important**:
+- The representation of a variable in the model having some *observed* value is implemented as an **Outcome** object.
+- The creation of an Outcome object is to supply the *name* of the variable, and *some outcome of this variable*.
+- The Outcome class is located at ``do.structures.VariableStructures``.
+- The API function provided in **Do** to query a probability is the ``p`` function.
+- **Do.p** takes *two* arguments, a *Collection of outcomes*, and a *Collection of "given" outcomes*.
+- **Do.p** requires an empty set as its "given" outcomes even if there are none.
+- **Do.p** returns a *float*, between [0, 1].
+
+## Querying an Interventional Distribution
+
+Assume the existence of some more complicated model, ``m_confounded``, in which multiple variables are susceptible to *backdoor paths* or *confounding*, but a sufficient *deconfounding set* can block all backdoor paths.
+- See [[Literature]] for more details on *backdoor paths* and *deconfounding*.
+
+```python
+from do.API import Do
+from do.structures.VariableStructures import Outcome, Intervention
+
+# Assume this were some more complicated model...
+m_confounding = dict()
+
+do_api = Do(m_confounding)
+
+x = Outcome("X", "x")
+
+y_outcome = Outcome("Y", "y")
+y_intervention = Intervention("Y", "y")
+
+x_y = do_api.p({x}, {y_outcome})
+x_do_y = do_api.p({x}, {y_intervention})
+
+if x_y != x_do_y:
+ print(f"P(X=x | Y=y) ({x_y:5}) != P(X=x | do(Y=y)) ({x_do_y:5}): Y shows causal influence over X!")
+```
+
+**Important**:
+- A *treatment* or *intervention* is represented by the **Intervention** object.
+- The Intervention class is located at ``do.structures.VariableStructures``, the same as the Outcome class.
+- The Intervention class takes the same arguments as the Outcome class.
+- Queries involving interventions use **Do.p** just as standard queries do.
+- The "given" / body of a query is a *Collection* of Outcomes and Interventions.
diff --git a/wiki/PyPI.md b/wiki/PyPI.md
new file mode 100644
index 0000000..9fd0365
--- /dev/null
+++ b/wiki/PyPI.md
@@ -0,0 +1,29 @@
+Instructions for installing the package through its [PyPI distribution](https://pypi.org/project/do-calculus/).
+
+## PyPI Package
+
+The package is published on [PyPI](https://pypi.org/) as [do-calculus](https://pypi.org/project/do-calculus/).
+
+To install from [PyPI](https://pypi.org/) as a package:
+
+```shell
+pip install do-calculus
+```
+
+## Upgrade
+
+To upgrade a local installation of the project (such as when a new version is released), add the ``-U`` flag:
+
+```shell
+pip install -U do-calculus
+```
+
+## PyPI Release Cycle
+
+By default, a new package will be automatically uploaded to PyPI on a new [semantically-versioned](https://semver.org/) [release](https://github.com/bradendubois/do-calculus/releases) which is automatically handled by [semantic-release](https://github.com/semantic-release/semantic-release) in a [workflow](https://github.com/bradendubois/do-calculus/actions).
+
+Releases are generated by [semantic-release](https://github.com/semantic-release/semantic-release) on pushes or merges to the [main](https://github.com/bradendubois/do-calculus/tree/main) and [beta](https://github.com/bradendubois/do-calculus/tree/beta) branches of the project.
+
+*Only* releases produced from [main](https://github.com/bradendubois/do-calculus/tree/main) will be uploaded to the [PyPI](https://pypi.org/project/do-calculus/) distribution. All development on the project will eventually work its way up to the [PyPI](https://pypi.org/project/do-calculus/) distribution, though it may lag behind [GitHub releases](https://github.com/bradendubois/do-calculus/releases) by anywhere between minutes to a few days.
+
+See the [[API|Do API]] page for importing and using the package once installed.
diff --git a/wiki/Resources.md b/wiki/Resources.md
new file mode 100644
index 0000000..4eb50c3
--- /dev/null
+++ b/wiki/Resources.md
@@ -0,0 +1,5 @@
+A collection of resources for information about the project, or *do-calculus* generally.
+
+* [[Configuration]]: Settings for the project.
+* [[Causal Models]]: Details on the structure of a causal model for use in the package.
+* [[Literature]]: Books and papers referenced in the implementation of this project.
diff --git a/wiki/_Sidebar.md b/wiki/_Sidebar.md
new file mode 100644
index 0000000..8761208
--- /dev/null
+++ b/wiki/_Sidebar.md
@@ -0,0 +1,15 @@
+### [[Home]]
+
+### [[Installation]]
+* [[PyPI]]
+* [[GitHub]]
+
+### [[Resources]]
+* [[Configuration]]
+* [[Causal Models]]
+* [[Literature]]
+
+### [[Do API]]
+* [[Probability Queries]]
+* [[Backdoor Paths]]
+* [[Deconfounding Sets]]