diff --git a/.github/workflows/update_wiki.yml b/.github/workflows/update_wiki.yml index 2396421..4c235f7 100644 --- a/.github/workflows/update_wiki.yml +++ b/.github/workflows/update_wiki.yml @@ -5,7 +5,7 @@ on: push: # Trigger only when wiki directory changes paths: - - 'doc/**' + - 'wiki/**' # Trigger only on main/beta branches: [ main ] @@ -19,7 +19,7 @@ jobs: - name: Push Wiki Changes uses: Andrew-Chen-Wang/github-wiki-action@v2 env: - WIKI_DIR: doc/ + WIKI_DIR: docs/ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_MAIL: ${{ secrets.EMAIL }} GH_NAME: ${{ github.repository_owner }} diff --git a/README.md b/README.md index db39144..18fd54f 100755 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@

do-calculus

A Python implementation of the do-calculus of Judea Pearl et. al.

- - Test Workflows + + Test Workflows - - Coverage Status + + Coverage Status @@ -20,11 +20,11 @@ ## Resources -* **Documentation / Wiki**: [github.com/bradendubois/probability-code/wiki](https://github.com/bradendubois/probability-code/wiki) -* **Source Code**: [github.com/bradendubois/probability-code](https://github.com/bradendubois/probability-code) +* **Documentation / Wiki**: [github.com/bradendubois/do-calculus/wiki](https://github.com/bradendubois/do-calculus/wiki) +* **Source Code**: [github.com/bradendubois/do-calculus](https://github.com/bradendubois/do-calculus) * **PyPI**: [pypi.org/project/do-calculus/](https://pypi.org/project/do-calculus/) -* **Releases**: [github.com/bradendubois/probability-code/releases](https://github.com/bradendubois/probability-code/releases) -* **Bug reports**: [github.com/bradendubois/probability-code/issues](https://github.com/bradendubois/probability-code/issues) +* **Releases**: [github.com/bradendubois/do-calculus/releases](https://github.com/bradendubois/do-calculus/releases) +* **Bug reports**: [github.com/bradendubois/do-calculus/issues](https://github.com/bradendubois/do-calculus/issues) * **Contact**: [braden.dubois@usask.ca](mailto:braden.dubois@usask.ca) -See the [wiki](https://github.com/bradendubois/probability-code/wiki) to get started. +See the [wiki](https://github.com/bradendubois/do-calculus/wiki) to get started. diff --git a/doc/REPL.md b/debug/REPL.md similarity index 100% rename from doc/REPL.md rename to debug/REPL.md diff --git a/do/graphs/dataset_generator/distribution_generation.py b/debug/dataset_generator/distribution_generation.py similarity index 100% rename from do/graphs/dataset_generator/distribution_generation.py rename to debug/dataset_generator/distribution_generation.py diff --git a/do/graphs/dataset_generator/graph_generator.py b/debug/dataset_generator/graph_generator.py similarity index 100% rename from do/graphs/dataset_generator/graph_generator.py rename to debug/dataset_generator/graph_generator.py diff --git a/do/graphs/dataset_generator/model_generator.py b/debug/dataset_generator/model_generator.py similarity index 98% rename from do/graphs/dataset_generator/model_generator.py rename to debug/dataset_generator/model_generator.py index eba211c..a0978e4 100755 --- a/do/graphs/dataset_generator/model_generator.py +++ b/debug/dataset_generator/model_generator.py @@ -26,7 +26,7 @@ except ValueError: print("Could not convert", argv[1], "to int; defaulting to", N) -destination_directory = Path(".", argv[2]) +destination_directory = Path("", argv[2]) if not destination_directory.is_dir(): print("Cannot resolve", destination_directory) diff --git a/do/graphs/minimal/fumigants.txt b/debug/minimal/fumigants.txt similarity index 100% rename from do/graphs/minimal/fumigants.txt rename to debug/minimal/fumigants.txt diff --git a/do/graphs/minimal/sunscreen.txt b/debug/minimal/sunscreen.txt similarity index 100% rename from do/graphs/minimal/sunscreen.txt rename to debug/minimal/sunscreen.txt diff --git a/do/API.py b/do/API.py index c79a536..2e83284 100644 --- a/do/API.py +++ b/do/API.py @@ -2,7 +2,7 @@ # probability-code API # ########################################################### -from typing import Union +from typing import Collection, List, Optional, Set, Union from pathlib import Path from .api.backdoor_paths import api_backdoor_paths @@ -10,10 +10,11 @@ from .api.joint_distribution_table import api_joint_distribution_table from .api.probability_query import api_probability_query -from 
.probability.structures.BackdoorController import BackdoorController -from .probability.structures.CausalGraph import CausalGraph -from .probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable -from .probability.structures.VariableStructures import Variable +from .structures.BackdoorController import BackdoorController +from .structures.CausalGraph import CausalGraph +from .structures.ConditionalProbabilityTable import ConditionalProbabilityTable +from .structures.Types import Vertex, Vertices +from .structures.VariableStructures import Outcome, Intervention from .util.ModelLoader import parse_model from .util.OutputLogger import OutputLogger @@ -32,6 +33,9 @@ def __init__(self, model: dict or None, print_detail=False, print_result=False, an argument to log_fd, or can be done later with a call to set_log_fd. @param log_fd: An open file descriptor to write to, if log_details is enabled. """ + self._print_result = print_result + self._output = OutputLogger(print_result, print_detail, log, log_fd) + if model: self.load_model(model) @@ -40,9 +44,6 @@ def __init__(self, model: dict or None, print_detail=False, print_result=False, self._g = None self._bc = None - self._print_result = print_result - self._output = OutputLogger(print_result, print_detail, log, log_fd) - ################################################################ # API Modifications # ################################################################ @@ -93,7 +94,7 @@ def set_log_fd(self, log_fd): # Distributions # ################################################################ - def p(self, y: set, x: set) -> float: + def p(self, y: Collection[Outcome], x: Collection[Union[Outcome, Intervention]]) -> Optional[float]: """ Compute a probability query of Y, given X. @param y: Head of query; a set of Outcome objects @@ -101,11 +102,15 @@ def p(self, y: set, x: set) -> float: @return: The probability of P(Y | X), in the range [0.0, 1.0] @raise ProbabilityException when the given probability cannot be computed, such as an invalid Outcome """ - # All deconfounding is handled by the CG - result = api_probability_query(self._cg, y, x) - self._output.result(result) + try: + # All deconfounding is handled by the CG + result = api_probability_query(self._cg, y, x) + self._output.result(result) + return result - return result + except AssertionError as e: + self._output.detail(e) + return None def joint_distribution_table(self) -> ConditionalProbabilityTable: """ @@ -113,7 +118,7 @@ def joint_distribution_table(self) -> ConditionalProbabilityTable: @return: A list of tuples, (Outcomes, P), where Outcomes is a unique set of Outcome objects for the model, and P is the corresponding probability. """ - result = api_joint_distribution_table(self._cg) + result: ConditionalProbabilityTable = api_joint_distribution_table(self._cg) if self._print_result: keys = sorted(self._cg.variables.keys()) @@ -126,7 +131,7 @@ def joint_distribution_table(self) -> ConditionalProbabilityTable: # Pathfinding (Backdoor Controller) # ################################################################ - def backdoor_paths(self, src: set, dst: set, dcf: set) -> list: + def backdoor_paths(self, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> List[Path]: """ Find all the "backdoor paths" between two sets of variables. 
@param src: A set of (string) vertices defined in the loaded model, which will be the source to begin searching @@ -145,9 +150,10 @@ def backdoor_paths(self, src: set, dst: set, dcf: set) -> list: for left, right in zip(path[:-1], path[1:]): print(left, "<-" if right in self._g.parents(left) else "->", end=" ") print(path[-1]) + return result - def deconfounding_sets(self, src: set, dst: set) -> list: + def deconfounding_sets(self, src: set, dst: set) -> List[Set[str]]: """ Find the sets of vertices in the loaded model that are sufficient at blocking all backdoor paths from all vertices in src to any vertices in dst diff --git a/do/__main__.py b/do/__main__.py index 3df490b..583df22 100644 --- a/do/__main__.py +++ b/do/__main__.py @@ -12,7 +12,7 @@ # there being path issues depending on the working directory -def main(graph_location=Path(".", "src", "graphs", "full")): +def main(graph_location=Path(".", "graphs")): """ Run an interactive IO prompt allowing full use of the causality software. @param graph_location: A string of the path from the working directory to a directory of graphs diff --git a/do/api/backdoor_paths.py b/do/api/backdoor_paths.py index c4ef959..2071c8e 100644 --- a/do/api/backdoor_paths.py +++ b/do/api/backdoor_paths.py @@ -1,9 +1,10 @@ -from itertools import product +from typing import Collection, Dict, List, Optional -from ..probability.structures.BackdoorController import BackdoorController +from ..structures.BackdoorController import BackdoorController +from ..structures.Types import Path, Vertices -def api_backdoor_paths_parse(query: str) -> (set, set): +def api_backdoor_paths_parse(query: str) -> Dict[str, Collection[str]]: """ Convert a given query string into a pair of sets to compute all backdoor paths between @param query: A string of the form "X, Y, Z -> A, B, C" or "X, Y, Z -> A, B, C | I, J, K" @@ -28,7 +29,7 @@ def clean(x): } -def api_backdoor_paths(bc: BackdoorController, src: set, dst: set, dcf: set) -> list: +def api_backdoor_paths(bc: BackdoorController, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> List[Path]: """ Compute and return all the backdoor paths from any vertex in src to any vertex in dst @param bc: A Backdoor Controller with a graph conforming to the given source and destination sets. @@ -42,8 +43,4 @@ def api_backdoor_paths(bc: BackdoorController, src: set, dst: set, dcf: set) -> list containing each vertex (as a string) from the source vertex to the destination vertex, with dcf acting as a deconfounding set. """ - # TODO Add a method in Backdoor Controller that can return all paths immediately - paths = [] - for s, t in product(src, dst): - paths += bc.backdoor_paths_pair(s, t, dcf) - return paths + return bc.backdoor_paths(src, dst, dcf) diff --git a/do/api/deconfounding_sets.py b/do/api/deconfounding_sets.py index 8d6eff5..fa79843 100644 --- a/do/api/deconfounding_sets.py +++ b/do/api/deconfounding_sets.py @@ -1,7 +1,10 @@ -from ..probability.structures.BackdoorController import BackdoorController +from typing import Collection, Dict, List, Set +from ..structures.BackdoorController import BackdoorController +from ..structures.Types import Vertices -def api_deconfounding_sets_parse(query: str) -> (set, set): + +def api_deconfounding_sets_parse(query: str) -> Dict[str, Collection[str]]: """ Convert a given query string into a pair of sets to find all sufficient deconfounding sets between. 
@param query: A string of the form "X, Y, Z -> A, B, C" @@ -19,7 +22,7 @@ def clean(x): } -def api_deconfounding_sets(bc: BackdoorController, src: set, dst: set) -> list: +def api_deconfounding_sets(bc: BackdoorController, src: Vertices, dst: Vertices) -> List[Set[str]]: """ Compute and return all the backdoor paths from any vertex in src to any vertex is dst @param bc: A Backdoor Controller with a graph conforming to the given source and destination sets. diff --git a/do/api/joint_distribution_table.py b/do/api/joint_distribution_table.py index b4dfcba..5afb8fa 100644 --- a/do/api/joint_distribution_table.py +++ b/do/api/joint_distribution_table.py @@ -1,8 +1,8 @@ from itertools import product -from ..probability.structures.CausalGraph import CausalGraph -from ..probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable -from ..probability.structures.VariableStructures import Outcome, Variable +from ..structures.CausalGraph import CausalGraph +from ..structures.ConditionalProbabilityTable import ConditionalProbabilityTable +from ..structures.VariableStructures import Outcome, Variable def api_joint_distribution_table(cg: CausalGraph) -> ConditionalProbabilityTable: diff --git a/do/api/probability_query.py b/do/api/probability_query.py index 58c6803..499a9cd 100644 --- a/do/api/probability_query.py +++ b/do/api/probability_query.py @@ -1,8 +1,10 @@ -from ..probability.structures.CausalGraph import CausalGraph -from ..probability.structures.VariableStructures import parse_outcomes_and_interventions +from typing import Collection, Dict, Union +from ..structures.CausalGraph import CausalGraph +from ..structures.VariableStructures import Outcome, Intervention, parse_outcomes_and_interventions -def api_probability_query_parse(query: str) -> (tuple, tuple): + +def api_probability_query_parse(query: str) -> Dict[str, Collection[str]]: """ Parse a query string into Outcome and Intervention structures. @param query: A string of the form "Y=y, X=x | W=w", or just "Y=y, X=x" @@ -21,7 +23,7 @@ def api_probability_query_parse(query: str) -> (tuple, tuple): } -def api_probability_query(cg: CausalGraph, y: set, x: set) -> float: +def api_probability_query(cg: CausalGraph, y: Collection[Outcome], x: Collection[Union[Outcome, Intervention]]) -> float: """ Compute a probability query for the currently loaded causal graph. @param cg: A Causal Graph containing variables, distributions, etc. 
diff --git a/do/config/config_manager.py b/do/config/config_manager.py index c798f1f..7660e99 100755 --- a/do/config/config_manager.py +++ b/do/config/config_manager.py @@ -1,7 +1,7 @@ from pathlib import Path from yaml import safe_load as load, dump -from ..config.primary_configuration import * +from ..config.primary_configuration import primary_config_file path = Path(".", "config.yml") diff --git a/do/config/generate_config_docs.py b/do/config/generate_config_docs.py index 8c10886..bd01cbe 100755 --- a/do/config/generate_config_docs.py +++ b/do/config/generate_config_docs.py @@ -4,7 +4,7 @@ from pathlib import Path -from .primary_configuration import * +from .primary_configuration import primary_config_file documentation_file = Path(".", "doc", "Configuration.md") diff --git a/do/config/primary_configuration.py b/do/config/primary_configuration.py index 0d95feb..fd02f6e 100755 --- a/do/config/primary_configuration.py +++ b/do/config/primary_configuration.py @@ -61,4 +61,3 @@ }] } ] - diff --git a/do/graphs/full/abcd.yml b/do/graphs/abcd.yml similarity index 100% rename from do/graphs/full/abcd.yml rename to do/graphs/abcd.yml diff --git a/do/graphs/full/fumigants_eelworms.yml b/do/graphs/fumigants_eelworms.yml similarity index 100% rename from do/graphs/full/fumigants_eelworms.yml rename to do/graphs/fumigants_eelworms.yml diff --git a/do/graphs/full/m-game.yml b/do/graphs/m-game.yml similarity index 100% rename from do/graphs/full/m-game.yml rename to do/graphs/m-game.yml diff --git a/do/graphs/full/melanoma.yml b/do/graphs/melanoma.yml similarity index 100% rename from do/graphs/full/melanoma.yml rename to do/graphs/melanoma.yml diff --git a/do/graphs/full/pearl-3.4.yml b/do/graphs/pearl-3.4.yml similarity index 100% rename from do/graphs/full/pearl-3.4.yml rename to do/graphs/pearl-3.4.yml diff --git a/do/graphs/full/pearl-3.6.yml b/do/graphs/pearl-3.6.yml similarity index 100% rename from do/graphs/full/pearl-3.6.yml rename to do/graphs/pearl-3.6.yml diff --git a/do/graphs/full/pearl-3.7c.yml b/do/graphs/pearl-3.7c.yml similarity index 100% rename from do/graphs/full/pearl-3.7c.yml rename to do/graphs/pearl-3.7c.yml diff --git a/do/graphs/full/pearl-7.5.yml b/do/graphs/pearl-7.5.yml similarity index 100% rename from do/graphs/full/pearl-7.5.yml rename to do/graphs/pearl-7.5.yml diff --git a/do/graphs/full/simulation.json.yml b/do/graphs/simulation.json.yml similarity index 100% rename from do/graphs/full/simulation.json.yml rename to do/graphs/simulation.json.yml diff --git a/do/graphs/full/square-game.yml b/do/graphs/square-game.yml similarity index 100% rename from do/graphs/full/square-game.yml rename to do/graphs/square-game.yml diff --git a/do/graphs/full/test.json b/do/graphs/test.json similarity index 100% rename from do/graphs/full/test.json rename to do/graphs/test.json diff --git a/do/probability/structures/BackdoorController.py b/do/structures/BackdoorController.py similarity index 85% rename from do/probability/structures/BackdoorController.py rename to do/structures/BackdoorController.py index 08de3b6..6f676bb 100755 --- a/do/probability/structures/BackdoorController.py +++ b/do/structures/BackdoorController.py @@ -8,12 +8,13 @@ ######################################################### from itertools import product +from typing import List, Optional from .Graph import Graph +from .Types import Collection, Path, Vertices, Vertex, V_Type -from ...config.settings import Settings -from ...util.helpers import minimal_sets -from ...util.helpers import power_set +from 
..config.settings import Settings +from ..util.helpers import minimal_sets, power_set, str_map class BackdoorController: @@ -31,28 +32,33 @@ def __init__(self, graph: Graph): self.graph = graph.copy() self.graph.reset_disabled() - def backdoor_paths(self, src: set, dst: set, dcf: set) -> list: + def backdoor_paths(self, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> List[Path]: """ Get all possible backdoor paths between some source set of vertices in the internal graph to any vertices in some destination set of vertices. A given (possibly empty) set of deconfounding vertices may serve to block, or even open, some backdoor paths. @param src: The source set of (string) vertices to search for paths from @param dst: The destination set of (string) vertices to search from src towards. - @param dcf: A set of (string) vertices that may serve as a sufficient deconfounding set to block or open + @param dcf: An optional set of (string) vertices that may serve as a sufficient deconfounding set to block or open backdoor paths. @return: A list of lists, where each sublist contains a backdoor path, the first and last element being a vertex from src and dst, respectively, with all vertices between representing the path. All elements are string vertices. """ + paths = [] + src_str = str_map(src) + dst_str = str_map(dst) + dcf_str = str_map(dcf) if dcf else set() + # Use the product of src, dst to try each possible pairing - for s, t in product(src, dst): - paths += self.backdoor_paths_pair(s, t, dcf) + for s, t in product(src_str, dst_str): + paths += self._backdoor_paths_pair(s, t, dcf_str) return paths - def backdoor_paths_pair(self, s: str, t: str, dcf: set) -> list: + def _backdoor_paths_pair(self, s: Collection[str], t: Collection[str], dcf: Collection[str]) -> List[Path]: """ Find all backdoor paths between any particular pair of vertices in the loaded graph @param s: A source (string) vertex in the graph @@ -114,7 +120,7 @@ def get_backdoor_paths(cur: str, path: list, path_list: list, previous="up") -> # Filter out the paths that don't "enter" x; see the definition of a backdoor path return list(filter(lambda l: l[0] in self.graph.children(l[1]) and l[1] != t, backdoor_paths)) - def all_dcf_sets(self, src: set, dst: set) -> list: + def all_dcf_sets(self, src: Vertices, dst: Vertices) -> List[Collection[str]]: """ Finds all Z subsets that serve as deconfounding sets between two sets of vertices, such as for the purpose of measuring interventional distributions. 
@@ -123,8 +129,11 @@ def all_dcf_sets(self, src: set, dst: set) -> list: @return: A list of sets, each set representing a set of variables that are a sufficient Z set """ + src_str = str_map(src) + dst_str = str_map(dst) + # Can't use anything in src, dst, or any descendant of any vertex in src as a deconfounding/blocking vertex - disallowed_vertices = src | dst | set().union(*[self.graph.reach(s) for s in src]) + disallowed_vertices = src_str | dst_str | set().union(*[self.graph.reach(s) for s in src_str]) valid_deconfounding_sets = list() @@ -135,11 +144,11 @@ def all_dcf_sets(self, src: set, dst: set) -> list: any_backdoor_paths = False # Cross represents one (x in X, y in Y) tuple - for s, t in product(src, dst): + for s, t in product(src_str, dst_str): # Get any/all backdoor paths for this particular pair of vertices in src,dst with given potential # deconfounding set - backdoor_paths = self.backdoor_paths_pair(s, t, set(tentative_dcf)) + backdoor_paths = self._backdoor_paths_pair(s, t, set(tentative_dcf)) if len(backdoor_paths) > 0: any_backdoor_paths = True @@ -155,7 +164,7 @@ def all_dcf_sets(self, src: set, dst: set) -> list: return list(valid_deconfounding_sets) - def all_paths_cumulative(self, s: str, t: str, path: list, path_list: list) -> list: + def all_paths_cumulative(self, s: str, t: str, path: list, path_list: list) -> List[Path]: """ Return a list of lists of all paths from a source to a target, with conditional movement from child to parent, or parent to child. @@ -173,7 +182,7 @@ def all_paths_cumulative(self, s: str, t: str, path: list, path_list: list) -> l path_list = self.all_paths_cumulative(child, t, path + [s], path_list) return path_list - def independent(self, src: set, dst: set, dcf: set) -> bool: + def independent(self, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> bool: """ Helper function that makes some do_calculus logic more readable; determine if two sets are independent, given some third set. 
@@ -182,12 +191,17 @@ def independent(self, src: set, dst: set, dcf: set) -> bool: @param dcf: A deconfounding set (of strings) Z, to block paths between X and Y @return: True if there are no backdoor paths and no straight-line paths, False otherwise """ + + src_str = str_map(src) + dst_str = str_map(dst) + dcf_str = str_map(dcf) if dcf else set() + # Not independent if there are any unblocked backdoor paths - if len(self.backdoor_paths(src, dst, dcf)) > 0: + if len(self.backdoor_paths(src_str, dst_str, dcf_str)) > 0: return False # Ensure no straight-line variables from any X -> Y or Y -> X - for s, t in product(src, dst): + for s, t in product(src_str, dst_str): if len(self.all_paths_cumulative(s, t, [], [])) != 0: return False # x -> y if len(self.all_paths_cumulative(t, s, [], [])) != 0: diff --git a/do/probability/structures/CausalGraph.py b/do/structures/CausalGraph.py similarity index 89% rename from do/probability/structures/CausalGraph.py rename to do/structures/CausalGraph.py index 24028ff..9f10d18 100755 --- a/do/probability/structures/CausalGraph.py +++ b/do/structures/CausalGraph.py @@ -8,15 +8,16 @@ ######################################################### from itertools import product +from typing import Collection, Union from .BackdoorController import BackdoorController from .Graph import Graph from .Probability_Engine import ProbabilityEngine from .VariableStructures import Outcome, Intervention -from ...config.settings import Settings -from ...util.OutputLogger import OutputLogger -from ...util.helpers import p_str +from ..config.settings import Settings +from ..util.OutputLogger import OutputLogger +from ..util.helpers import p_str class CausalGraph: @@ -44,16 +45,19 @@ def __init__(self, graph: Graph, variables: dict, outcomes: dict, tables: dict, self.latent = latent.copy() self.output = kwargs["output"] if "output" in kwargs else OutputLogger() - def probability_query(self, head: set, body: set) -> float or None: + def probability_query(self, head: Collection[Outcome], body: Collection[Union[Outcome, Intervention]]) -> float: """ Compute a probability in the given model. @param head: A set of Outcome objects @param body: A set of Outcome and/or Intervention objects. @return: A value in the range [0.0, 1.0] if the probability can be computed, None otherwise. """ - def strings(s: set): + def strings(s: Collection[Union[Outcome, Intervention]]): return set(map(lambda x: x.name, s)) + head = set(head) + body = set(body) + self.graph.reset_disabled() # String representation of the given query @@ -95,15 +99,13 @@ def strings(s: set): # Filter down the deconfounding sets not overlapping with our query body vertex_dcf = list(filter(lambda s: len(set(s) & strings(body)) == 0, deconfounding_sets)) - if len(vertex_dcf) == 0: - self.output.result("No deconfounding set Z can exist for the given data.") - return + assert len(vertex_dcf) != 0, "No deconfounding set Z can exist for the given data." 
# Compute with every possible deconfounding set as a safety measure; ensuring they all match probability = None # Sentinel value for z_set in vertex_dcf: - result = self._marginalize_query(head, body, interventions, z_set) + result = self._marginalize_query(head, body, z_set) if probability is None: # Storing first result probability = result @@ -115,19 +117,21 @@ def strings(s: set): self.graph.reset_disabled() return probability - def _marginalize_query(self, head: set, body: set, interventions: set, dcf: set) -> float: + def _marginalize_query(self, head: Collection[Outcome], body: Collection[Union[Outcome, Intervention]], dcf: Collection[str]) -> float: """ Handle the modified query where we require a deconfounding set due to Interventions / treatments. @param head: The head of the query, a set containing Outcome objects @param body: The body of the query, a set containing Outcome and Intervention objects - @param interventions: A set containing Intervention objects; this should be a subset within body, of all - Intervention objects in the query, since this should already have been found whenever this function is - called. @param dcf: A set of (string) names of variables to serve as a deconfounding set, blocking all backdoor paths between the head and body @return: """ + head = set(head) + body = set(body) + + interventions = set(filter(lambda x: isinstance(x, Intervention), body)) + # Augment graph (isolating interventions as roots) and create engine self.graph.disable_incoming(*interventions) engine = ProbabilityEngine(self.graph, self.outcomes, self.tables) diff --git a/do/probability/structures/ConditionalProbabilityTable.py b/do/structures/ConditionalProbabilityTable.py similarity index 95% rename from do/probability/structures/ConditionalProbabilityTable.py rename to do/structures/ConditionalProbabilityTable.py index e42c712..9fe24a0 100755 --- a/do/probability/structures/ConditionalProbabilityTable.py +++ b/do/structures/ConditionalProbabilityTable.py @@ -6,13 +6,14 @@ # # ######################################################### -from numpy import empty from math import floor, ceil +from numpy import empty +from typing import List from .VariableStructures import Variable, Outcome, Intervention -from ...config.settings import Settings -from ...util.ProbabilityExceptions import MissingTableRow +from ..config.settings import Settings +from ..util.ProbabilityExceptions import MissingTableRow class ConditionalProbabilityTable: @@ -27,7 +28,7 @@ class ConditionalProbabilityTable: # Padding units on the left/right sides of each cell padding = 1 - def __init__(self, variable: Variable, given: list, table_rows: list): + def __init__(self, variable: Variable, given: List[str], table_rows: List): self.variable = variable # The LHS of the table, single-variable only self.given = given # The RHS/body of the table diff --git a/do/probability/structures/Graph.py b/do/structures/Graph.py similarity index 89% rename from do/probability/structures/Graph.py rename to do/structures/Graph.py index 89140c5..0ffe0ea 100755 --- a/do/probability/structures/Graph.py +++ b/do/structures/Graph.py @@ -11,19 +11,16 @@ # We can isolate more generalized graph code here, as well as create a better way to "erase" incoming or outgoing # edges, but only temporarily; this will improve "reach", "parents", etc. 
-from typing import Union +from typing import Collection, Set, Tuple, Union -from .VariableStructures import Variable, Outcome, Intervention - -# These functions should work with any sort of Variable type, or the name itself -CG_Types = Union[str, Variable, Outcome, Intervention] +from .Types import V_Type class Graph: """A basic graph, with edge control.""" - def __init__(self, v: set, e: set): + def __init__(self, v: Set[str], e: Set[Tuple[str, str]]): """ Initializer for a basic Graph. @param v: A set of vertices @@ -46,7 +43,7 @@ def __init__(self, v: set, e: set): self.topology_map = {vertex: 0 for vertex in v} - def initialize_topology(vertex: CG_Types, depth=0): + def initialize_topology(vertex: V_Type, depth=0): """ Helper function to initialize the ordering of the Variables in the graph @param vertex: A Variable to set the ordering of, and then all its children @@ -70,14 +67,14 @@ def __str__(self) -> str: msg += "Edges:\n" + "\n".join(" -> ".join(i for i in edge) for edge in self.e) return msg - def roots(self) -> set: + def roots(self) -> Collection[str]: """ Get the roots of the the graph G. @return: A set of vertices (strings) in G that have no parents. """ return set([x for x in self.v if len(self.parents(x)) == 0]) - def parents(self, v: CG_Types) -> set: + def parents(self, v: V_Type) -> Collection[Union[str, V_Type]]: """ Get the parents of v, which may actually be currently controlled @param v: A variable in our graph @@ -89,7 +86,7 @@ def parents(self, v: CG_Types) -> set: return {p for p in self.incoming[label] if p not in self.outgoing_disabled and p not in self.outgoing[label]} - def children(self, v: CG_Types) -> set: + def children(self, v: V_Type) -> Collection[Union[str, V_Type]]: """ Get the children of v, which may actually be currently controlled @param v: A variable in our graph @@ -101,7 +98,7 @@ def children(self, v: CG_Types) -> set: return {c for c in self.outgoing[label] if c not in self.incoming_disabled and c not in self.incoming[label]} - def ancestors(self, v: CG_Types) -> set: + def ancestors(self, v: V_Type) -> Collection[Union[str, V_Type]]: """ Get the ancestors of v, accounting for disabled vertices @param v: The vertex to find all ancestors of @@ -119,7 +116,7 @@ def ancestors(self, v: CG_Types) -> set: return ancestors - def reach(self, v: CG_Types) -> set: + def reach(self, v: V_Type) -> Collection[Union[str, V_Type]]: """ Get the reach of v, accounting for disabled vertices @param v: The vertex to find all descendants of @@ -137,7 +134,7 @@ def reach(self, v: CG_Types) -> set: return set(children) - def disable_outgoing(self, *disable: CG_Types): + def disable_outgoing(self, *disable: V_Type): """ Disable the given vertices' outgoing edges @param disable: Any number of vertices to disable @@ -145,7 +142,7 @@ def disable_outgoing(self, *disable: CG_Types): for v in disable: self.outgoing_disabled.add(to_label(v)) - def disable_incoming(self, *disable: CG_Types): + def disable_incoming(self, *disable: V_Type): """ Disable the given vertices' incoming edges @param disable: Any number of vertices to disable @@ -160,7 +157,7 @@ def reset_disabled(self): self.outgoing_disabled.clear() self.incoming_disabled.clear() - def get_topology(self, v: CG_Types) -> int: + def get_topology(self, v: V_Type) -> int: """ Determine the "depth" a given Variable is at in a topological sort of the graph @param v: The variable to determine the depth of @@ -185,7 +182,7 @@ def __copy__(self): copied.outgoing_disabled = self.outgoing_disabled.copy() return copied - def 
topological_variable_sort(self, variables: list) -> list: + def topological_variable_sort(self, variables: Collection[Union[str, V_Type]]) -> Collection[Union[str, V_Type]]: """ A helper function to abstract what it means to "sort" a list of Variables/Outcomes/Interventions @param variables: A list of any number of Variable/Outcome/Intervention instances @@ -198,7 +195,7 @@ def topological_variable_sort(self, variables: list) -> list: sorted_variables = [[v for v in variables if self.get_topology(v) == i] for i in range(largest_topology+1)] return [item for topology_sublist in sorted_variables for item in topology_sublist] - def descendant_first_sort(self, variables: list) -> list: + def descendant_first_sort(self, variables: Collection[Union[str, V_Type]]) -> Collection[Union[str, V_Type]]: """ A helper function to "sort" a list of Variables/Outcomes/Interventions such that no element has a "parent"/"ancestor" to its left @@ -209,7 +206,7 @@ def descendant_first_sort(self, variables: list) -> list: return self.topological_variable_sort(variables)[::-1] -def to_label(item: CG_Types) -> str: +def to_label(item: V_Type) -> str: """ Convert a variable to its string name, if not already provided as such @param item: The item to convert, either a string (done) or some Variable diff --git a/do/probability/structures/Probability_Engine.py b/do/structures/Probability_Engine.py similarity index 95% rename from do/probability/structures/Probability_Engine.py rename to do/structures/Probability_Engine.py index 56e834e..bd2060d 100755 --- a/do/probability/structures/Probability_Engine.py +++ b/do/structures/Probability_Engine.py @@ -8,14 +8,15 @@ ######################################################### from itertools import product +from typing import Collection, Union from .Graph import Graph from .VariableStructures import Outcome, Intervention -from ...config.settings import Settings -from ...util.OutputLogger import OutputLogger -from ...util.helpers import p_str -from ...util.ProbabilityExceptions import ProbabilityException, ProbabilityIndeterminableException +from ..config.settings import Settings +from ..util.OutputLogger import OutputLogger +from ..util.helpers import p_str +from ..util.ProbabilityExceptions import ProbabilityException, ProbabilityIndeterminableException class ProbabilityEngine: @@ -33,7 +34,7 @@ def __init__(self, graph: Graph, outcomes: dict, tables: dict, **kwargs): self.output = kwargs["output"] if "output" in kwargs else OutputLogger() self._stored_computations = dict() - def probability(self, head: set, body: set) -> float: + def probability(self, head: Collection[Outcome], body: Collection[Union[Outcome, Intervention]]) -> float: """ @param head: A set of Outcome objects representing the head of a query @param body: A set of Outcome/Intervention objects representing the body of a query @@ -42,6 +43,9 @@ def probability(self, head: set, body: set) -> float: @raise AssertionError if there is an Intervention in the head """ + head = set(head) + body = set(body) + # Ensure there are no adjustments/interventions in the head for out in head: assert not isinstance(out, Intervention), f"Error: {out} is in head; no Interventions should be in head." 
@@ -59,7 +63,7 @@ def probability(self, head: set, body: set) -> float: self.graph.disable_incoming(*interventions) return self._compute(list(head), list(body)) - def _compute(self, head: list, body: list, depth=0) -> float: + def _compute(self, head: Collection[Outcome], body: Collection[Union[Outcome, Intervention]], depth=0) -> float: """ Compute the probability of some head given some body @param head: A list of some number of Outcome objects @@ -270,7 +274,7 @@ def _store_computation(self, string_representation: str, result: float): print("Uh-oh:", string_representation, "has already been cached, but with a different value...") -def contradictory_outcome_set(outcomes: list) -> bool: +def contradictory_outcome_set(outcomes: Collection[Union[Outcome, Intervention]]) -> bool: """ Check whether a list of outcomes contain any contradictory values, such as Y = y and Y = ~y @param outcomes: A list of Outcome objects diff --git a/do/structures/Types.py b/do/structures/Types.py new file mode 100644 index 0000000..c905963 --- /dev/null +++ b/do/structures/Types.py @@ -0,0 +1,11 @@ +from typing import Collection, List, NewType, Union + +from .VariableStructures import Variable, Outcome, Intervention + +# General +V_Type = NewType("V_Type", Union[Variable, Outcome, Intervention]) + +# Graph-related +Vertex = NewType("Vertex", Union[V_Type, str]) +Vertices = NewType("Vertices", Collection[Vertex]) +Path = NewType("Path", List[Vertex]) diff --git a/do/probability/structures/VariableStructures.py b/do/structures/VariableStructures.py similarity index 100% rename from do/probability/structures/VariableStructures.py rename to do/structures/VariableStructures.py diff --git a/do/probability/structures/__init__.py b/do/structures/__init__.py similarity index 92% rename from do/probability/structures/__init__.py rename to do/structures/__init__.py index 9e8d9db..6ba87be 100644 --- a/do/probability/structures/__init__.py +++ b/do/structures/__init__.py @@ -4,5 +4,6 @@ "ConditionalProbabilityTable", "Graph", "Probability_Engine", + "Types", "VariableStructures" ] diff --git a/do/util/ModelLoader.py b/do/util/ModelLoader.py index 53850b3..1815f4b 100755 --- a/do/util/ModelLoader.py +++ b/do/util/ModelLoader.py @@ -3,9 +3,9 @@ from typing import Union from yaml import safe_load as yaml_load -from ..probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable -from ..probability.structures.Graph import Graph -from ..probability.structures.VariableStructures import Variable, Outcome, Intervention +from ..structures.ConditionalProbabilityTable import ConditionalProbabilityTable +from ..structures.Graph import Graph +from ..structures.VariableStructures import Variable def parse_model(file: Union[dict, str, Path]): diff --git a/do/util/helpers.py b/do/util/helpers.py index 5de6487..28d6ae6 100644 --- a/do/util/helpers.py +++ b/do/util/helpers.py @@ -1,7 +1,8 @@ from itertools import chain, combinations -from typing import Iterator +from typing import Collection, Iterator, Union from ..config.settings import Settings +from ..structures.Types import Intervention, Outcome, Vertices def power_set(variable_list: list or set, allow_empty_set=True) -> Iterator[any]: @@ -40,7 +41,7 @@ def disjoint(*sets) -> bool: return len(set().union(*sets)) == sum(map(lambda iterable: len(iterable), sets)) -def p_str(lhs: list, rhs: list) -> str: +def p_str(lhs: Collection[Outcome], rhs: Collection[Union[Outcome, Intervention]]) -> str: """ Convert a head&body to a properly-formatted string @param lhs: The 
head/LHS of the query; a list of Outcome/Intervention objects @@ -61,3 +62,7 @@ def within_precision(a: float, b: float) -> bool: @return: True if the values are within the margin of error acceptable, False otherwise """ return abs(a - b) < 1 / (10 ** Settings.regression_levels_of_precision) + + +def str_map(to_filter: Vertices): + return set(map(lambda v: v if isinstance(v, str) else v.name, to_filter)) diff --git a/doc/Getting Started.md b/doc/Getting Started.md deleted file mode 100644 index dea636d..0000000 --- a/doc/Getting Started.md +++ /dev/null @@ -1,77 +0,0 @@ -# Getting Started - -How to install and set up the software. - -#### Table of Contents - -* [Installation](#installation) -* [Setup](#setup) -* [Running](#running) - -## Installation - -There are multiple ways to install the software: [**clone the repository**](#clone), [**download a release**](#release), or use the [**GitHub CLI**](#cli). - -### Clone - -In order to clone the repository, you must have [git](https://git-scm.com/) installed; if you are on [macOS](https://www.apple.com/ca/macos/) or [Linux](https://www.linux.org/), you almost certainly already have this installed. - -You can clone the repository using either the [**HTTPS**](#https) URL, or the [**SSH**](#ssh) URL. If you do not know which to choose, or do not intend to commit to the project, use [**HTTPS**](#https). - -#### HTTPS - -To clone with the **HTTPS** URL: - -```shell -git clone https://github.com/bradendubois/probability-code.git -``` - -#### SSH - -To clone with the **SSH** URL: -```shell -git clone git@github.com:bradendubois/probability-code.git -``` - -### Release - -The project's [releases page](https://github.com/bradendubois/probability-code/releases) shows all tagged version of the project, according to [semantic versioning](https://semver.org/). Both **.zip** and **.tar.gz** archives are available. - -Releases: [https://github.com/bradendubois/probability-code/releases](https://github.com/bradendubois/probability-code/releases) - -Releases are automatically created and tagged using [semantic-release](https://github.com/semantic-release/semantic-release). - -### CLI - -To clone with the [GitHub CLI](https://cli.github.com/). - -```shell -gh repo clone bradendubois/probability-code -``` - -## Setup - -Setup requirements for the project are: -- **[Python 3.8+](https://www.python.org/)** -- [**pip**](https://pip.pypa.io/en/stable/) is used to install [required packages](#python-requirements). - -**Note**: `pip` will already be installed with any installation of **Python 3.4+**. - -### Python Requirements - -At present, the only package not part of a default Python installation is [NumPy](https://numpy.org/). To install *numpy* exclusively: - -```shell -pip install numpy -``` -However, in the event that more packages become used, the more generalized following command will install all necessary packages in ``requirements.txt``: - -```shell -pip install -r requirements.txt -``` - -## Running - -A basic [REPL](https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop) is available, and [[details can be found here|REPL]]. - -An [API](https://en.wikipedia.org/wiki/API) is also available, and [[details can be found here|API]]. diff --git a/doc/Home.md b/doc/Home.md deleted file mode 100644 index 45f6a1b..0000000 --- a/doc/Home.md +++ /dev/null @@ -1,11 +0,0 @@ -# probability-code wiki - -This wiki is under construction, and most documentation is still under revision or subject to change in the near future. 
- -At present, the pages listed below are for the newer versions of the project under the ``develop`` branch, while other pages accessible from the sidebar are for the first tagged version of the project. - -#### Table of Contents - -* [[Getting Started]] -* [[REPL]] -* [[Causal Models]] diff --git a/setup.cfg b/setup.cfg index eab5072..0fc7713 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,13 +11,15 @@ exclude = [coverage:run] relative_files = True -source = do/ +source = + do/ + tests/ omit = do/API.py do/__main__.py - do/config/* + do/config/generate_config_docs.py + do/config/config_manager.py do/util/OutputLogger.py - do/graphs/dataset_generator [coverage:report] exclude_lines = diff --git a/setup.py b/setup.py index be7284f..e1537c8 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ from pathlib import Path -from setuptools import find_packages, setup +from setuptools import setup from os import environ cwd = Path(".") diff --git a/tests/backdoors/backdoor_path_tests.py b/tests/backdoors/backdoor_path_tests.py index dbf94e4..b96d340 100644 --- a/tests/backdoors/backdoor_path_tests.py +++ b/tests/backdoors/backdoor_path_tests.py @@ -3,11 +3,11 @@ from pathlib import Path from yaml import safe_load as load -from ..test_util import print_test_result - -from do.probability.structures.BackdoorController import BackdoorController +from do.structures.BackdoorController import BackdoorController from do.util.ModelLoader import parse_model +from ..test_util import print_test_result + test_file_directory = Path(dirname(abspath(__file__))) / "test_files" @@ -32,11 +32,11 @@ def model_backdoor_validation(bc: BackdoorController, test_data: dict) -> (bool, expected_paths = list(map(sorted, test["expect"])) - paths = [] - for s, t in itertools.product(test["src"], test["dst"]): - paths.extend(bc.backdoor_paths_pair(s, t, test["dcf"] if "dcf" in test else {})) + src = test["src"] + dst = test["dst"] + dcf = test["dcf"] if "dcf" in test else set() - # Sort each path to improve some sor + paths = bc.backdoor_paths(src, dst, dcf) paths = list(map(sorted, paths)) if test["exhaustive"] and len(paths) != len(expected_paths): # coverage: skip diff --git a/tests/inference/inference_tests.py b/tests/inference/inference_tests.py index 46b182b..fa3464b 100755 --- a/tests/inference/inference_tests.py +++ b/tests/inference/inference_tests.py @@ -2,14 +2,14 @@ from pathlib import Path from yaml import safe_load as load -from ..test_util import print_test_result - -from do.probability.structures.CausalGraph import CausalGraph, Outcome -from do.probability.structures.VariableStructures import parse_outcomes_and_interventions +from do.structures.CausalGraph import CausalGraph, Outcome +from do.structures.VariableStructures import parse_outcomes_and_interventions from do.util.helpers import within_precision from do.util.ModelLoader import parse_model from do.util.ProbabilityExceptions import * +from ..test_util import print_test_result + test_file_directory = Path(dirname(abspath(__file__))) / "test_files" @@ -89,12 +89,25 @@ def inference_tests(graph_location: Path) -> (bool, str): head = parse_outcomes_and_interventions(test["head"]) body = parse_outcomes_and_interventions(test["body"]) if "body" in test else set() - result = cg.probability_query(head, body) expected = test["expect"] - if expected != "failure" and not within_precision(result, expected): # coverage: skip - print_test_result(False, f"Got {result} but expected {expected} in {graph_filename}") - test_file_success = False + try: + + result = 
cg.probability_query(head, body) + + # Should have raised assertion error... + if expected == "failure": + print_test_result(False, f"Expected test to fail, but it did not! {graph_filename}") + test_file_success = False + + if expected != "failure" and not within_precision(result, expected): # coverage: skip + print_test_result(False, f"Got {result} but expected {expected} in {graph_filename}") + test_file_success = False + + except AssertionError: + if expected != "failure": + print_test_result(False, f"Unexpected assertion error! {graph_filename}") + test_file_success = False if test_file_success: print_test_result(True, f"All tests in {test_file}|{graph_filename} passed") diff --git a/tests/test_driver.py b/tests/test_driver.py index 1c73cde..cbe4053 100644 --- a/tests/test_driver.py +++ b/tests/test_driver.py @@ -7,11 +7,11 @@ from do.api.joint_distribution_table import api_joint_distribution_table from do.api.probability_query import api_probability_query, api_probability_query_parse -from do.probability.structures.BackdoorController import BackdoorController -from do.probability.structures.CausalGraph import CausalGraph -from do.probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable -from do.probability.structures.Graph import Graph, to_label -from do.probability.structures.VariableStructures import Outcome, Variable, Intervention +from do.structures.BackdoorController import BackdoorController +from do.structures.CausalGraph import CausalGraph +from do.structures.ConditionalProbabilityTable import ConditionalProbabilityTable +from do.structures.Graph import Graph, to_label +from do.structures.VariableStructures import Outcome, Variable, Intervention from do.util.helpers import power_set, disjoint, minimal_sets, within_precision from do.util.ModelLoader import parse_model @@ -27,7 +27,7 @@ default_model_file = "pearl-3.4.yml" # Default location for the graphs made by hand -graphs = Path("do", "graphs", "full") +graphs = Path("do", "graphs") # Path to the Xi-Xj model test_file = graphs / default_model_file @@ -175,7 +175,7 @@ def test_probability_lookup(): try: assert t.probability_lookup(Outcome("Xj", "foo"), priors) == 100 - raise Exception + raise Exception # coverage: skip except MissingTableRow: pass @@ -426,14 +426,14 @@ def test_parse_model(): # nonexistent file try: parse_model(Path("fake", "path", "fake")) - raise Exception + raise Exception # coverage: skip except FileNotFoundError: pass # invalid file try: parse_model(Path("do", "util", "helpers.py")) - raise Exception + raise Exception # coverage: skip except FileNotFoundError: pass diff --git a/wiki/Backdoor Paths.md b/wiki/Backdoor Paths.md new file mode 100644 index 0000000..ef2a768 --- /dev/null +++ b/wiki/Backdoor Paths.md @@ -0,0 +1,56 @@ +How to discover backdoor paths between two sets of variables in a given [[causal model|Causal Models]]. + +## Basic Backdoor Paths + +Assume the following model uses the graph **G = (V, E)**, where: +- **V** = ``{x, y, z}`` +- **E** = ``{(x, y), (z, x), (z, y)}`` + +```python +from do.API import Do + +# Assume this were a detailed model conforming to the above graph... +model = dict() + +do_api = Do(model) + +backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}, set()) + +for path in backdoor_paths: + print(f"Backdoor path from x->y!: {path}") +``` + +``backdoor_paths`` returns a list of lists, in which each sub-list consists of the vertices (end-points included) connecting some vertex in the ``src`` set to some vertex in the ``dst`` set. 
+- In this example, the return value would be ``[["x", "z", "y"]]``, as this denotes the singular backdoor path ``x <- z -> y``. + +**Important** +- The first parameter is the set of source variables from which the pathfinding begins. +- The second parameter is the set of destination variables to which the pathfinding attempts to reach. +- A third parameter is a set of *deconfounding* variables by which to "block" backdoor paths. +- The deconfounding set currently must be given, even if empty. +- Each sublist, a backdoor path, is ordered such that the path order is correctly maintained. + +## Deconfounding Variables + +Assuming the same graph as defined [above](#basic-backdoor-paths)... + +```python +from do.API import Do + +# Assume this were a detailed model conforming to the above graph... +model = dict() + +do_api = Do(model) + +backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}, set()) + +for path in backdoor_paths: + print(f"Backdoor path from x->y!: {path}") + +blocked = do_api.backdoor_paths({"x"}, {"y"}, {"z"}) + +assert len(blocked) == 0 +``` + +**Important** +- If all backdoor paths are successfully blocked, an **empty list** is returned. diff --git a/doc/Causal Models.md b/wiki/Causal Models.md similarity index 53% rename from doc/Causal Models.md rename to wiki/Causal Models.md index 256ee0e..4409c8c 100755 --- a/doc/Causal Models.md +++ b/wiki/Causal Models.md @@ -1,16 +1,11 @@ -# Causal Models +This document outlines the structure of how to create a causal model for use in the package, such as in the [[API|Do API]]. -This document outlines the structure of how to create a causal model. - -Models are inherently **DAGs**, where each variable in a model is also represented as a vertex in the DAG. - -Models can be stored in ``json`` and ``yml`` files, and must have either ``.json``, ``.yml``, or ``.yaml`` file extensions. -- The default graph folder is ``src/graphs/full``. +Models are inherently **DAGs** (Directed Acyclic Graph), where each variable in a model is also represented as a vertex in the DAG. ## Model Structure -The graph file must be structured such that a key with value ``model`` is present, and corresponds to a list, where each item is itself a dictionary, representing one variable in the model. -- Each variable in the model is represented by a unique key representing the variable's name, and corresponds to the following key-value pairs: +A model is represented as dictionary, mapping the name of one variable in the model to its detailed information. +- A variable's detailed information consists of the following key-value pairs: - ``outcomes``: all discrete outcomes the variable may take, represented as a list. - ``parents``: parent variables (also defined in the model) of the current variable, represented as a list. - If the variable is a root - that is, there are no parents - the list can be left empty, or this key can be absent from this variable entirely. @@ -19,7 +14,16 @@ The graph file must be structured such that a key with value ``model`` is presen - ``latent``: a boolean representing whether the variable is unobservable in the given model. - If this key is absent, it will be assumed ``False`` - that is, assumed observable. -Additionally, a key ``model`` can be given, corresponding to an arbitrary name for the model. +Additionally, a key ``name`` can be given, corresponding to an arbitrary name for the model. + +## Files + +Models can be stored in ``json`` or ``yml`` files, and must have either ``.json``, ``.yml``, or ``.yaml`` file extensions. 
+- A handful of models are stored in ``do/graphs``. + +## Dictionaries + +A model can also be stored as a Python dictionary directly, and loaded into an instance of the [[API|Do API]]. ### Example @@ -52,3 +56,34 @@ model: This represents the basic graph of a single edge, (Y, X). - In the absence of any ``latent`` attributes, both variables are observable. - ``Y`` has no parents, it is a root. + +#### Dictionary + +Here is the [above example](#example), represented as a Python dictionary. + +```py +m = { + "name": "Simple Model", + "model": { + "Y": { + "outcomes": ["y", "~y"], + "table": [ + ["y", 0.7], + ["~y", 0.3] + ] + }, + "X": { + "outcomes": ["x", "~x" ], + "parents": [ "Y" ], + "table": [ + ["x", "y", 0.9], + ["x", "~y", 0.75], + ["~x", "y", 0.1], + ["~x", "~y", 0.25] + ] + } + } +} +``` + +Both representations be used in the [[API|Do API]]. diff --git a/doc/Configuration.md b/wiki/Configuration.md similarity index 94% rename from doc/Configuration.md rename to wiki/Configuration.md index e99f115..65f8cc3 100644 --- a/doc/Configuration.md +++ b/wiki/Configuration.md @@ -1,6 +1,4 @@ -# Configuration File Settings - -Settings for the project are stored in ``src/config/config.yml``. +Settings for the project are stored in ``config.yml`` in the same directory as the Python file that imports ``Do``. - **Note**: This file will be created if it does not exist, when the project is run. ## Output Control diff --git a/wiki/Deconfounding Sets.md b/wiki/Deconfounding Sets.md new file mode 100644 index 0000000..ab258fe --- /dev/null +++ b/wiki/Deconfounding Sets.md @@ -0,0 +1,46 @@ +# Deconfounding Sets + +Finding all deconfounding sets between two sets of vertices. + +## Basic Example + +Assuming the basic 3-vertex graph from [[Backdoor Paths]], **G = (V, E)** where: +- **V** = ``{x, y, z}`` +- **E** = ``{(x, y), (z, x), (z, y)}`` + +```python +from do.API import Do + +# Assume this were a detailed model conforming to the above graph... +model = dict() + +do_api = Do(model) + +dcf = do_api.deconfounding_sets({"x"}, {"y"}) + +for deconfounding_set in dcf: + print(f"Deconfounding set for x->y!: {deconfounding_set}") +``` + +**Important**: +- ``deconfounding_sets`` takes a *source* set of variables, and a *destination/target* set of variables. +- A list of sets is returned, where each set consists of one possible set by which to block all deconfounding paths. + +## Usage of Deconfounding Sets + +Finding a deconfounding set can be helpful, but any [[probability queries involving interventions|Probability Queries]] automatically handles deconfounding. An easy check to verify each deconfounding set: + + +```python +from do.API import Do + +# Assume this were a more complicated model +model = dict() + +do_api = Do(model) + +dcf = do_api.deconfounding_sets({"x"}, {"y"}) + +for deconfounding_set in dcf: + assert len(do_api.backdoor_paths({"x"}, {"y"}, deconfounding_set)) == 0 +``` diff --git a/wiki/Do API.md b/wiki/Do API.md new file mode 100644 index 0000000..00a6577 --- /dev/null +++ b/wiki/Do API.md @@ -0,0 +1,79 @@ +Details on the [API](https://en.wikipedia.org/wiki/API) provided in the project. + +This assumes the steps in the [[Installation]] section have been followed, and the project is set up. + +**Note**: For simplicity of import-statements, any examples will *assume* the project was installed as [PyPI](https://pypi.org/project/do-calculus/) package. 
+ +## Table of Contents + +* [Importing the **Do** API](#importing) +* [Loading a Model](#loading-a-model) + +## Importing + +To import the package: + +```python +import do +``` + +**Important**: +- The package name on [PyPI](https://pypi.org/) is [do-calculus](https://pypi.org/project/do-calculus/), but the module to import is called ``do``. + +


+ +To import *just* the API: + +```python +from do.API import Do +``` + +**Important**: +- The API, represented as a Python class, is called **Do**. +- **Do** is stored in the file ``API``, so it can be imported from ``do.API``. + +## Loading a Model + +Let's create an instance of the API, using the model from [[Installation]]: + +```python +from do.API import Do + +m = { + "name": "Simple Model", + "model": { + "Y": { + "outcomes": ["y", "~y"], + "table": [ + ["y", 0.7], + ["~y", 0.3] + ] + }, + "X": { + "outcomes": ["x", "~x" ], + "parents": [ "Y" ], + "table": [ + ["x", "y", 0.9], + ["x", "~y", 0.75], + ["~x", "y", 0.1], + ["~x", "~y", 0.25] + ] + } + } +} + +x = Do(m) +``` + +**Important**: +- A regular Python dictionary representation of a [[causal model|Causal Models]] is valid input to **Do**. +- Since **Do** is a class, multiple instances of **Do** - each with their own model - can be instantiated in one project at a time. + +## Further + +Now that a model is successfully loaded, one can begin [[querying distributions|Probability Queries]]. + +See any of the more specific pages: +* [[Probability Queries]] +* [[Backdoor Paths]] +* [[Deconfounding Sets]] diff --git a/wiki/GitHub.md b/wiki/GitHub.md new file mode 100644 index 0000000..7d78b16 --- /dev/null +++ b/wiki/GitHub.md @@ -0,0 +1,54 @@ +Instructions for installing the project from the [source code](https://github.com/bradendubois/do-calculus/wiki). + +## Acquiring a Copy + +To acquire a copy of the source code, one can [**clone the repository**](#clone), [**download a release**](#release), or use the [**GitHub CLI**](#cli). + +After a copy has been acquired, [install the extra dependencies](#extra-dependencies). + +## Clone + +In order to clone the repository, you must have [git](https://git-scm.com/) installed; if you are on [macOS](https://www.apple.com/ca/macos/) or [Linux](https://www.linux.org/), you almost certainly already have this installed. + +You can clone the repository using either the **HTTPS** or **SSH** URL. If you do not know which to choose, or do not intend to commit to the project, use **HTTPS**. + +To clone with the **HTTPS** URL: + +```shell +git clone https://github.com/bradendubois/do-calculus.git +``` + +To clone with the **SSH** URL: +```shell +git clone git@github.com:bradendubois/do-calculus.git +``` + +## Release + +The project's [releases page](https://github.com/bradendubois/do-calculus/releases) shows all tagged version of the project, according to [semantic versioning](https://semver.org/). Both **.zip** and **.tar.gz** archives are available. + +**Releases**: [https://github.com/bradendubois/do-calculus/releases](https://github.com/bradendubois/do-calculus/releases) + +Releases are automatically created, tagged, and versioned using [semantic-release](https://github.com/semantic-release/semantic-release). + +## CLI + +To clone with the [GitHub CLI](https://cli.github.com/). + +```shell +gh repo clone bradendubois/do-calculus +``` + +## Extra Dependencies + +After acquiring a copy from any of the above steps: + +```shell +pip install -r requirements.txt +``` + +The above command will install all dependencies listed in ``requirements.txt``. + +## Further + +An [API](https://en.wikipedia.org/wiki/API) is available and [[details can be found here|Do API]]. diff --git a/wiki/Home.md b/wiki/Home.md new file mode 100644 index 0000000..084bc8c --- /dev/null +++ b/wiki/Home.md @@ -0,0 +1,5 @@ +# do-calculus wiki + +This wiki is *under construction*, and most documentation is still a work in progress. 
+
+## Further
+
+Now that a model is successfully loaded, one can begin [[querying distributions|Probability Queries]].
+
+See any of the more specific pages:
+* [[Probability Queries]]
+* [[Backdoor Paths]]
+* [[Deconfounding Sets]]
diff --git a/wiki/GitHub.md b/wiki/GitHub.md
new file mode 100644
index 0000000..7d78b16
--- /dev/null
+++ b/wiki/GitHub.md
@@ -0,0 +1,54 @@
+Instructions for installing the project from the [source code](https://github.com/bradendubois/do-calculus).
+
+## Acquiring a Copy
+
+To acquire a copy of the source code, one can [**clone the repository**](#clone), [**download a release**](#release), or use the [**GitHub CLI**](#cli).
+
+After a copy has been acquired, [install the extra dependencies](#extra-dependencies).
+
+## Clone
+
+In order to clone the repository, you must have [git](https://git-scm.com/) installed; if you are on [macOS](https://www.apple.com/ca/macos/) or [Linux](https://www.linux.org/), you almost certainly already have it installed.
+
+You can clone the repository using either the **HTTPS** or **SSH** URL. If you do not know which to choose, or do not intend to commit to the project, use **HTTPS**.
+
+To clone with the **HTTPS** URL:
+
+```shell
+git clone https://github.com/bradendubois/do-calculus.git
+```
+
+To clone with the **SSH** URL:
+```shell
+git clone git@github.com:bradendubois/do-calculus.git
+```
+
+## Release
+
+The project's [releases page](https://github.com/bradendubois/do-calculus/releases) shows all tagged versions of the project, according to [semantic versioning](https://semver.org/). Both **.zip** and **.tar.gz** archives are available.
+
+**Releases**: [https://github.com/bradendubois/do-calculus/releases](https://github.com/bradendubois/do-calculus/releases)
+
+Releases are automatically created, tagged, and versioned using [semantic-release](https://github.com/semantic-release/semantic-release).
+
+## CLI
+
+To clone with the [GitHub CLI](https://cli.github.com/):
+
+```shell
+gh repo clone bradendubois/do-calculus
+```
+
+## Extra Dependencies
+
+After acquiring a copy through any of the above steps:
+
+```shell
+pip install -r requirements.txt
+```
+
+The above command will install all dependencies listed in ``requirements.txt``.
+
+## Further
+
+An [API](https://en.wikipedia.org/wiki/API) is available and [[details can be found here|Do API]].
diff --git a/wiki/Home.md b/wiki/Home.md
new file mode 100644
index 0000000..084bc8c
--- /dev/null
+++ b/wiki/Home.md
@@ -0,0 +1,5 @@
+# do-calculus wiki
+
+This wiki is *under construction*, and most documentation is still a work in progress.
+
+See the Sidebar for relevant links.
diff --git a/wiki/Installation.md b/wiki/Installation.md
new file mode 100644
index 0000000..890eedb
--- /dev/null
+++ b/wiki/Installation.md
@@ -0,0 +1,20 @@
+How to install and set up the software.
+
+## Table of Contents
+
+* [Requirements](#requirements)
+* [Options](#options)
+
+## Requirements
+
+Setup requirements for the project are:
+- **[Python 3.8+](https://www.python.org/)**
+- [**pip**](https://pip.pypa.io/en/stable/), used to install required packages
+
+**Note**: `pip` is already bundled with any installation of **Python 3.4+**.
+
+## Options
+
+There are **two** main ways to install the package:
+- [[Install from PyPI|PyPI]]
+- [[Install from source|GitHub]]
diff --git a/wiki/Literature.md b/wiki/Literature.md
new file mode 100644
index 0000000..151d30b
--- /dev/null
+++ b/wiki/Literature.md
@@ -0,0 +1,11 @@
+TODO - References galore to backdoor paths, deconfounding, and more!
+
+## Books
+
+* Causality (2nd Edition) - Judea Pearl, 2009
+* The Book of Why: The New Science of Cause and Effect - Judea Pearl and Dana Mackenzie, 2018
+* Causal Inference in Statistics: A Primer - Judea Pearl, Madelyn Glymour, Nicholas P. Jewell, 2016
+
+## Papers
+
+TODO - Shpitser & Pearl 2004, Thesis, and a few more.
diff --git a/wiki/Probability Queries.md b/wiki/Probability Queries.md
new file mode 100644
index 0000000..a98d030
--- /dev/null
+++ b/wiki/Probability Queries.md
@@ -0,0 +1,98 @@
+How to measure probabilities using the **Do** API.
+
+## Making a Query
+
+Here, we will query a standard probability through the **Do** API.
+
+```python
+from do.API import Do
+from do.structures.VariableStructures import Outcome
+
+m = {
+    "name": "Simple Model",
+    "model": {
+        "Y": {
+            "outcomes": ["y", "~y"],
+            "table": [
+                ["y", 0.7],
+                ["~y", 0.3]
+            ]
+        },
+        "X": {
+            "outcomes": ["x", "~x"],
+            "parents": ["Y"],
+            "table": [
+                ["x", "y", 0.9],
+                ["x", "~y", 0.75],
+                ["~x", "y", 0.1],
+                ["~x", "~y", 0.25]
+            ]
+        }
+    }
+}
+
+do_api = Do(m)
+
+x = Outcome("X", "x")
+y = Outcome("Y", "y")
+
+x_alone = do_api.p({x}, set())
+print(f"The probability of X=x, P(X=x) = {x_alone:5}")
+
+x_if_y = do_api.p({x}, {y})
+print(f"The probability of P(X=x | Y=y) = {x_if_y:5}")
+
+x_and_y = do_api.p({x, y}, set())
+print(f"The probability of P(X=x, Y=y) = {x_and_y:5}")
+```
+
+**Important**:
+- A variable in the model taking some *observed* value is represented by an **Outcome** object.
+- To create an Outcome object, supply the *name* of the variable and *one of its outcomes*.
+- The Outcome class is located at ``do.structures.VariableStructures``.
+- The API function provided in **Do** to query a probability is the ``p`` function.
+- **Do.p** takes *two* arguments: a *Collection* of outcomes to query, and a *Collection* of "given" outcomes.
+- **Do.p** requires an empty set as its "given" outcomes even when there are none.
+- **Do.p** returns a *float* in [0, 1].
+
+## Querying an Interventional Distribution
+
+Assume the existence of some more complicated model, ``m_confounding``, in which multiple variables are susceptible to *backdoor paths* or *confounding*, but a sufficient *deconfounding set* can block all backdoor paths.
+- See [[Literature]] for more details on *backdoor paths* and *deconfounding*.
+
+```python
+from do.API import Do
+from do.structures.VariableStructures import Outcome, Intervention
+
+# Assume this were some more complicated model...
+m_confounding = dict()
+
+do_api = Do(m_confounding)
+
+x = Outcome("X", "x")
+
+y_outcome = Outcome("Y", "y")
+y_intervention = Intervention("Y", "y")
+
+x_y = do_api.p({x}, {y_outcome})
+x_do_y = do_api.p({x}, {y_intervention})
+
+if x_y != x_do_y:
+    print(f"P(X=x | Y=y) ({x_y:5}) != P(X=x | do(Y=y)) ({x_do_y:5}): Y shows causal influence over X!")
+```
+
+**Important**:
+- A *treatment* or *intervention* is represented by the **Intervention** object.
+- The Intervention class is located at ``do.structures.VariableStructures``, the same as the Outcome class.
+- The Intervention class takes the same arguments as the Outcome class.
+- Queries involving interventions use **Do.p** just as standard queries do.
+- The "given" portion of a query is a *Collection* of Outcomes and Interventions, which may be freely mixed, as sketched below.
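+
+A minimal sketch of mixing the two; hypothetical, assuming ``m_confounding`` also contained some observable variable Z:
+
+```python
+z_observed = Outcome("Z", "z")
+
+# Condition on an observation of Z while intervening on Y, in one "given" collection
+x_mixed = do_api.p({x}, {y_intervention, z_observed})
+```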
diff --git a/wiki/PyPI.md b/wiki/PyPI.md
new file mode 100644
index 0000000..9fd0365
--- /dev/null
+++ b/wiki/PyPI.md
@@ -0,0 +1,29 @@
+Instructions for installing the package through its [PyPI distribution](https://pypi.org/project/do-calculus/).
+
+## PyPI Package
+
+The package is published on [PyPI](https://pypi.org/) as [do-calculus](https://pypi.org/project/do-calculus/).
+
+To install from [PyPI](https://pypi.org/) as a package:
+
+```shell
+pip install do-calculus
+```
+
+## Upgrade
+
+To upgrade a local installation of the project (such as when a new version is released), add the ``-U`` flag:
+
+```shell
+pip install -U do-calculus
+```
+
+## PyPI Release Cycle
+
+A new package is automatically uploaded to PyPI for each new [semantically-versioned](https://semver.org/) [release](https://github.com/bradendubois/do-calculus/releases), handled by [semantic-release](https://github.com/semantic-release/semantic-release) in a [workflow](https://github.com/bradendubois/do-calculus/actions).
+
+Releases are generated by [semantic-release](https://github.com/semantic-release/semantic-release) on pushes or merges to the [main](https://github.com/bradendubois/do-calculus/tree/main) and [beta](https://github.com/bradendubois/do-calculus/tree/beta) branches of the project.
+
+*Only* releases produced from [main](https://github.com/bradendubois/do-calculus/tree/main) will be uploaded to the [PyPI](https://pypi.org/project/do-calculus/) distribution. All development on the project will eventually work its way up to the [PyPI](https://pypi.org/project/do-calculus/) distribution, though it may lag behind [GitHub releases](https://github.com/bradendubois/do-calculus/releases) by anywhere from minutes to a few days.
+
+See the [[API|Do API]] page for importing and using the package once installed.
diff --git a/wiki/Resources.md b/wiki/Resources.md
new file mode 100644
index 0000000..4eb50c3
--- /dev/null
+++ b/wiki/Resources.md
@@ -0,0 +1,5 @@
+A collection of resources for information on the project, or on *do-calculus* generally.
+
+* [[Configuration]]: Settings for the project.
+* [[Causal Models]]: Details on the structure of a causal model for use in the package.
+* [[Literature]]: Books and papers referenced in the implementation of this project.
diff --git a/wiki/_Sidebar.md b/wiki/_Sidebar.md
new file mode 100644
index 0000000..8761208
--- /dev/null
+++ b/wiki/_Sidebar.md
@@ -0,0 +1,15 @@
+### [[Home]]
+
+### [[Installation]]
+* [[PyPI]]
+* [[GitHub]]
+
+### [[Resources]]
+* [[Configuration]]
+* [[Causal Models]]
+* [[Literature]]
+
+### [[Do API]]
+* [[Probability Queries]]
+* [[Backdoor Paths]]
+* [[Deconfounding Sets]]