From 7e63b10c2f92e891d20e12aa5265c7df2ee85780 Mon Sep 17 00:00:00 2001 From: Pierre Senellart Date: Tue, 7 Nov 2023 19:55:32 +0800 Subject: [PATCH] Optimize Shapley value computation in the non-probabilistic case --- src/BooleanCircuit.cpp | 4 ++++ src/BooleanCircuit.h | 6 ++++++ src/dDNNF.cpp | 22 +++++++++++++++------- src/dDNNF.h | 2 +- test/expected/shapley.out | 16 ++++++++++++++++ test/sql/shapley.sql | 32 ++++++++++++++++++++++++++++++++ 6 files changed, 74 insertions(+), 8 deletions(-) diff --git a/src/BooleanCircuit.cpp b/src/BooleanCircuit.cpp index ab48441..5f1ad02 100644 --- a/src/BooleanCircuit.cpp +++ b/src/BooleanCircuit.cpp @@ -57,6 +57,8 @@ gate_t BooleanCircuit::setGate(const uuid &u, BooleanGate type) gate_t BooleanCircuit::setGate(const uuid &u, BooleanGate type, double p) { auto id = setGate(u, type); + if(std::isnan(p)) + p=1.; setProb(id,p); return id; } @@ -64,6 +66,8 @@ gate_t BooleanCircuit::setGate(const uuid &u, BooleanGate type, double p) gate_t BooleanCircuit::setGate(BooleanGate type, double p) { auto id = setGate(type); + if(std::isnan(p)) + p=1.; setProb(id,p); return id; } diff --git a/src/BooleanCircuit.h b/src/BooleanCircuit.h index 642a631..a93cb92 100644 --- a/src/BooleanCircuit.h +++ b/src/BooleanCircuit.h @@ -30,6 +30,7 @@ std::set inputs; std::set mulinputs; std::vector prob; std::map info; +bool probabilistic=false; public: BooleanCircuit() { @@ -40,11 +41,16 @@ gate_t setGate(const uuid &u, BooleanGate t) override; gate_t setGate(BooleanGate t, double p); gate_t setGate(const uuid &u, BooleanGate t, double p); void setProb(gate_t g, double p) { + if(!probabilistic && p!=1.) + probabilistic=true; prob[static_cast::type>(g)]=p; } double getProb(gate_t g) const { return prob[static_cast::type>(g)]; } +bool isProbabilistic() const { + return probabilistic; +} void setInfo(gate_t g, unsigned info); unsigned getInfo(gate_t g) const; diff --git a/src/dDNNF.cpp b/src/dDNNF.cpp index f051853..8ebd725 100644 --- a/src/dDNNF.cpp +++ b/src/dDNNF.cpp @@ -196,6 +196,10 @@ double dDNNF::probabilityEvaluation() const std::unordered_map > dDNNF::shapley_delta(gate_t root) const { std::unordered_map > result; + + if(!isProbabilistic()) + return result; + // Stack to simulate recursion: contains a pair (node, b) where b // indicates whether this is the beginning (false) or ending (true) of // the processing of a node @@ -317,10 +321,11 @@ std::vector > dDNNF::shapley_alpha(gate_t root) const { stack.push(std::make_pair(getWires(node)[0], false)); } else { result[node] = result[getWires(node)[0]]; - for(unsigned k=0; k > dDNNF::shapley_alpha(gate_t root) const { result[node] = result[getWires(node)[0]]; for(size_t i=1; i > dDNNF::shapley_alpha(gate_t root) const { const auto n1=r1.size()-1; const auto n2=r2.size()-1; result[node].resize(n1+n2+1); - for(size_t k=0; k<=n1+n2; ++k) { + auto k0=isProbabilistic()?0:n1+n2; + for(size_t k=k0; k<=n1+n2; ++k) { result[node][k].resize(k+1); for(size_t l=0; l<=k; ++l) { for(size_t k1=std::max(0,static_cast(k-n2)); k1<=std::min(k,n1); ++k1) @@ -396,10 +403,11 @@ double dDNNF::shapley(gate_t var) const { double result=0.; - for(size_t k=0; k=alpha_pos.size()?0.:alpha_pos[k][l]; - double neg = k>=alpha_neg.size()?0.:alpha_neg[k][l]; + double pos = alpha_pos[k][l]; + double neg = alpha_neg[k][l]; result += (pos-neg)/comb(k,l)/(k+1); } diff --git a/src/dDNNF.h b/src/dDNNF.h index 7cea3f0..201279b 100644 --- a/src/dDNNF.h +++ b/src/dDNNF.h @@ -25,7 +25,7 @@ mutable std::unordered_map probability_cache; std::unordered_map > shapley_delta(gate_t root) const; std::vector > shapley_alpha(gate_t root) const; std::vector topological_order(const std::vector > &reversedWires) const; -gate_t root; +gate_t root{0}; public: gate_t getRoot() const { diff --git a/test/expected/shapley.out b/test/expected/shapley.out index 3de7d7f..a7346d9 100644 --- a/test/expected/shapley.out +++ b/test/expected/shapley.out @@ -15,3 +15,19 @@ Nancy | Paris | 0.156 (7 rows) + remove_provenance +------------------- + +(1 row) + + name | city | shapley +----------+----------+--------- + Ellen | Berlin | -0.036 + Susan | Berlin | -0.036 + John | New York | -0.036 + Paul | New York | -0.036 + Dave | Paris | 0.048 + Magdalen | Paris | 0.048 + Nancy | Paris | 0.048 +(7 rows) + diff --git a/test/sql/shapley.sql b/test/sql/shapley.sql index dfdfab2..91d0e7a 100644 --- a/test/sql/shapley.sql +++ b/test/sql/shapley.sql @@ -22,3 +22,35 @@ SELECT name, city, ROUND(shapley::numeric,3) AS shapley FROM shapley_result ORDER BY city, name; DROP TABLE shapley_result; + +-- Shapley computation in the non-probabilistic case +DO $$ BEGIN + PERFORM set_prob(provenance(), 1.) FROM personnel; +END $$; + +CREATE TABLE shapley_result AS + SELECT name, city, shapley(c.provenance,p.provenance) FROM ( + SELECT provenance() from (SELECT DISTINCT 1 FROM ( + (SELECT DISTINCT city FROM personnel) + EXCEPT + (SELECT p1.city + FROM personnel p1, personnel p2 + WHERE p1.city = p2.city AND p1.id < p2.id + GROUP BY p1.city + ORDER BY p1.city) + ) t + ) u) + AS c, + (SELECT *, provenance() FROM personnel) AS p; + +SELECT remove_provenance('shapley_result'); + +SELECT name, city, ROUND(shapley::numeric,3) AS shapley FROM shapley_result +ORDER BY city, name; + +DROP TABLE shapley_result; + +-- Put back original probability values +DO $$ BEGIN + PERFORM set_prob(provenance(), id*1./10) FROM personnel; +END $$;