From 13bb903f59c0ed85d9ecf81a6ddb46b3ce10b730 Mon Sep 17 00:00:00 2001 From: kss2k Date: Mon, 28 Oct 2024 19:18:08 +0100 Subject: [PATCH 1/5] better handling of partially missing names --- src/rwrapr/rlist.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/rwrapr/rlist.py b/src/rwrapr/rlist.py index c8c7223..d9d5323 100644 --- a/src/rwrapr/rlist.py +++ b/src/rwrapr/rlist.py @@ -38,6 +38,9 @@ def to_py(self) -> list[Any]: return out +ListTypes = list[Any] | tuple[Any] | set[Any] | RList + + class RDict(UserDict[str, Any]): def __init__(self, x: Any, attributes: dict[str, Any] | None): super().__init__(x) @@ -64,9 +67,10 @@ def to_py(self) -> dict[str, Any]: return out -def convert_r2pylist( - x_collection: list[Any] | tuple[Any] | RList, -) -> list[Any] | tuple[Any]: +DictTypes = dict[str, Any] | OrderedDict[str, Any] | UserDict[str, Any] | RDict + + +def convert_r2pylist(x_collection: ListTypes) -> list[Any] | tuple[Any]: from .convert_r2py import convert_r2py out: tuple[Any] | list[Any] = [convert_r2py(x) for x in x_collection] @@ -80,9 +84,16 @@ def convert_rlist2py(x_collection: vc.ListVector | vc.ListSexpVector) -> Any: from .rattributes import get_rattributes names = convert_numpy(x_collection.names, flatten=False) + if isinstance(names, int | str | float | bool): names = np.array([names], dtype="U") + if names is not None: + fill = np.arange(1, len(names) + 1).astype("U") + if names.itemsize < fill.itemsize: + names = names.astype(fill.dtype) + names[names == ""] = fill[names == ""] + attributes = get_rattributes(x_collection, exclude=["names"]) if attributes is not None: @@ -104,10 +115,7 @@ def is_rlist(x_collection: Any) -> bool: return False -def convert_r2pydict( - x_collection: dict[str, Any] | OrderedDict[str, Any] | UserDict[str, Any] | RDict, - is_rdict: bool = False, -) -> Any: +def convert_r2pydict(x_collection: DictTypes, is_rdict: bool = False) -> Any: from .convert_r2py import convert_r2py # this 
needs to be improved considering named vectors @@ -122,13 +130,13 @@ def convert_r2pydict( return x_collection -def dict2rlist(x: dict[str, Any] | OrderedDict[str, Any] | RDict) -> ro.ListVector: +def dict2rlist(x: DictTypes) -> ro.ListVector: from .convert_py2r import convert_py2r return ro.ListVector({k: convert_py2r(v) for k, v in x.items()}) -def pylist2rlist(x: list[Any] | tuple[Any] | set[Any] | RList) -> ro.ListVector: +def pylist2rlist(x: ListTypes) -> ro.ListVector: y: dict[str, Any] = {str(k): v for k, v in enumerate(x)} unname: Callable[..., Any] = rcall("unname") return unname(dict2rlist(y)) From 275c5956c208de8c2a1784ac4975d5c7525f1386 Mon Sep 17 00:00:00 2001 From: kss2k Date: Mon, 28 Oct 2024 19:18:41 +0100 Subject: [PATCH 2/5] One more test in gausssuppressionfromdata.py --- tests/test_gausssuppressionfromdata.py | 152 +++++++++++++------------ 1 file changed, 78 insertions(+), 74 deletions(-) diff --git a/tests/test_gausssuppressionfromdata.py b/tests/test_gausssuppressionfromdata.py index 8328f84..0acfb3a 100644 --- a/tests/test_gausssuppressionfromdata.py +++ b/tests/test_gausssuppressionfromdata.py @@ -4,22 +4,22 @@ import rwrapr as wr -ST = wr.library("SSBtools") -GS = wr.library("GaussSuppression") +st = wr.library("SSBtools") +gs = wr.library("GaussSuppression") bs = wr.library("base") printInc = False def test_gausssuppressionfromdata_works(): - m = GS.GaussSuppressionFromData( - ST.SSBtoolsData("z1"), np.array([1, 2]), 3, printInc=printInc + m = gs.GaussSuppressionFromData( + st.SSBtoolsData("z1"), np.array([1, 2]), 3, printInc=printInc ) - assert np.all(GS.which(m["suppressed"]) == [12, 13, 22, 23, 42, 43]) + assert np.all(gs.which(m["suppressed"]) == [12, 13, 22, 23, 42, 43]) # Sample with seed inside test_that do not work -z3 = ST.SSBtoolsData("z3") +z3 = st.SSBtoolsData("z3") upper = z3["region"].str.isupper() z3["region"][upper] = z3["region"][upper] + "2" z3["region"][~upper] = z3["region"][~upper].str.upper() + "1" @@ -27,25 +27,22 
@@ def test_gausssuppressionfromdata_works(): z3["kostragr"] = z3["kostragr"].astype("int") z3["ant"] = z3["ant"].astype("int") -mm = ST.ModelMatrix(z3.iloc[:, np.arange(0, 6)], crossTable=True, sparse=False) +mm = st.ModelMatrix(z3.iloc[:, np.arange(0, 6)], crossTable=True, sparse=False) -get_x = GS.function( - """function(mm) { - x = mm$modelMatrix - k = 1:20000 - set.seed(123) - sample_k = sample(k) - x[k] = x[sample_k] - x -}""" -) -x = get_x(mm) +x_p = mm["modelMatrix"] +k = np.arange(20000) + +bs.set_seed(123) +sample_k = bs.sample(k) +y = x_p.flatten(order="F") +y[k] = y[sample_k] +x = y.reshape(x_p.shape, order="F") # test_that("Advanced with integer overflow", { def test_advanced_with_integer_overflow(): # This test will not pass on all platforms, ask the original author for more information - a = GS.GaussSuppressionFromData( + a = gs.GaussSuppressionFromData( z3, np.arange(1, 7), 7, @@ -57,7 +54,7 @@ def test_advanced_with_integer_overflow(): ) assert bs.sum(bs.which(a["suppressed"])) == 599685 - a = GS.GaussSuppressionFromData( + a = gs.GaussSuppressionFromData( z3, np.arange(1, 7), 7, @@ -69,7 +66,7 @@ def test_advanced_with_integer_overflow(): assert bs.sum(bs.which(a["suppressed"])) == 525957 # This test involves integer overflow in AnyProportionalGaussInt - a = GS.GaussSuppressionFromData( + a = gs.GaussSuppressionFromData( z3, np.arange(1, 7), 7, @@ -83,7 +80,7 @@ def test_advanced_with_integer_overflow(): assert bs.sum(bs.which(a["suppressed"])) == 411693 # This test involves all ways of updating A["r[[i]]"], A$x[[i]], B$r[[i]], B$x[[i]] (Including integer overflow) - a = GS.GaussSuppressionFromData( + a = gs.GaussSuppressionFromData( z3, np.arange(1, 7), 7, @@ -97,7 +94,7 @@ def test_advanced_with_integer_overflow(): ) assert bs.sum(bs.which(a["suppressed"])) == 411693 - a = GS.GaussSuppressionFromData( + a = gs.GaussSuppressionFromData( z3, np.arange(1, 7), 7, @@ -115,7 +112,7 @@ def test_advanced_with_integer_overflow(): x[:, np.arange(200, 
300)] = np.round( 0.6 * x[:, np.arange(200, 300)] + 0.6 * x[:, np.arange(300, 400)] ) - a = GS.GaussSuppressionFromData( + a = gs.GaussSuppressionFromData( z3, np.arange(1, 7), 7, @@ -128,17 +125,17 @@ def test_advanced_with_integer_overflow(): def test_structural_empty_and_remove_empty(): - a1 = GS.GaussSuppressionFromData( + a1 = gs.GaussSuppressionFromData( z3.iloc[np.arange(100, 300)], np.arange(1, 7), 7, printInc=printInc ) - a2 = GS.GaussSuppressionFromData( + a2 = gs.GaussSuppressionFromData( z3.iloc[np.arange(100, 300)], np.arange(1, 7), 7, printInc=printInc, structuralEmpty=True, ) - a3 = GS.GaussSuppressionFromData( + a3 = gs.GaussSuppressionFromData( z3.iloc[np.arange(100, 300)], np.arange(1, 7), 7, @@ -151,70 +148,77 @@ def test_structural_empty_and_remove_empty(): assert np.all(a1.loc[k, "ant"] == 0) +def test_extend0_and_various_hierarchy_input(): + z2 = st.SSBtoolsData("z2") + + with wr.ToggleRView(True): # add warnings for these unexpected results + dimLists = st.FindDimLists(z2.drop("ant", axis=1)) + hi = bs.list( + bs.c("region", "fylke", "kostragr"), hovedint=dimLists.to_py()["hovedint"] + ) + + a1 = gs.GaussSuppressionFromData(z2, np.arange(1, 5), 5, printInc=printInc) + a2 = gs.GaussSuppressionFromData( + z2, freqVar="ant", hierarchies=dimLists, printInc=printInc + ) + a3 = gs.GaussSuppressionFromData( + z2, freqVar="ant", hierarchies=hi, printInc=printInc + ) + + assert np.all(a1.reset_index(drop=True) == a2.reset_index(drop=True)) + assert np.all(a3.reset_index(drop=True) == a2.reset_index(drop=True)) + + # +# z2_ = z2[z2["ant"] != 0, ] # +# a1 = GaussSuppressionFromData(z2_, np.arange(1, 4+1), 5, extend0 = True, output = "publish_inner", printInc = printInc) # -# test_that("extend0 and various hierarchy input", { -# z2 = SSBtoolsData("z2") -# dimLists = SSBtools::FindDimLists(z2[, -5]) -# hi = list(c("region", "fylke", "kostragr"), hovedint = dimLists["hovedint"]) -# -# a1 = GaussSuppressionFromData(z2, np.arange(1, 4+1), 5, printInc = 
printInc) -# a2 = GaussSuppressionFromData(z2, freqVar = "ant", hierarchies = dimLists, printInc = printInc) -# a3 = GaussSuppressionFromData(z2, freqVar = "ant", hierarchies = hi, printInc = printInc) -# -# expect_identical(a1, a2) -# expect_identical(a3, a2) -# -# z2_ = z2[z2["ant"] != 0, ] -# -# a1 = GaussSuppressionFromData(z2_, np.arange(1, 4+1), 5, extend0 = True, output = "publish_inner", printInc = printInc) -# -# expect_identical(a1["publish"], a2) +# expect_identical(a1["publish"], a2) # -# a2 = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = dimLists, extend0 = True, output = "publish_inner", printInc = printInc) -# a3 = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = hi, extend0 = True, output = "publish_inner", printInc = printInc) +# a2 = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = dimLists, extend0 = True, output = "publish_inner", printInc = printInc) +# a3 = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = hi, extend0 = True, output = "publish_inner", printInc = printInc) # -# if (False) { # Include code that shows differences -# tail(a1["inner"]) -# tail(a2["inner"]) -# tail(a3["inner"]) -# } +# if (False) { # Include code that shows differences +# tail(a1["inner"]) +# tail(a2["inner"]) +# tail(a3["inner"]) +# } # -# expect_identical(a1["publish"], a2$publish) -# expect_identical(a3["publish"], a2$publish) +# expect_identical(a1["publish"], a2$publish) +# expect_identical(a3["publish"], a2$publish) # -# expect_equal(a1["inner[names"](a2$inner)], a2$inner, ignore_attr = True) -# expect_equal(a3["inner[names"](a1$inner)], a1$inner, ignore_attr = True) +# expect_equal(a1["inner[names"](a2$inner)], a2$inner, ignore_attr = True) +# expect_equal(a3["inner[names"](a1$inner)], a1$inner, ignore_attr = True) # -# a1_ = GaussSuppressionFromData(z2_, np.arange(1, 4+1), 5, extend0 = "all", output = "publish_inner", printInc = printInc) -# a2_ = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = 
dimLists, extend0 = "all", output = "publish_inner", printInc = printInc) -# a3_ = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = hi, extend0 = "all", output = "publish_inner", printInc = printInc) +# a1_ = GaussSuppressionFromData(z2_, np.arange(1, 4+1), 5, extend0 = "all", output = "publish_inner", printInc = printInc) +# a2_ = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = dimLists, extend0 = "all", output = "publish_inner", printInc = printInc) +# a3_ = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = hi, extend0 = "all", output = "publish_inner", printInc = printInc) # -# expect_identical(a1, a1_) -# expect_identical(a2, a2_) -# expect_identical(a3, a3_) +# expect_identical(a1, a1_) +# expect_identical(a2, a2_) +# expect_identical(a3, a3_) # -# z2__ = z2_[z2_["hovedint"] != "trygd", ] +# z2__ = z2_[z2_["hovedint"] != "trygd", ] # -# a2 = GaussSuppressionFromData(z2__, freqVar = "ant", hierarchies = dimLists, extend0 = "all", output = "publish_inner", printInc = printInc) -# a3 = GaussSuppressionFromData(z2__, freqVar = "ant", hierarchies = hi, extend0 = "all", output = "publish_inner", printInc = printInc) +# a2 = GaussSuppressionFromData(z2__, freqVar = "ant", hierarchies = dimLists, extend0 = "all", output = "publish_inner", printInc = printInc) +# a3 = GaussSuppressionFromData(z2__, freqVar = "ant", hierarchies = hi, extend0 = "all", output = "publish_inner", printInc = printInc) # -# expect_identical(a3["publish"], a2$publish) -# expect_equal(a3["inner[names"](a2$inner)], a2$inner, ignore_attr = True) +# expect_identical(a3["publish"], a2$publish) +# expect_equal(a3["inner[names"](a2$inner)], a2$inner, ignore_attr = True) # -# expect_identical(lapply(c(a2, a3), dim), lapply(c(a2_, a3_), dim)) +# expect_identical(lapply(c(a2, a3), dim), lapply(c(a2_, a3_), dim)) # -# z2___ = z2__[z2__["fylke"] != 10, ] +# z2___ = z2__[z2__["fylke"] != 10, ] # -# a2_ = GaussSuppressionFromData(z2___, freqVar = "ant", hierarchies = 
dimLists, extend0 = "all", output = "publish_inner", printInc = printInc) -# a3_ = GaussSuppressionFromData(z2___, freqVar = "ant", hierarchies = hi, extend0 = "all", output = "publish_inner", printInc = printInc) +# a2_ = GaussSuppressionFromData(z2___, freqVar = "ant", hierarchies = dimLists, extend0 = "all", output = "publish_inner", printInc = printInc) +# a3_ = GaussSuppressionFromData(z2___, freqVar = "ant", hierarchies = hi, extend0 = "all", output = "publish_inner", printInc = printInc) # -# expect_identical(lapply(a2, dim), lapply(a2_, dim)) +# expect_identical(lapply(a2, dim), lapply(a2_, dim)) # -# expect_true(nrow(a3_["inner"]) < nrow(a3$inner)) -# expect_true(nrow(a3_["publish"]) < nrow(a3$publish)) -# }) +# expect_true(nrow(a3_["inner"]) < nrow(a3$inner)) +# expect_true(nrow(a3_["publish"]) < nrow(a3$publish)) +# }) # # # From 94d0a124f8698cc6c27ba5bee1d690963d8bded7 Mon Sep 17 00:00:00 2001 From: kss2k Date: Mon, 28 Oct 2024 19:51:35 +0100 Subject: [PATCH 3/5] add one more test --- tests/test_gausssuppressionfromdata.py | 195 ++++++++++++++++++------- 1 file changed, 142 insertions(+), 53 deletions(-) diff --git a/tests/test_gausssuppressionfromdata.py b/tests/test_gausssuppressionfromdata.py index 0acfb3a..3d62cf1 100644 --- a/tests/test_gausssuppressionfromdata.py +++ b/tests/test_gausssuppressionfromdata.py @@ -168,60 +168,149 @@ def test_extend0_and_various_hierarchy_input(): assert np.all(a1.reset_index(drop=True) == a2.reset_index(drop=True)) assert np.all(a3.reset_index(drop=True) == a2.reset_index(drop=True)) + z2_ = z2.iloc[z2["ant"].to_numpy() != 0] + + a1 = gs.GaussSuppressionFromData( + z2_, np.arange(1, 5), 5, extend0=True, output="publish_inner", printInc=printInc + ) + + assert np.all(a1["publish"].reset_index(drop=True) == a2.reset_index(drop=True)) + + a2 = gs.GaussSuppressionFromData( + z2_, + freqVar="ant", + hierarchies=dimLists, + extend0=True, + output="publish_inner", + printInc=printInc, + ) + a3 = gs.GaussSuppressionFromData( 
+ z2_, + freqVar="ant", + hierarchies=hi, + extend0=True, + output="publish_inner", + printInc=printInc, + ) + + assert np.all( + a1["publish"].reset_index(drop=True) == a2["publish"].reset_index(drop=True) + ) + assert np.all( + a3["publish"].reset_index(drop=True) == a2["publish"].reset_index(drop=True) + ) + + assert np.all( + a1["inner"][a2["inner"].columns].reset_index(drop=True) + == a2["inner"].reset_index(drop=True) + ) + assert np.all( + a3["inner"][a1["inner"].columns].reset_index(drop=True) + == a1["inner"].reset_index(drop=True) + ) + + a1_ = gs.GaussSuppressionFromData( + z2_, + np.arange(1, 5), + 5, + extend0="all", + output="publish_inner", + printInc=printInc, + ) + a2_ = gs.GaussSuppressionFromData( + z2_, + freqVar="ant", + hierarchies=dimLists, + extend0="all", + output="publish_inner", + printInc=printInc, + ) + a3_ = gs.GaussSuppressionFromData( + z2_, + freqVar="ant", + hierarchies=hi, + extend0="all", + output="publish_inner", + printInc=printInc, + ) + + assert np.all( + a1["publish"].reset_index(drop=True) == a1_["publish"].reset_index(drop=True) + ) + assert np.all( + a1["inner"].reset_index(drop=True) == a1_["inner"].reset_index(drop=True) + ) + assert np.all( + a2["publish"].reset_index(drop=True) == a2_["publish"].reset_index(drop=True) + ) + assert np.all( + a2["inner"].reset_index(drop=True) == a2_["inner"].reset_index(drop=True) + ) + assert np.all( + a3["publish"].reset_index(drop=True) == a3_["publish"].reset_index(drop=True) + ) + assert np.all( + a3["inner"].reset_index(drop=True) == a3_["inner"].reset_index(drop=True) + ) + + z2__ = z2_.loc[z2_["hovedint"] != "trygd"] + + a2 = gs.GaussSuppressionFromData( + z2__, + freqVar="ant", + hierarchies=dimLists, + extend0="all", + output="publish_inner", + printInc=printInc, + ) + a3 = gs.GaussSuppressionFromData( + z2__, + freqVar="ant", + hierarchies=hi, + extend0="all", + output="publish_inner", + printInc=printInc, + ) + + assert np.all( + a2["publish"].reset_index(drop=True) == 
a3["publish"].reset_index(drop=True) + ) + assert np.all( + a3["inner"][a2["inner"].columns].reset_index(drop=True) + == a2["inner"].reset_index(drop=True) + ) + + assert np.all(a2["publish"].shape == a2_["publish"].shape) + assert np.all(a2["inner"].shape == a2_["inner"].shape) + assert np.all(a3["publish"].shape == a3_["publish"].shape) + assert np.all(a3["inner"].shape == a3_["inner"].shape) + + z2___ = z2__.loc[z2__["fylke"] != 10] + + a2_ = gs.GaussSuppressionFromData( + z2___, + freqVar="ant", + hierarchies=dimLists, + extend0="all", + output="publish_inner", + printInc=printInc, + ) + a3_ = gs.GaussSuppressionFromData( + z2___, + freqVar="ant", + hierarchies=hi, + extend0="all", + output="publish_inner", + printInc=printInc, + ) + + assert np.all(a2["publish"].shape == a2_["publish"].shape) + assert np.all(a2["inner"].shape == a2_["inner"].shape) + + assert a3_["inner"].shape[0] < a3["inner"].shape[0] + assert a3_["publish"].shape[0] < a3["publish"].shape[0] + -# -# z2_ = z2[z2["ant"] != 0, ] -# -# a1 = GaussSuppressionFromData(z2_, np.arange(1, 4+1), 5, extend0 = True, output = "publish_inner", printInc = printInc) -# -# expect_identical(a1["publish"], a2) -# -# a2 = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = dimLists, extend0 = True, output = "publish_inner", printInc = printInc) -# a3 = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = hi, extend0 = True, output = "publish_inner", printInc = printInc) -# -# if (False) { # Include code that shows differences -# tail(a1["inner"]) -# tail(a2["inner"]) -# tail(a3["inner"]) -# } -# -# expect_identical(a1["publish"], a2$publish) -# expect_identical(a3["publish"], a2$publish) -# -# expect_equal(a1["inner[names"](a2$inner)], a2$inner, ignore_attr = True) -# expect_equal(a3["inner[names"](a1$inner)], a1$inner, ignore_attr = True) -# -# a1_ = GaussSuppressionFromData(z2_, np.arange(1, 4+1), 5, extend0 = "all", output = "publish_inner", printInc = printInc) -# a2_ = 
GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = dimLists, extend0 = "all", output = "publish_inner", printInc = printInc) -# a3_ = GaussSuppressionFromData(z2_, freqVar = "ant", hierarchies = hi, extend0 = "all", output = "publish_inner", printInc = printInc) -# -# expect_identical(a1, a1_) -# expect_identical(a2, a2_) -# expect_identical(a3, a3_) -# -# z2__ = z2_[z2_["hovedint"] != "trygd", ] -# -# a2 = GaussSuppressionFromData(z2__, freqVar = "ant", hierarchies = dimLists, extend0 = "all", output = "publish_inner", printInc = printInc) -# a3 = GaussSuppressionFromData(z2__, freqVar = "ant", hierarchies = hi, extend0 = "all", output = "publish_inner", printInc = printInc) -# -# expect_identical(a3["publish"], a2$publish) -# expect_equal(a3["inner[names"](a2$inner)], a2$inner, ignore_attr = True) -# -# expect_identical(lapply(c(a2, a3), dim), lapply(c(a2_, a3_), dim)) -# -# z2___ = z2__[z2__["fylke"] != 10, ] -# -# a2_ = GaussSuppressionFromData(z2___, freqVar = "ant", hierarchies = dimLists, extend0 = "all", output = "publish_inner", printInc = printInc) -# a3_ = GaussSuppressionFromData(z2___, freqVar = "ant", hierarchies = hi, extend0 = "all", output = "publish_inner", printInc = printInc) -# -# expect_identical(lapply(a2, dim), lapply(a2_, dim)) -# -# expect_true(nrow(a3_["inner"]) < nrow(a3$inner)) -# expect_true(nrow(a3_["publish"]) < nrow(a3$publish)) -# }) -# -# -# # test_that("DominanceRule and NcontributorsRule + CandidatesNum + singleton + forced/unsafe", { # set.seed(123) # z = SSBtools::MakeMicro(SSBtoolsData("z2"), "ant") From c99ea4fbbc7bb39a8d0dd2e7e028bf18c2ff6076 Mon Sep 17 00:00:00 2001 From: kss2k Date: Tue, 29 Oct 2024 16:07:20 +0100 Subject: [PATCH 4/5] add test: "DominanceRule and NcontributorsRule + CandidatesNum + singleton + forced/unsafe" --- src/rwrapr/renv.py | 20 + tests/test_gausssuppressionfromdata.py | 777 ++++++++++++++++--------- 2 files changed, 538 insertions(+), 259 deletions(-) diff --git a/src/rwrapr/renv.py 
b/src/rwrapr/renv.py index e6a539e..937a111 100644 --- a/src/rwrapr/renv.py +++ b/src/rwrapr/renv.py @@ -212,6 +212,26 @@ def rclass(self, x: Any) -> RReturnType: foo: Callable[..., RReturnType] = rfunc("class") return foo(x) + def reval(self, expr: str, rview: bool) -> Any: + """ + Evaluates an R expression. + + Args: + expr (str): The R expression to evaluate. + rview (bool): Required. If True, or if `settings.rview_mode` is set, the result is returned as an RView object. + + Returns: + Any: The result wrapped in an RView, or otherwise converted to a Python object. + """ + rview = rview or settings.rview_mode + + r_object: Any = ro.r(expr, invisible=True, print_r_warnings=False) + + if rview: + return RView(r_object) + else: + return convert_r2py(r_object) + def fetch_data( dataset: str, module: rpkg.Package | None diff --git a/tests/test_gausssuppressionfromdata.py b/tests/test_gausssuppressionfromdata.py index 3d62cf1..cce518d 100644 --- a/tests/test_gausssuppressionfromdata.py +++ b/tests/test_gausssuppressionfromdata.py @@ -311,259 +311,518 @@ def test_extend0_and_various_hierarchy_input(): assert a3_["publish"].shape[0] < a3["publish"].shape[0] -# test_that("DominanceRule and NcontributorsRule + CandidatesNum + singleton + forced/unsafe", { -# set.seed(123) -# z = SSBtools::MakeMicro(SSBtoolsData("z2"), "ant") -# z["char"] = sample(paste0("char", np.arange(1, 10+1)), nrow(z), replace = True) -# z["value"] = rnorm(nrow(z))^2 -# -# a = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# candidates = CandidatesNum, primary = DominanceRule, singletonMethod = "sub2Sum", -# n = c(1, 2), k = c(65, 85), printInc = printInc) -# -# -# b = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# candidates = CandidatesNum, primary = NcontributorsRule, singletonMethod = "none", -# removeCodes = paste0("char", np.arange(1, 2+1)), printInc = printInc) -# -#
expect_identical(as.numeric(which(a["primary"])), c(8, 17, 18, 23, 52, 53, 58, 63, 73, 77, 78, 80, 83, 87, 90, 92, 97, 98)) -# expect_identical(as.numeric(which(b["primary"])), c(8, 18, 23, 53, 63, 78, 83, 87, 90, 97, 98)) -# -# -# z["seq2"] = (1:nrow(z))^2 -# -# aseq2 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), -# numVar = c("seq2", "value"), -# candidatesVar = "value", -# dominanceVar = "value", -# charVar = "char", candidates = CandidatesNum, -# primary = DominanceRule, singletonMethod = "sub2Sum", -# n = c(1, 2), k = c(65, 85), printInc = printInc) -# -# expect_identical(a[names(a)], aseq2[names(a)]) -# -# -# z["char"] = paste0("char", 1:nrow(z)) -# d1 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# candidates = CandidatesNum, primary = NcontributorsRule, singletonMethod = "none", -# removeCodes = paste0("char", np.arange(1, 20+1)), printInc = printInc, -# freqVar = "ant", preAggregate = False, maxN = 10, -# whenEmptyUnsuppressed = "stop") -# -# d2 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", -# candidates = CandidatesNum, primary = NContributorsRule, singletonMethod = "none", -# removeCodes = np.arange(1, 20+1), printInc = printInc, -# preAggregate = False, maxN = 10, # Empty freq in CandidatesNum -# whenEmptyUnsuppressed = "stop") -# -# expect_equal(d1[names(d1) != "ant"], d2, ignore_attr = True) -# -# -# if(True){ -# set.seed(123) -# z["value"] = rnorm(nrow(z))^2 # Need to generate again ... 
not same as above -# set.seed(1986) # Seed is not randomly chosen -# z["char"] = sample(paste0("char", c(1, 1, 1, 1, 1, 2, 2, 2, 3, 4)), nrow(z), replace = True) -# b0 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# maxN = 2, candidates = CandidatesNum, primary = NcontributorsRule, printInc = printInc, -# singleton = SingletonUniqueContributor, -# singletonMethod = "none") -# b1 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# maxN = 2, candidates = CandidatesNum, primary = NcontributorsRule, printInc = printInc, -# singleton = SingletonUniqueContributor, -# singletonMethod = "sub2Sum") -# b2 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# maxN = 2, candidates = CandidatesNum, primary = NcontributorsRule, printInc = printInc, -# singleton = SingletonUniqueContributor, -# singletonMethod = "numFTT") -# suppressWarnings({b3 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# maxN = 2, candidates = CandidatesNum, -# primary = c(63, 73, 77), # primary = c(8, 18, 23, 53, 63, 73, 77, 78, 90, 97, 98, 100), -# forced = c(11, 13, 18, 20, 40), -# printInc = printInc, -# singleton = SingletonUniqueContributor, -# singletonMethod = "numFTT")}) -# suppressWarnings({b4 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# maxN = 2, candidates = CandidatesNum, -# primary = c(8, 18, 23, 53, 63, 73, 77, 78, 90, 97, 98, 100), -# forced = c(11, 13, 18, 20, 40), -# printInc = printInc, -# singleton = SingletonUniqueContributor, -# singletonMethod = "numFTT")}) -# -# suppressWarnings({b5 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# maxN = 2, 
candidates = CandidatesNum, -# primary = c(8, 18, 23, 53, 63, 73, 77, 78, 90, 97, 98, 100), -# forced = c(11, 13, 18, 20, 40), -# printInc = printInc, -# protectZeros = True)}) -# -# -# suppressWarnings({b6 = GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"), numVar = "value", charVar = "char", -# maxN = 2, candidates = CandidatesNum, -# primary = c(8, 18, 23, 53, 63, 73, 77, 78, 90, 97, 98, 100), -# forced = np.arange(1, 30+1), -# printInc = printInc, -# protectZeros = False)}) -# -# -# expect_equal(sum(b0["suppressed"]), 32) -# expect_equal(sum(b1["suppressed"]), 33) -# expect_equal(sum(b2["suppressed"]), 35) -# expect_equal(sum(b3["suppressed"]), 12) -# expect_equal(sum(b4["suppressed"]), 32) -# expect_equal(sum(b5["suppressed"]), 27) -# expect_equal(sum(b6["suppressed"]), 19) -# expect_equal(sum(b3["unsafe"]), 0) -# expect_equal(sum(b4["unsafe"]), 1) -# expect_equal(sum(b5["unsafe"]), 1) -# expect_equal(sum(b6["unsafe"]), 3) -# -# skip_on_cran() -# -# # Code to see differences: -# #"sub2Sum" solves G-problem -# #"numFTT" needed to solve K-problem. 
-# if (False) for (myChar in c("G", "K")) { -# kp = b0[b0["region"] == myChar & b0$primary, ] -# k0 = b0[b0["region"] == myChar & b0$suppressed, ] -# k1 = b1[b2["region"] == myChar & b1$suppressed, ] -# k2 = b2[b2["region"] == myChar & b2$suppressed, ] -# cat("===============", myChar, "=============== \n") -# for (kk in c("kp", "k0", "k1", "k2")) { -# cat(" -----", kk, "-----\n") -# ma = Match(z[c("region", "hovedint")], get(kk)[c("region", "hovedint")]) -# print(z[!is.na(ma), ]) -# } -# } -# sn = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1, 0, 1, -# 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0) -# sf = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, -# 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0) -# sum_suppressed = integer(0) -# for (m1 in c("none", "anySumNOTprimary")) -# for (m2 in c("none", "sub2Sum", "numFTT")) { -# b = GaussSuppressionFromData(z, -# dimVar = c("region", "fylke", "kostragr", "hovedint"), -# numVar = "value", charVar = "char", maxN = 2, -# candidates = CandidatesNum, -# primary = NcontributorsRule, -# printInc = printInc, -# singleton = list(freq = as.logical(sf), num = as.integer(sn)), -# singletonMethod = c(freq = m1, num = m2)) -# sum_suppressed = c(sum_suppressed, sum(b["suppressed"])) -# } -# expect_equal(sum_suppressed, c(32, 33, 35, 35, 38, 40)) -# -# -# set.seed(1138) -# sum_suppressed = integer(0) -# zz = z[sample.int(nrow(z), 100, replace = True), ] -# for (c2 in c("F", "T")) -# for (c3 in c("F", "T", "H")) -# for (c4 in c("F", "T")) { -# b = GaussSuppressionFromData(zz, -# dimVar = c("region", "fylke", "kostragr", "hovedint"), -# numVar = "value", charVar = "char", -# maxN = 2, printInc = printInc, -# candidates = CandidatesNum, -# primary = NcontributorsRule, -# singleton = SingletonUniqueContributor, -# singletonMethod = paste0("numF", c2, c3, c4)) -# sum_suppressed = c(sum_suppressed, sum(b["suppressed"])) -# } -# expect_equal(sum_suppressed, c(49, 55, 51, 55, 53, 
def dominancerule_and_ncontributorsrule_CandidatesNum_singleton_forced_unsafe():
    """Exercise ``GaussSuppressionFromData`` through the R wrapper.

    Covers, in order: the dominance and n-contributors primary rules with
    ``CandidatesNum`` candidates, equivalence of runs with an extra numeric
    variable, ``removeCodes`` given as codes vs. row indices, the different
    ``singletonMethod`` variants, and ``forced``/``unsafe`` cells.

    The expected numbers are tied to the R seeds set below; the order of the
    calls that consume R's random stream must therefore not be changed.

    NOTE(review): the name has no ``test_`` prefix, so pytest's default
    discovery will not collect this function -- confirm that is intentional.
    NOTE(review): relies on module-level ``bs``, ``gs``, ``st``, ``np`` and
    ``printInc`` being defined elsewhere in this file.
    """
    dim_var = bs.c("region", "fylke", "kostragr", "hovedint")

    bs.set_seed(123)
    z = st.MakeMicro(st.SSBtoolsData("z2"), "ant")
    z["char"] = bs.sample(bs.paste0("char", np.arange(1, 11)), bs.nrow(z), replace=True)
    z["value"] = bs.rnorm(bs.nrow(z)) ** 2

    # R functions are fetched as views so they can be passed back to R as-is.
    CandidatesNum = gs.reval("CandidatesNum", rview=True)
    DominanceRule = gs.reval("DominanceRule", rview=True)
    a = gs.GaussSuppressionFromData(
        z,
        dimVar=dim_var,
        numVar="value",
        charVar="char",
        candidates=CandidatesNum,
        primary=DominanceRule,
        singletonMethod="sub2Sum",
        n=bs.c(1, 2),
        k=bs.c(65, 85),
        printInc=printInc,
    )

    # NOTE(review): both "NContributorsRule" and "NcontributorsRule" are looked
    # up in R by this function; presumably they are aliases -- confirm.
    NContributorsRule = gs.reval("NContributorsRule", rview=True)
    b = gs.GaussSuppressionFromData(
        z,
        dimVar=dim_var,
        numVar="value",
        charVar="char",
        candidates=CandidatesNum,
        primary=NContributorsRule,
        singletonMethod="none",
        removeCodes=bs.paste0("char", np.arange(1, 3)),
        printInc=printInc,
    )

    assert np.all(
        bs.as_numeric(bs.which(a["primary"]))
        == bs.c(8, 17, 18, 23, 52, 53, 58, 63, 73, 77, 78, 80, 83, 87, 90, 92, 97, 98)
    )
    assert np.all(
        bs.as_numeric(bs.which(b["primary"]))
        == bs.c(8, 18, 23, 53, 63, 78, 83, 87, 90, 97, 98)
    )

    # An additional numeric variable must not change the shared output columns.
    z["seq2"] = (np.arange(z.shape[0]) + 1) ** 2
    aseq2 = gs.GaussSuppressionFromData(
        z,
        dimVar=dim_var,
        numVar=bs.c("seq2", "value"),
        candidatesVar="value",
        dominanceVar="value",
        charVar="char",
        candidates=CandidatesNum,
        primary=DominanceRule,
        singletonMethod="sub2Sum",
        n=bs.c(1, 2),
        k=bs.c(65, 85),
        printInc=printInc,
    )
    assert np.all(a[bs.names(a)] == aseq2[bs.names(a)])

    # removeCodes as contributor codes (d1) vs. as row indices (d2).
    z["char"] = bs.paste0("char", np.arange(bs.nrow(z)) + 1)
    NcontributorsRule = gs.reval("NcontributorsRule", rview=True)
    d1 = gs.GaussSuppressionFromData(
        z,
        dimVar=dim_var,
        numVar="value",
        charVar="char",
        candidates=CandidatesNum,
        primary=NcontributorsRule,
        singletonMethod="none",
        removeCodes=bs.paste0("char", np.arange(1, 21)),
        printInc=printInc,
        freqVar="ant",
        preAggregate=False,
        maxN=10,
        whenEmptyUnsuppressed="stop",
    )
    d2 = gs.GaussSuppressionFromData(
        z,
        dimVar=dim_var,
        numVar="value",
        candidates=CandidatesNum,
        primary=NContributorsRule,
        singletonMethod="none",
        removeCodes=np.arange(1, 21),
        printInc=printInc,
        preAggregate=False,
        maxN=10,  # Empty freq in CandidatesNum
        whenEmptyUnsuppressed="stop",
    )
    assert np.all(d1.loc[:, bs.names(d1) != "ant"] == d2)

    bs.set_seed(123)
    # Need to generate again ... not same as above
    z["value"] = bs.rnorm(bs.nrow(z)) ** 2
    bs.set_seed(1986)  # Seed is not randomly chosen
    z["char"] = bs.sample(
        bs.paste0("char", bs.c(1, 1, 1, 1, 1, 2, 2, 2, 3, 4)), bs.nrow(z), replace=True
    )
    SingletonUniqueContributor = gs.reval("SingletonUniqueContributor", rview=True)

    # Arguments shared by every remaining call; all are passed to R by name.
    common = {
        "dimVar": dim_var,
        "numVar": "value",
        "charVar": "char",
        "maxN": 2,
        "candidates": CandidatesNum,
        "printInc": printInc,
    }
    explicit_primary = bs.c(8, 18, 23, 53, 63, 73, 77, 78, 90, 97, 98, 100)

    b0 = gs.GaussSuppressionFromData(
        z,
        primary=NcontributorsRule,
        singleton=SingletonUniqueContributor,
        singletonMethod="none",
        **common,
    )
    b1 = gs.GaussSuppressionFromData(
        z,
        primary=NcontributorsRule,
        singleton=SingletonUniqueContributor,
        singletonMethod="sub2Sum",
        **common,
    )
    b2 = gs.GaussSuppressionFromData(
        z,
        primary=NcontributorsRule,
        singleton=SingletonUniqueContributor,
        singletonMethod="numFTT",
        **common,
    )
    b3 = gs.GaussSuppressionFromData(
        z,
        # primary = bs.c(8, 18, 23, 53, 63, 73, 77, 78, 90, 97, 98, 100),
        primary=bs.c(63, 73, 77),
        forced=bs.c(11, 13, 18, 20, 40),
        singleton=SingletonUniqueContributor,
        singletonMethod="numFTT",
        **common,
    )
    b4 = gs.GaussSuppressionFromData(
        z,
        primary=explicit_primary,
        forced=bs.c(11, 13, 18, 20, 40),
        singleton=SingletonUniqueContributor,
        singletonMethod="numFTT",
        **common,
    )
    # From here on the function is called through ``gs`` as well, consistent
    # with the calls above.
    b5 = gs.GaussSuppressionFromData(
        z,
        primary=explicit_primary,
        forced=bs.c(11, 13, 18, 20, 40),
        protectZeros=True,
        **common,
    )
    b6 = gs.GaussSuppressionFromData(
        z,
        primary=explicit_primary,
        forced=np.arange(1, 30 + 1),
        protectZeros=False,
        **common,
    )

    assert np.all(bs.sum(b0["suppressed"]) == 32)
    assert np.all(bs.sum(b1["suppressed"]) == 33)
    assert np.all(bs.sum(b2["suppressed"]) == 35)
    assert np.all(bs.sum(b3["suppressed"]) == 12)
    assert np.all(bs.sum(b4["suppressed"]) == 32)
    assert np.all(bs.sum(b5["suppressed"]) == 27)
    assert np.all(bs.sum(b6["suppressed"]) == 19)
    assert np.all(bs.sum(b3["unsafe"]) == 0)
    assert np.all(bs.sum(b4["unsafe"]) == 1)
    assert np.all(bs.sum(b5["unsafe"]) == 1)
    assert np.all(bs.sum(b6["unsafe"]) == 3)

    # Code to see differences:
    # "sub2Sum" solves G-problem
    # "numFTT" needed to solve K-problem.

    # Hand-written singleton vectors (42 entries each).
    sn = bs.c(
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 0, 2, 0, 0, 0, 0, 1, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 0,
    )
    sf = bs.c(
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 0,
    )
    sum_suppressed = bs.integer(0)
    for m1 in bs.c("none", "anySumNOTprimary"):
        for m2 in bs.c("none", "sub2Sum", "numFTT"):
            b = gs.GaussSuppressionFromData(
                z,
                primary=NcontributorsRule,
                singleton=bs.list(freq=bs.as_logical(sf), num=bs.as_integer(sn)),
                singletonMethod=bs.c(freq=m1, num=m2),
                **common,
            )
            sum_suppressed = bs.c(sum_suppressed, bs.sum(b["suppressed"]))
    assert np.all(sum_suppressed == bs.c(32, 33, 35, 35, 38, 40))

    sample_int = bs.function("sample.int")
    bs.set_seed(1138)
    sum_suppressed = bs.integer(0)
    # R indices are 1-based, hence the "- 1".  The explicit copy makes the
    # later column assignments on ``zz`` independent of ``z``.
    zz = z.iloc[sample_int(bs.nrow(z), 100, replace=True) - 1, :].copy()
    for c2 in bs.c("F", "T"):
        for c3 in bs.c("F", "T", "H"):
            for c4 in bs.c("F", "T"):
                b = gs.GaussSuppressionFromData(
                    zz,
                    primary=NcontributorsRule,
                    singleton=SingletonUniqueContributor,
                    singletonMethod=bs.paste0("numF", c2, c3, c4),
                    **common,
                )
                sum_suppressed = bs.c(sum_suppressed, bs.sum(b["suppressed"]))
    assert np.all(
        sum_suppressed == bs.c(49, 55, 51, 55, 53, 55, 49, 57, 52, 57, 55, 57)
    )

    # Why extra primary needed for 5:Total when "numFTH"
    # can be seen by looking at
    #  b[b["region"] == 5, ]
    #  zz[zz["fylke"] == 5 & zz$hovedint == "annet", ]
    #  zz[zz["fylke"] == 5 & zz$hovedint == "arbeid", ]
    #  zz[zz["fylke"] == 5 & zz$hovedint == "soshjelp", ]

    sum_suppressed = bs.integer(0)
    for singletonMethod in bs.c(
        "numFFF", "numtFF", "numTFF", "numtTT", "numtTH", "numtTFT", "numtTHT"
    ):
        # singleton not in publish and therefore not primary suppressed
        b = gs.GaussSuppressionFromData(
            zz,
            primary=NcontributorsRule,
            singleton=SingletonUniqueContributor,
            singletonMethod=singletonMethod,
            inputInOutput=bs.c(False, True),
            **common,
        )
        sum_suppressed = bs.c(sum_suppressed, bs.sum(b["suppressed"]))
    assert np.all(sum_suppressed == bs.c(17, 18, 18, 19, 19, 23, 23))

    # To make non-suppressed singletons
    SUC = gs.reval(
        "function(..., removeCodes, primary) SingletonUniqueContributor(..., removeCodes = character(0), primary = integer(0))",
        rview=True,
    )
    sum_suppressed = bs.integer(0)
    for singletonMethod in bs.c("numFFF", "numtFF", "numTFF"):
        b = gs.GaussSuppressionFromData(
            zz,
            primary=NcontributorsRule,
            removeCodes="char1",
            singleton=SUC,
            singletonMethod=singletonMethod,
            **common,
        )
        # Accumulate the actual result; previously ``b`` was discarded and the
        # expected constants were appended to a vector nobody checked.
        sum_suppressed = bs.c(sum_suppressed, bs.sum(b["suppressed"]))
    # NOTE(review): 59, 59, 67 are taken to be the expected counts for the
    # three methods above -- confirm against the upstream R test.
    assert np.all(sum_suppressed == bs.c(59, 59, 67))

    # Overwrite the first 15 rows positionally (R: zz$char[1:15]).  ``zz`` was
    # sampled with replacement, so its index labels are not unique and
    # label-based or chained assignment would hit the wrong rows.
    zz.iloc[:15, zz.columns.get_loc("char")] = "char5"
    # NOTE(review): the R call in the removed comments was wrapped in
    # expect_warning(); the warning is not checked here.
    b = gs.GaussSuppressionFromData(
        zz,
        primary=NcontributorsRule,
        singleton=SingletonUniqueContributor,
        singletonMethod="numFTFW",
        **common,
    )
    # Here "if (s_unique == primarySingletonNum[i])" in SSBtools::GaussSuppression matters.
    assert np.all(bs.sum(b["suppressed"]) == 51)

    bs.set_seed(193)
    zz["A"] = bs.sample(
        bs.paste0("A", bs.c(1, 1, 1, 1, 1, 2, 2, 2, 3, 4)), bs.nrow(zz), replace=True
    )
    zz["B"] = bs.sample(
        bs.paste0("B", bs.c(1, 1, 1, 1, 1, 2, 2, 2, 3, 4)), bs.nrow(zz), replace=True
    )
    rcd = bs.data_frame(char="char2", A=bs.c("A1", "A2"), B="B1")
    removeCodes = bs.list(None, rcd, bs.as_list(rcd))

    # Built once: appends a 0 separator and the singleton group sizes to k.
    append_singleton_counts = bs.function(
        "function(b, k) c(k, 0L, as.vector(table(b$singleton)[as.character(unique(b$singleton))]))"
    )
    k = bs.integer(0)
    for specialMultiple in bs.c(False, True):
        # All three removeCodes variants (R: 1:3), using 0-based positions.
        # NOTE(review): assumes ``bs.list(...)`` comes back as a 0-indexable
        # Python sequence of length 3 -- confirm.
        for i in range(3):
            b = gs.GaussSuppressionFromData(
                zz,
                dimVar=dim_var,
                numVar="value",
                charVar=bs.c("char", "A", "B"),
                maxN=2,
                printInc=printInc,
                candidates=CandidatesNum,
                primary=NcontributorsRule,
                singleton=SingletonUniqueContributor,
                singletonMethod="numTTTTT",
                output="inputGaussSuppression",
                specialMultiple=specialMultiple,
                removeCodes=removeCodes[i],
            )
            k = append_singleton_counts(b, k)

    # Compare element-wise; the second positional argument of np.all is
    # ``axis``, so the expected vector must not be passed there.
    expected_k = bs.c(
        0, 1, 1, 1, 1, 1, 2, 19, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 1, 2, 20, 1, 1, 1,
        0, 1, 29,
        0, 2, 6, 3, 9, 9, 1,
        0, 2, 5, 3, 9, 10, 1,
        0, 2, 5, 1, 1, 2, 17, 2,
    )
    assert np.all(k == expected_k)
-# p[] = as.integer(p) +# p[] = bs.as_integer(p) # p # } # @@ -591,12 +850,12 @@ def test_extend0_and_various_hierarchy_input(): # # # Single column xExtraPrimary, Matrix and matrix # -# x["freq"] = round(sqrt(x$ths_per) + as.integer(x$year) - 2014 + 0.2 * (-np.arange(7, 10+1))) +# x["freq"] = round(sqrt(x$ths_per) + bs.as_integer(x$year) - 2014 + 0.2 * (-np.arange(7, 10+1))) # z = x[x["year"] == "2014", -(np.arange(4, 5+1))] # # # K = function(primary) { -# GaussSuppressionFromData(data = z, formula = ~geo + age, freqVar = "freq", coalition=7, +# bs.GaussSuppressionFromData(data = z, formula = ~geo + age, freqVar = "freq", coalition=7, # primary = primary, # mc_hierarchies = NULL, upper_bound = Inf, # protectZeros = False, secondaryZeros = True, @@ -605,37 +864,37 @@ def test_extend0_and_various_hierarchy_input(): # } # # e1 = K(KDisclosurePrimary) -# e2 = K(function (...) as.matrix(KDisclosurePrimary(...))) +# e2 = K(function (...) bs.as_matrix(KDisclosurePrimary(...))) # -# expect_equal(max(abs(e2 - e1)), 0) -# expect_warning({e3 = K(function (...) round(1 + 0.1*as.matrix(KDisclosurePrimary(...))))}) # Warning message: Primary output interpreted as xExtraPrimary (rare case of doubt) -# expect_true(all(dim(e3) == c(6, 1))) +# assert np.all(max(abs(e2 - e1)), 0) +# expect_warning({e3 = K(function (...) 
round(1 + 0.1*bs.as_matrix(KDisclosurePrimary(...))))}) # Warning message: Primary output interpreted as xExtraPrimary (rare case of doubt) +# expect_true(all(dim(e3) == bs.c(6, 1))) # # }) # # # test_that("More NumSingleton", { # -# sum_suppressed = integer(0) -# for (seed in c(116162, 643426)) { -# set.seed(seed) +# sum_suppressed = bs.integer(0) +# for (seed in bs.c(116162, 643426)) { +# bs.set_seed(seed) # z = SSBtoolsData("magnitude1") -# set.seed(seed) -# z["company"] = z$company[sample.int(20)] -# z["value"] = z$value[sample.int(20)] +# bs.set_seed(seed) +# z["company"] = z$company[bs.samle_int(20)] +# z["value"] = z$value[bs.samle_int(20)] # dataset = SSBtools::SortRows(aggregate(z["value"], z[np.arange(1, 5+1)], sum)) -# for (c3 in c("F", "T", "H")) for (c4 in c("F", "t", "T")) for (c5 in c("F", "t", "T")) { +# for (c3 in bs.c("F", "T", "H")) for (c4 in bs.c("F", "t", "T")) for (c5 in bs.c("F", "t", "T")) { # if (!(c4 == "F" & c5 != "F")) { -# singletonMethod = paste0("numTt", c3, c4, c5) -# output = SuppressDominantCells(data = dataset, numVar = "value", dimVar = c("sector4", "geo"), contributorVar = "company", n = 1, k = 80, singletonMethod = singletonMethod, +# singletonMethod = bs.paste0("numTt", c3, c4, c5) +# output = SuppressDominantCells(data = dataset, numVar = "value", dimVar = bs.c("sector4", "geo"), contributorVar = "company", n = 1, k = 80, singletonMethod = singletonMethod, # printInc = False) -# sum_suppressed = c(sum_suppressed, sum(output["suppressed"])) +# sum_suppressed = bs.c(sum_suppressed, bs.sum(output["suppressed"])) # } # } # # } # -# expect_equal(sum_suppressed, c(8, 11, 13, 13, 11, 13, 13, 10, 11, 13, 13, 11, 13, 13, 10, +# assert np.all(sum_suppressed, bs.c(8, 11, 13, 13, 11, 13, 13, 10, 11, 13, 13, 11, 13, 13, 10, # 11, 13, 13, 11, 13, 13, 7, 9, 10, 12, 10, 11, 12, 8, 10, 10, # 12, 11, 11, 12, 8, 10, 10, 12, 11, 11, 12)) # From ac1547f663cf6d5acefda32e6278d3514a3320fc Mon Sep 17 00:00:00 2001 From: kss2k Date: Tue, 29 Oct 2024 
16:11:26 +0100 Subject: [PATCH 5/5] Fix DAR203 ~Return: expected Any but was RReturnType, in renv.py --- src/rwrapr/renv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rwrapr/renv.py b/src/rwrapr/renv.py index 937a111..55942c3 100644 --- a/src/rwrapr/renv.py +++ b/src/rwrapr/renv.py @@ -221,7 +221,7 @@ def reval(self, expr: str, rview: bool) -> Any: rview (bool): If True, returns the result as an RView object. Defaults to False. Returns: - RReturnType: The result of the R expression. + Any: The result of the R expression, depends on rview argument and setting. """ rview = rview or settings.rview_mode