diff --git a/src/wrapr/RArray.py b/src/wrapr/RArray.py new file mode 100644 index 0000000..5d54ead --- /dev/null +++ b/src/wrapr/RArray.py @@ -0,0 +1,40 @@ +import numpy as np +import rpy2 +import rpy2.robjects.vectors as vc + +from numpy.typing import NDArray + + +def convert_numpy(x: vc.Vector | NDArray) -> NDArray | None: + if isinstance(x, rpy2.rinterface_lib.sexp.NULLType): + return None + match x: # this should be expanded upon + case vc.BoolVector() | vc.BoolArray() | vc.BoolMatrix(): + dtype = "bool" + case vc.FloatVector() | vc.FloatArray() | vc.FloatMatrix(): + dtype = "float" + case vc.IntVector() | vc.IntArray() | vc.IntMatrix(): + dtype = "int" + case vc.StrArray() | vc.StrVector() | vc.StrMatrix(): + dtype = "U" + case _: + dtype = None + + y = np.asarray(x, dtype=dtype) + return filter_numpy(y) + + +def filter_numpy(x: NDArray) -> NDArray | int | str | float | bool: + # sometimes a numpy array will have one element with shape (,) + # this should be (1,) + y = x[np.newaxis][0] if not x.shape else x + # if shape is (1,) we should just return as int | str | float | bool + # R doesn't have these types, only vectors/arrays, this will probably + # give unexpected results for users who are unfamiliar with R, so + # we return the first element instead + y = y[0] if y.shape == (1,) else y + return y + + +def is_valid_numpy(x: NDArray) -> bool: + return x.dtype.fields is None diff --git a/src/wrapr/RDataFrame.py b/src/wrapr/RDataFrame.py new file mode 100644 index 0000000..c4583f4 --- /dev/null +++ b/src/wrapr/RDataFrame.py @@ -0,0 +1,19 @@ +import pandas as pd + +import rpy2.robjects as ro +import rpy2.robjects.vectors as vc + +from typing import Any +from .RArray import convert_numpy + +def convert_pandas(df: vc.DataFrame) -> pd.DataFrame: + colnames = df.names + df_dict = {c: convert_numpy(x) for c, x in zip(colnames, list(df))} + return pd.DataFrame(df_dict) + + +def attempt_pandas_conversion(x: Any) -> Any: + try: + return pd.DataFrame(x) + except: + return x diff --git a/src/wrapr/__init__.py b/src/wrapr/__init__.py index fe8ceb3..312fb2c 100644 --- a/src/wrapr/__init__.py +++ b/src/wrapr/__init__.py @@ -5,4 +5,5 @@ from .library import library, importr from .load_namespace import try_load_namespace from .lazy_rexpr import lazily -from .robject import Robject +from .convert_r2py import Robject +from .settings import settings, Settings diff --git a/src/wrapr/convert_py2r.py b/src/wrapr/convert_py2r.py index c8734da..c8f990c 100644 --- a/src/wrapr/convert_py2r.py +++ b/src/wrapr/convert_py2r.py @@ -11,7 +11,7 @@ from rpy2.robjects import FloatVector, pandas2ri, numpy2ri from .rutils import rcall - +from .convert_r2py import Robject # We can uncomment this when we transition to 3.12 # type RBaseObject = ( # ro.FloatVector | ro.FloatVector | ro.IntVector | @@ -32,30 +32,30 @@ def convert_py2r(args: List[Any], kwargs: Dict[str, Any]) -> None: def convert_pyobject2r(x: Any) -> Any: # RBaseObject | PyDtype | Any: match x: + case Robject(): + return x.Robj case np.ndarray(): - out = convert_numpy2r(x) + return convert_numpy2r(x) case scipy.sparse.coo_array() | scipy.sparse.coo_matrix(): - out = convert_pysparsematrix(x) + return convert_pysparsematrix(x) case OrderedDict() | dict(): - out = dict2rlist(x) + return dict2rlist(x) case list() | tuple() | set(): - out = pylist2rlist(x) + return pylist2rlist(x) case pd.DataFrame(): - out = pandas2r(x) + return pandas2r(x) case NoneType(): - out = ro.NULL + return ro.NULL case np.bool_(): - out = bool(x) + return bool(x) case np.int8() | np.int16() | np.int32() | np.int64(): - out = int(x) - case np.float16() | np.float32() | np.float64() | np.float128(): - out = float(x) + return int(x) + case np.float16() | np.float32() | np.float64(): + return float(x) case np.str_() | np.bytes_(): - out = str(x) + return str(x) case _: - out = x - return out - + return x def convert_numpy2r(x: NDArray) -> Any: # RBaseObject: @@ -117,7 +117,8 @@ def dict2rlist(x: Dict | OrderedDict) -> ro.ListVector: def pylist2rlist(x: List | Tuple | Set) -> ro.ListVector: y: Dict[str, Any] = {str(k): v for k, v in enumerate(x)} - return ro.ListVector(dict2rlist(y)) + unname: Callable = rcall("unname") + return unname(dict2rlist(y)) def convert_pysparsematrix(x: scipy.sparse.coo_array | scipy.sparse.coo_matrix): diff --git a/src/wrapr/convert_r2py.py b/src/wrapr/convert_r2py.py index ea710ba..3dcbf67 100644 --- a/src/wrapr/convert_r2py.py +++ b/src/wrapr/convert_r2py.py @@ -15,6 +15,35 @@ from .lazy_rexpr import lazily, lazy_wrap from .rutils import rcall +from .RDataFrame import convert_pandas, attempt_pandas_conversion +from .RArray import convert_numpy, is_valid_numpy, filter_numpy + + +class Robject(): + def __init__(self, Robj: Any): + self.Robj = Robj + + def __str__(self) -> str: + # return captureRprint(self.Robj) + return self.Robj.__str__() + + def __repr__(self): + # return self.Robj.__repr__() + return self.Robj.__str__() + + def __getattr__(self, name: str) -> Any: + fun: Callable = rfunc(name) + return fun(self.Robj) + + def __getitem__(self, *args): + return self.Robj.__getitem__(*args) + + def __iter__(self): + return self.Robj.__iter__() + + def to_py(self): + return convert_r2py(self.Robj) + def convert_r2py(x: Any) -> Any: match x: @@ -22,8 +51,6 @@ def convert_r2py(x: Any) -> Any: return x case rpy2.rinterface_lib.sexp.NULLType(): return None - case ro.methods.RS4(): - return convert_s4(x) case vc.DataFrame(): return convert_pandas(x) case vc.Vector() | vc.Matrix() | vc.Array() if not is_rlist(x): @@ -45,7 +72,7 @@ def convert_r2py(x: Any) -> Any: case vc.ListSexpVector() | vc.ListVector(): return convert_rlist2py(x) case _: - return generic_conversion(x) + return Robject(x) def convert_list(X: List | Tuple) -> Any: @@ -85,83 +112,3 @@ def convert_dict(X: Dict | OrderedDict, X[key] = convert_r2py(X[key]) finally: return X - - -def convert_numpy(x: vc.Vector | NDArray) -> NDArray | None: - if isinstance(x, rpy2.rinterface_lib.sexp.NULLType): - return None - match x: # this should be expanded upon - case vc.BoolVector() | vc.BoolArray() | vc.BoolMatrix(): - dtype = "bool" - case vc.FloatVector() | vc.FloatArray() | vc.FloatMatrix(): - dtype = "float" - case vc.IntVector() | vc.IntArray() | vc.IntMatrix(): - dtype = "int" - case vc.StrArray() | vc.StrVector() | vc.StrMatrix(): - dtype = "U" - case _: - dtype = None - - y = np.asarray(x, dtype=dtype) - return filter_numpy(y) - - -def filter_numpy(x: NDArray) -> NDArray | int | str | float | bool: - # sometimes a numpy array will have one element with shape (,) - # this should be (1,) - y = x[np.newaxis][0] if not x.shape else x - # if shape is (1,) we should just return as int | str | float | bool - # R doesn't have these types, only vectors/arrays, this will probably - # give unexpected results for users who are unfamiliar with R, so - # we return the first element instead - y = y[0] if y.shape == (1,) else y - return y - - -def is_valid_numpy(x: NDArray) -> bool: - return x.dtype.fields is None - - -def convert_pandas(df: vc.DataFrame) -> pd.DataFrame: - colnames = df.names - df_dict = {c: convert_numpy(x) for c, x in zip(colnames, list(df))} - return pd.DataFrame(df_dict) - - with (ro.default_converter + pandas2ri.converter).context(): - pd_df = ro.conversion.get_conversion().rpy2py(df) - return pd_df - - -def attempt_pandas_conversion(x: Any) -> Any: - try: - return pd.DataFrame(x) - except: - return x - - -def generic_conversion(x: Any) -> Any: - try: - arr = np.asarray(x) - if not is_valid_numpy(arr): - raise Error - return arr - except: - return attempt_pandas_conversion(x) - - -def convert_s4(x: ro.methods.RS4) -> Any: - rclass = get_rclass(x) - if rclass is None: - return generic_conversion(x) - - match np_collapse(rclass): - case "dgCMatrix": # to do: put this in a seperate function - dense = convert_numpy(as_matrix(x)) - sparse = scipy.sparse.coo_matrix(dense) - return sparse - case _: - return generic_conversion(x) - - - - diff --git a/src/wrapr/function_wrapper.py b/src/wrapr/function_wrapper.py index 373558c..dc8f9c9 100644 --- a/src/wrapr/function_wrapper.py +++ b/src/wrapr/function_wrapper.py @@ -4,44 +4,10 @@ from numpy.typing import NDArray from typing import Any, Callable, Dict, List from .convert_py2r import convert_py2r -from .convert_r2py import convert_r2py +from .convert_r2py import convert_r2py, Robject from .rutils import rcall from .lazy_rexpr import lazy_wrap -from .robject import Robject - -# def robjectwrap(py_object: Any, r_object: Any = None) -> Any: -# if py_object is None: -# return None -# -# class RobjectWrapper(type(py_object)): -# @classmethod -# def from_existing(cls, existing_instance, new_r_object): -# # Create a new instance of RobjectWrapper -# new_instance = cls(existing_instance._obj, new_r_object) -# return new_instance -# -# def __strip__(self): -# return self._obj -# -# # Wrap the initial py_object -# wrapped_object = RobjectWrapper(py_object, r_object) -# -# return wrapped_object -# -# -# def strip_RobjectWrapper(x: Any) -> Any: -# if hasattr(x, "__strip__"): -# return x.__strip__() -# else: -# return x - - -# def strip_args(args: List[Any], kwargs: Dict[str, Any]) -> None: -# for i, x in enumerate(args): -# args[i] = strip_RobjectWrapper(x) -# for k, v in kwargs.items(): -# kwargs[k] = strip_RobjectWrapper(v) - +from .settings import Settings, settings def wrap_rfunc(func: Callable | Any, name: str | None) -> Callable | Any: # should be a Callable, but may f-up (thus Any) @@ -55,9 +21,12 @@ def wrap(*args, **kwargs): lazyfunc = lazy_wrap(args=args, kwargs=kwargs, func=func, func_name=name) r_object: Any = lazyfunc(*args, **kwargs) - py_object = convert_r2py(r_object) - # return robjectwrap(py_object, r_object) - return py_object + + if settings.Rview: + return Robject(r_object) + else: + return convert_r2py(r_object) + try: wrap.__doc__ = func.__doc__ diff --git a/src/wrapr/renv.py b/src/wrapr/renv.py index be2286f..5bbea57 100644 --- a/src/wrapr/renv.py +++ b/src/wrapr/renv.py @@ -16,6 +16,9 @@ from .utils import ROutputCapture, pinfo from .function_wrapper import rfunc, wrap_rfunc # wrap_rfunc should perhaps be its own module from .rutils import rcall +from .convert_r2py import convert_r2py, Robject +from .settings import Settings, settings + class Renv: def __init__(self, env_name): @@ -95,14 +98,19 @@ def function(self, expr: str) -> Callable: # return attributes -def fetch_data(dataset: str, module: rpkg.Package) -> pd.DataFrame | None: - with (ro.default_converter + pandas2ri.converter).context(): - try: - return rpkg.data(module).fetch(dataset)[dataset] - except KeyError: - return None - except: - return None +def fetch_data(dataset: str, module: rpkg.Package) -> pd.DataFrame | Robject | None: + try: + r_object = rpkg.data(module).fetch(dataset)[dataset] + + if settings.Rview: + return Robject(r_object) + else: + return convert_r2py(r_object) + + except KeyError: + return None + except: + return None def get_assets(env_name: str, module: rpkg.Package) -> Tuple[Set[str], Set[str]]: diff --git a/src/wrapr/rlist.py b/src/wrapr/rlist.py new file mode 100644 index 0000000..abfa122 --- /dev/null +++ b/src/wrapr/rlist.py @@ -0,0 +1,6 @@ +from collections import UserList + + +a = UserList([1, 2, 3, 4]) +setattr(a, "rowstart", 1) +a.rowstart diff --git a/src/wrapr/robject.py b/src/wrapr/robject.py deleted file mode 100644 index d018d05..0000000 --- a/src/wrapr/robject.py +++ /dev/null @@ -1,31 +0,0 @@ -from logging import captureWarnings -from typing import Any -import rpy2.robjects as ro -from .rutils import rcall -from .convert_r2py import convert_r2py - - -class Robject(): - def __init__(self, Robj: Any): - self.Robj = Robj - - def __str__(self) -> str: - return captureRprint(self.Robj) - - def __repr__(self): - return self.Robj.__repr__() - - def __getattr__(self, name: str) -> Any: - fun: Callable = rfunc(name) - return fun(self.Robj) - - def __getitem__(self, *args): - return self.Robj.__getitem__(*args) - - def __iter__(self): - return self.Robj.__iter__() - - -def captureRprint(x) -> str: - expr = r'function(x) paste(utils::capture.output(print(x)), collapse = "\n")' - return convert_r2py(rcall(expr)(x)[0]) diff --git a/src/wrapr/settings.py b/src/wrapr/settings.py new file mode 100644 index 0000000..fc21e66 --- /dev/null +++ b/src/wrapr/settings.py @@ -0,0 +1,17 @@ +class Settings: + _instance = None + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super(Settings, cls).__new__(cls) + cls._instance._initialize(**kwargs) + return cls._instance + + def _initialize(self): + self.Rview = False + + def set_Rview(self, rview: bool): + self.Rview = rview + + +settings = Settings() diff --git a/src/wrapr/utils.py b/src/wrapr/utils.py index f30f586..88ee7f2 100644 --- a/src/wrapr/utils.py +++ b/src/wrapr/utils.py @@ -15,22 +15,13 @@ def __init__(self): self.stderr_orig = None def capture_r_output(self): - """Redirects R console output to Python lists.""" - # Define custom functions to capture output def add_to_stdout(line): self.stdout.append(line) def add_to_stderr(line): self.stderr.append(line) - - # Keep original callbacks self.stdout_orig = rpy2.rinterface_lib.callbacks.consolewrite_print self.stderr_orig = rpy2.rinterface_lib.callbacks.consolewrite_warnerror - - # Replace with custom callbacks rpy2.rinterface_lib.callbacks.consolewrite_print = add_to_stdout rpy2.rinterface_lib.callbacks.consolewrite_warnerror = add_to_stderr def reset_r_output(self): - """Resets the R output callbacks to their original state.""" rpy2.rinterface_lib.callbacks.consolewrite_print = self.stdout_orig rpy2.rinterface_lib.callbacks.consolewrite_warnerror = self.stderr_orig - - diff --git a/tests/Robject.py b/tests/Robject.py index f09d79d..c35a8c0 100644 --- a/tests/Robject.py +++ b/tests/Robject.py @@ -1,8 +1,15 @@ import wrapr as wr -# import rpy2 -# from rpy2.robjects.packages import importr -# -# -# bs = importr("base") -# l = bs.list([1, 2, 3, 4]) -# L = wr.Robject(l) + +bs = wr.library("base") +dt = wr.library("datasets") + +l1 = bs.list([1, 2, 3, 4]) +iris1 = dt.iris + +bs = wr.library("base") +dt = wr.library("datasets") + +wr.settings.set_Rview(True) +l2 = bs.list([1, 2, 3, 4]) +iris2 = dt.iris +iris2.to_py()