Skip to content

Commit

Permalink
split convert_r2py
Browse files Browse the repository at this point in the history
  • Loading branch information
pph2p committed Sep 17, 2024
1 parent 30bfef1 commit 93f8de9
Show file tree
Hide file tree
Showing 12 changed files with 169 additions and 194 deletions.
40 changes: 40 additions & 0 deletions src/wrapr/RArray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import numpy as np
import rpy2
import rpy2.robjects.vectors as vc

from numpy.typing import NDArray


def convert_numpy(x: vc.Vector | NDArray) -> NDArray | None:
    """Convert an rpy2 vector (or array-like) into a numpy value.

    R ``NULL`` is returned as ``None``. For the recognised rpy2 vector
    classes an explicit numpy dtype is requested; anything else lets numpy
    infer the dtype. The array is passed through ``filter_numpy`` before it
    is returned.
    """
    if isinstance(x, rpy2.rinterface_lib.sexp.NULLType):
        return None

    # Pick the numpy dtype from the rpy2 class -- this should be expanded upon.
    if isinstance(x, (vc.BoolVector, vc.BoolArray, vc.BoolMatrix)):
        dtype = "bool"
    elif isinstance(x, (vc.FloatVector, vc.FloatArray, vc.FloatMatrix)):
        dtype = "float"
    elif isinstance(x, (vc.IntVector, vc.IntArray, vc.IntMatrix)):
        dtype = "int"
    elif isinstance(x, (vc.StrArray, vc.StrVector, vc.StrMatrix)):
        dtype = "U"
    else:
        dtype = None

    return filter_numpy(np.asarray(x, dtype=dtype))


def filter_numpy(x: NDArray) -> NDArray | int | str | float | bool:
# sometimes a numpy array will have one element with shape (,)
# this should be (1,)
y = x[np.newaxis][0] if not x.shape else x
# if shape is (1,) we should just return as int | str | float | bool
# R doesn't have these types, only vectors/arrays, this will probably
# give unexpected results for users who are unfamiliar with R, so
# we return the first element instead
y = y[0] if y.shape == (1,) else y
return y


def is_valid_numpy(x: NDArray) -> bool:
    """Return True when ``x`` has a plain dtype, i.e. no named fields."""
    has_fields = x.dtype.fields is not None
    return not has_fields
19 changes: 19 additions & 0 deletions src/wrapr/RDataFrame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pandas as pd

import rpy2.robjects as ro
import rpy2.robjects.vectors as vc

from typing import Any
from .RArray import convert_numpy

def convert_pandas(df: vc.DataFrame) -> pd.DataFrame:
    """Build a pandas DataFrame from an rpy2 data frame, column by column.

    Each column is converted with ``convert_numpy``. That helper collapses
    one-element vectors to scalars, and ``pd.DataFrame`` refuses a dict made
    only of scalars, so every converted column is promoted back to at least
    one dimension before the frame is built.
    """
    # Local import: this module does not import numpy at the top level.
    import numpy as np

    columns = {
        name: np.atleast_1d(convert_numpy(column))
        for name, column in zip(df.names, df)
    }
    return pd.DataFrame(columns)


def attempt_pandas_conversion(x: Any) -> Any:
    """Return ``pd.DataFrame(x)`` if pandas accepts ``x``, else ``x`` itself.

    This is a best-effort conversion: any ordinary failure of the DataFrame
    constructor falls back to the untouched input.
    """
    try:
        return pd.DataFrame(x)
    except Exception:
        # Deliberately broad, but not a bare ``except`` -- KeyboardInterrupt
        # and SystemExit must still propagate.
        return x
3 changes: 2 additions & 1 deletion src/wrapr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
from .library import library, importr
from .load_namespace import try_load_namespace
from .lazy_rexpr import lazily
from .robject import Robject
from .convert_r2py import Robject
from .settings import settings, Settings
33 changes: 17 additions & 16 deletions src/wrapr/convert_py2r.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from rpy2.robjects import FloatVector, pandas2ri, numpy2ri

from .rutils import rcall

from .convert_r2py import Robject
# We can uncomment this when we transition to 3.12
# type RBaseObject = (
# ro.FloatVector | ro.FloatVector | ro.IntVector |
Expand All @@ -32,30 +32,30 @@ def convert_py2r(args: List[Any], kwargs: Dict[str, Any]) -> None:

def convert_pyobject2r(x: Any) -> Any: # RBaseObject | PyDtype | Any:
match x:
case Robject():
return x.Robj
case np.ndarray():
out = convert_numpy2r(x)
return convert_numpy2r(x)
case scipy.sparse.coo_array() | scipy.sparse.coo_matrix():
out = convert_pysparsematrix(x)
return convert_pysparsematrix(x)
case OrderedDict() | dict():
out = dict2rlist(x)
return dict2rlist(x)
case list() | tuple() | set():
out = pylist2rlist(x)
return pylist2rlist(x)
case pd.DataFrame():
out = pandas2r(x)
return pandas2r(x)
case NoneType():
out = ro.NULL
return ro.NULL
case np.bool_():
out = bool(x)
return bool(x)
case np.int8() | np.int16() | np.int32() | np.int64():
out = int(x)
case np.float16() | np.float32() | np.float64() | np.float128():
out = float(x)
return int(x)
case np.float16() | np.float32() | np.float64():
return float(x)
case np.str_() | np.bytes_():
out = str(x)
return str(x)
case _:
out = x
return out

return x


def convert_numpy2r(x: NDArray) -> Any: # RBaseObject:
Expand Down Expand Up @@ -117,7 +117,8 @@ def dict2rlist(x: Dict | OrderedDict) -> ro.ListVector:

def pylist2rlist(x: List | Tuple | Set) -> ro.ListVector:
    """Convert a Python list, tuple or set into an R list.

    The elements are first keyed by their position so that ``dict2rlist``
    can build the list; the result is then passed to the function obtained
    from ``rcall("unname")`` (presumably R's ``unname`` -- confirm against
    ``rutils.rcall``) so the placeholder names do not leak into R.
    """
    keyed: Dict[str, Any] = {str(i): item for i, item in enumerate(x)}
    unname: Callable = rcall("unname")
    return unname(dict2rlist(keyed))


def convert_pysparsematrix(x: scipy.sparse.coo_array | scipy.sparse.coo_matrix):
Expand Down
113 changes: 30 additions & 83 deletions src/wrapr/convert_r2py.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,42 @@
from .lazy_rexpr import lazily, lazy_wrap
from .rutils import rcall

from .RDataFrame import convert_pandas, attempt_pandas_conversion
from .RArray import convert_numpy, is_valid_numpy, filter_numpy


class Robject():
    """Python-side handle that keeps an R object unconverted.

    The wrapped object is stored in ``Robj``. Printing, indexing and
    iteration are delegated to it, and any other public attribute access
    is interpreted as "call the R function of that name on this object".
    """

    def __init__(self, Robj: Any):
        self.Robj = Robj

    def __str__(self) -> str:
        return str(self.Robj)

    def __repr__(self):
        # Intentionally the same text as __str__.
        return str(self.Robj)

    def __getattr__(self, name: str) -> Any:
        """Look up ``name`` via ``rfunc`` and call it on the wrapped object.

        Only reached when normal attribute lookup fails. Dunder/private
        names and ``Robj`` itself are never forwarded: protocol probes such
        as ``hasattr(obj, "__deepcopy__")`` must get an AttributeError, and
        a missing ``Robj`` (instance created without ``__init__``) would
        otherwise recurse forever through ``self.Robj``.
        """
        if name == "Robj" or name.startswith("_"):
            raise AttributeError(name)
        fun: Callable = rfunc(name)
        return fun(self.Robj)

    def __getitem__(self, *args):
        return self.Robj.__getitem__(*args)

    def __iter__(self):
        return iter(self.Robj)

    def to_py(self):
        """Convert the wrapped R object with ``convert_r2py``."""
        return convert_r2py(self.Robj)


def convert_r2py(x: Any) -> Any:
match x:
case str() | int() | bool() | float():
return x
case rpy2.rinterface_lib.sexp.NULLType():
return None
case ro.methods.RS4():
return convert_s4(x)
case vc.DataFrame():
return convert_pandas(x)
case vc.Vector() | vc.Matrix() | vc.Array() if not is_rlist(x):
Expand All @@ -45,7 +72,7 @@ def convert_r2py(x: Any) -> Any:
case vc.ListSexpVector() | vc.ListVector():
return convert_rlist2py(x)
case _:
return generic_conversion(x)
return Robject(x)


def convert_list(X: List | Tuple) -> Any:
Expand Down Expand Up @@ -85,83 +112,3 @@ def convert_dict(X: Dict | OrderedDict,
X[key] = convert_r2py(X[key])
finally:
return X


def convert_numpy(x: vc.Vector | NDArray) -> NDArray | None:
    """Convert an rpy2 vector (or array-like) into a numpy value.

    R ``NULL`` is returned as ``None``. For the recognised rpy2 vector
    classes an explicit numpy dtype is requested; anything else lets numpy
    infer the dtype. The array is passed through ``filter_numpy`` before it
    is returned.
    """
    if isinstance(x, rpy2.rinterface_lib.sexp.NULLType):
        return None

    # Pick the numpy dtype from the rpy2 class -- this should be expanded upon.
    if isinstance(x, (vc.BoolVector, vc.BoolArray, vc.BoolMatrix)):
        dtype = "bool"
    elif isinstance(x, (vc.FloatVector, vc.FloatArray, vc.FloatMatrix)):
        dtype = "float"
    elif isinstance(x, (vc.IntVector, vc.IntArray, vc.IntMatrix)):
        dtype = "int"
    elif isinstance(x, (vc.StrArray, vc.StrVector, vc.StrMatrix)):
        dtype = "U"
    else:
        dtype = None

    return filter_numpy(np.asarray(x, dtype=dtype))


def filter_numpy(x: NDArray) -> NDArray | int | str | float | bool:
# sometimes a numpy array will have one element with shape (,)
# this should be (1,)
y = x[np.newaxis][0] if not x.shape else x
# if shape is (1,) we should just return as int | str | float | bool
# R doesn't have these types, only vectors/arrays, this will probably
# give unexpected results for users who are unfamiliar with R, so
# we return the first element instead
y = y[0] if y.shape == (1,) else y
return y


def is_valid_numpy(x: NDArray) -> bool:
    """Return True when ``x`` has a plain dtype, i.e. no named fields."""
    has_fields = x.dtype.fields is not None
    return not has_fields


def convert_pandas(df: vc.DataFrame) -> pd.DataFrame:
    """Build a pandas DataFrame from an rpy2 data frame, column by column.

    Each column is converted with ``convert_numpy``. That helper collapses
    one-element vectors to scalars, and ``pd.DataFrame`` refuses a dict made
    only of scalars, so every converted column is promoted back to at least
    one dimension before the frame is built.

    The former ``pandas2ri`` conversion that followed the ``return`` could
    never run and has been removed.
    """
    columns = {
        name: np.atleast_1d(convert_numpy(column))
        for name, column in zip(df.names, df)
    }
    return pd.DataFrame(columns)


def attempt_pandas_conversion(x: Any) -> Any:
    """Return ``pd.DataFrame(x)`` if pandas accepts ``x``, else ``x`` itself.

    This is a best-effort conversion: any ordinary failure of the DataFrame
    constructor falls back to the untouched input.
    """
    try:
        return pd.DataFrame(x)
    except Exception:
        # Deliberately broad, but not a bare ``except`` -- KeyboardInterrupt
        # and SystemExit must still propagate.
        return x


def generic_conversion(x: Any) -> Any:
    """Fallback conversion: try numpy first, then pandas.

    ``x`` is returned as a numpy array when ``np.asarray`` succeeds and the
    result passes ``is_valid_numpy``. Otherwise the value is handed to
    ``attempt_pandas_conversion``.
    """
    try:
        arr = np.asarray(x)
    except Exception:
        return attempt_pandas_conversion(x)

    # The original signalled this case with ``raise Error`` (an undefined
    # name) and relied on a bare ``except`` to catch the resulting
    # NameError; branch explicitly instead.
    if is_valid_numpy(arr):
        return arr
    return attempt_pandas_conversion(x)


def convert_s4(x: ro.methods.RS4) -> Any:
    """Convert an R S4 object to a Python value.

    Objects whose collapsed class name is ``"dgCMatrix"`` are densified via
    ``as_matrix`` and returned as a scipy COO sparse matrix. Objects with
    no class, or any other class, go through ``generic_conversion``.
    """
    rclass = get_rclass(x)
    if rclass is not None and np_collapse(rclass) == "dgCMatrix":
        # to do: put this in a separate function
        return scipy.sparse.coo_matrix(convert_numpy(as_matrix(x)))
    return generic_conversion(x)




47 changes: 8 additions & 39 deletions src/wrapr/function_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,10 @@
from numpy.typing import NDArray
from typing import Any, Callable, Dict, List
from .convert_py2r import convert_py2r
from .convert_r2py import convert_r2py
from .convert_r2py import convert_r2py, Robject
from .rutils import rcall
from .lazy_rexpr import lazy_wrap
from .robject import Robject

# def robjectwrap(py_object: Any, r_object: Any = None) -> Any:
# if py_object is None:
# return None
#
# class RobjectWrapper(type(py_object)):
# @classmethod
# def from_existing(cls, existing_instance, new_r_object):
# # Create a new instance of RobjectWrapper
# new_instance = cls(existing_instance._obj, new_r_object)
# return new_instance
#
# def __strip__(self):
# return self._obj
#
# # Wrap the initial py_object
# wrapped_object = RobjectWrapper(py_object, r_object)
#
# return wrapped_object
#
#
# def strip_RobjectWrapper(x: Any) -> Any:
# if hasattr(x, "__strip__"):
# return x.__strip__()
# else:
# return x


# def strip_args(args: List[Any], kwargs: Dict[str, Any]) -> None:
# for i, x in enumerate(args):
# args[i] = strip_RobjectWrapper(x)
# for k, v in kwargs.items():
# kwargs[k] = strip_RobjectWrapper(v)

from .settings import Settings, settings

def wrap_rfunc(func: Callable | Any, name: str | None) -> Callable | Any:
# should be a Callable, but may f-up (thus Any)
Expand All @@ -55,9 +21,12 @@ def wrap(*args, **kwargs):
lazyfunc = lazy_wrap(args=args, kwargs=kwargs, func=func,
func_name=name)
r_object: Any = lazyfunc(*args, **kwargs)
py_object = convert_r2py(r_object)
# return robjectwrap(py_object, r_object)
return py_object

if settings.Rview:
return Robject(r_object)
else:
return convert_r2py(r_object)


try:
wrap.__doc__ = func.__doc__
Expand Down
24 changes: 16 additions & 8 deletions src/wrapr/renv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from .utils import ROutputCapture, pinfo
from .function_wrapper import rfunc, wrap_rfunc # wrap_rfunc should perhaps be its own module
from .rutils import rcall
from .convert_r2py import convert_r2py, Robject
from .settings import Settings, settings


class Renv:
def __init__(self, env_name):
Expand Down Expand Up @@ -95,14 +98,19 @@ def function(self, expr: str) -> Callable:
# return attributes


def fetch_data(dataset: str, module: rpkg.Package) -> pd.DataFrame | Robject | None:
    """Fetch ``dataset`` from an R package's bundled data.

    Returns the dataset wrapped in ``Robject`` when ``settings.Rview`` is
    truthy, otherwise the result of ``convert_r2py``. The lookup is
    best-effort: if fetching or converting fails, ``None`` is returned.
    """
    try:
        r_object = rpkg.data(module).fetch(dataset)[dataset]

        if settings.Rview:
            return Robject(r_object)
        return convert_r2py(r_object)
    except Exception:
        # Covers the missing-dataset KeyError as well as other ordinary
        # failures, while letting KeyboardInterrupt/SystemExit propagate
        # (the previous bare ``except`` swallowed those too).
        return None


def get_assets(env_name: str, module: rpkg.Package) -> Tuple[Set[str], Set[str]]:
Expand Down
6 changes: 6 additions & 0 deletions src/wrapr/rlist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from collections import UserList


# Scratch check: a UserList instance accepts arbitrary extra attributes.
a = UserList([1, 2, 3, 4])
a.rowstart = 1
# Bare attribute read kept from the original; it has no effect at import time.
a.rowstart
Loading

0 comments on commit 93f8de9

Please sign in to comment.