vnmabus · trossi · Sep 5, 2024 · Sep 5, 2024 · Sep 5, 2024 · Sep 5, 2024
diff --git a/rdata/_write.py b/rdata/_write.py
@@ -3,7 +3,7 @@
 
 from typing import TYPE_CHECKING
 
-from .conversion import build_r_data, convert_to_r_object, convert_to_r_object_for_rda
+from .conversion import convert_python_to_r_data
 from .conversion.to_r import DEFAULT_FORMAT_VERSION
 from .unparser import unparse_file
 
@@ -27,10 +27,7 @@ def write_rds(
     """
     Write an RDS file.
 
-    This is a convenience function that wraps
-    :func:`rdata.conversion.convert_to_r_object`,
-    :func:`rdata.conversion.build_r_data`,
-    and :func:`rdata.unparser.unparse_file`,
+    This is a convenience function that wraps conversion and unparsing
     as it is the common use case.
 
     Args:
@@ -52,15 +49,12 @@ def write_rds(
         >>> data = ["hello", 1, 2.2, 3.3+4.4j]
         >>> rdata.write_rds("test.rds", data)
     """
-    r_object = convert_to_r_object(
+    r_data = convert_python_to_r_data(
         data,
         encoding=encoding,
-    )
-    r_data = build_r_data(
-        r_object,
-        encoding=encoding,
         format_version=format_version,
     )
+
     unparse_file(
         path,
         r_data,
@@ -82,10 +76,7 @@ def write_rda(
     """
     Write an RDA or RDATA file.
 
-    This is a convenience function that wraps
-    :func:`rdata.conversion.convert_to_r_object_for_rda`,
-    :func:`rdata.conversion.build_r_data`,
-    and :func:`rdata.unparser.unparse_file`,
+    This is a convenience function that wraps conversion and unparsing
     as it is the common use case.
 
     Args:
@@ -107,15 +98,13 @@ def write_rda(
         >>> data = {"name": "hello", "values": [1, 2.2, 3.3+4.4j]}
         >>> rdata.write_rda("test.rda", data)
     """
-    r_object = convert_to_r_object_for_rda(
+    r_data = convert_python_to_r_data(
         data,
         encoding=encoding,
-    )
-    r_data = build_r_data(
-        r_object,
-        encoding=encoding,
         format_version=format_version,
+        file_type="rda",
     )
+
     unparse_file(
         path,
         r_data,

diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
@@ -25,7 +25,7 @@
     ts_constructor as ts_constructor,
 )
 from .to_r import (
-    build_r_data as build_r_data,
-    convert_to_r_object as convert_to_r_object,
-    convert_to_r_object_for_rda as convert_to_r_object_for_rda,
+    ConverterFromPythonToR as ConverterFromPythonToR,
+    convert_python_to_r_data as convert_python_to_r_data,
+    convert_python_to_r_object as convert_python_to_r_object,
 )
diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
@@ -14,6 +14,8 @@
 import xarray
 from typing_extensions import override
 
+from rdata.parser._parser import get_altrep_name
+
 from .. import parser
 
 ConversionFunction = Callable[[Union[parser.RData, parser.RObject]], Any]
@@ -394,20 +396,70 @@ def convert_array(
     return value  # type: ignore [no-any-return]
 
 
-R_INT_MIN = -2**31
+def convert_altrep_to_range(
+    r_altrep: parser.RObject,
+) -> range:
+    """
+    Convert an R compact integer sequence altrep to a range object.
+
+    Args:
+        r_altrep: R altrep object of class ``compact_intseq``.
+
+    Returns:
+        Range with the same length, first element and step.
+
+    Raises:
+        TypeError: ``r_altrep`` is not an altrep object.
+        NotImplementedError: the altrep is not a compact integer sequence.
+    """
+    if r_altrep.info.type != parser.RObjectType.ALTREP:
+        msg = "Must receive an altrep object"
+        raise TypeError(msg)
+
+    info, state, attr = r_altrep.value
+    assert attr.info.type == parser.RObjectType.NILVALUE
+    if get_altrep_name(info) != b"compact_intseq":
+        msg = "Only compact integer sequences can be converted to range"
+        raise NotImplementedError(msg)
+
+    # State is read as (length, first, step). The exclusive stop must follow
+    # the sign of the step so decreasing sequences keep all their elements.
+    n, start, step = (int(state.value[i]) for i in range(3))
+    return range(start, start + n * step, step)
 
 
 def _dataframe_column_transform(source: Any) -> Any:  # noqa: ANN401
+    """Use pandas extension arrays for integer, boolean and string columns."""
 
     if isinstance(source, np.ndarray):
+        dtype: Any
         if np.issubdtype(source.dtype, np.integer):
-            return pd.Series(source, dtype=pd.Int32Dtype()).array
-
-        if np.issubdtype(source.dtype, np.bool_):
-            return pd.Series(source, dtype=pd.BooleanDtype()).array
+            dtype = pd.Int32Dtype()
+        elif np.issubdtype(source.dtype, np.floating):
+            # Return the numpy array unchanged so that R_FLOAT_NA, np.nan and
+            # other NaNs stay exactly as they were in the file; users can then
+            # decide whether only R_FLOAT_NA or every NaN means "missing".
+            return source
+            # Alternative kept for reference, treating all NaNs as "missing":
+            # dtype = pd.Float64Dtype()  # noqa: ERA001
+            # Alternative kept for reference, only R_FLOAT_NA as "missing":
+            # from rdata.missing import is_na  # noqa: ERA001
+            # return pd.arrays.FloatingArray(source, is_na(source))  # noqa: ERA001
+        elif np.issubdtype(source.dtype, np.complexfloating):
+            # There seems to be no pandas extension dtype for complex arrays.
+            return source
+        elif np.issubdtype(source.dtype, np.bool_):
+            dtype = pd.BooleanDtype()
+        elif np.issubdtype(source.dtype, np.str_):
+            dtype = pd.StringDtype()
+        elif np.issubdtype(source.dtype, np.object_):
+            for value in source:
+                assert isinstance(value, str) or value is None
+            dtype = pd.StringDtype()
+        else:
+            return source
 
-        if np.issubdtype(source.dtype, np.str_):
-            return pd.Series(source, dtype=pd.StringDtype()).array
+        return pd.Series(source, dtype=dtype).array
 
     return source
 
@@ -430,7 +482,7 @@ def dataframe_constructor(
             and isinstance(row_names, np.ma.MaskedArray)
             and row_names.mask[0]
         )
-        else tuple(row_names)
+        else row_names
     )
 
     return pd.DataFrame(obj, columns=obj, index=index)
@@ -820,6 +872,9 @@ def _convert_next(  # noqa: C901, PLR0912, PLR0915
 
             value = None
 
+        elif obj.info.type == parser.RObjectType.ALTREP:
+            value = convert_altrep_to_range(obj)
+
         else:
             msg = f"Type {obj.info.type} not implemented"
             raise NotImplementedError(msg)