87 make hdf datasets always be float64 unless they are pcap bits #93

22 changes: 18 additions & 4 deletions src/pandablocks/connections.py
@@ -303,15 +303,22 @@ def _handle_header_body(self):
if line == b"</header>":
fields = []
root = ET.fromstring(self._header)

for field in root.find("fields"):
fields.append(
FieldCapture(
name=str(field.get("name")),
type=np.dtype(field.get("type")),
capture=str(field.get("capture")),
scale=float(field.get("scale", 1)),
offset=float(field.get("offset", 0)),
units=str(field.get("units", "")),
scale=float(scale)
if (scale := field.get("scale")) is not None
else None,
offset=float(offset)
if (offset := field.get("offset")) is not None
else None,
units=str(units)
if (units := field.get("units")) is not None
else None,
)
)
data = root.find("data")
@@ -323,7 +330,14 @@ def _handle_header_body(self):
name, capture = SAMPLES_FIELD.rsplit(".", maxsplit=1)
fields.insert(
0,
FieldCapture(name, np.dtype("uint32"), capture),
FieldCapture(
name=name,
type=np.dtype("uint32"),
capture=capture,
scale=None,
offset=None,
units=None,
),
)
self._frame_dtype = np.dtype(
[(f"{f.name}.{f.capture}", f.type) for f in fields]
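For context, a minimal sketch (not part of the diff) of how a `<field>` element parses under the change above: a field with no scale/offset/units attributes, such as a `PCAP.BITS` entry, ends up with all three set to `None`, while a scaled field keeps explicit values. The header snippet and field names are invented for illustration.

```python
# Sketch only: illustrates the optional scale/offset/units parsing above.
import xml.etree.ElementTree as ET

import numpy as np

from pandablocks.responses import FieldCapture

header = """<header>
  <fields>
    <field name="PCAP.BITS0" type="uint32" capture="Value"/>
    <field name="COUNTER1.OUT" type="double" capture="Value"
           scale="1" offset="0" units=""/>
  </fields>
</header>"""

fields = []
for field in ET.fromstring(header).find("fields"):
    fields.append(
        FieldCapture(
            name=str(field.get("name")),
            type=np.dtype(field.get("type")),
            capture=str(field.get("capture")),
            scale=float(scale) if (scale := field.get("scale")) is not None else None,
            offset=float(offset) if (offset := field.get("offset")) is not None else None,
            units=str(units) if (units := field.get("units")) is not None else None,
        )
    )

assert fields[0].scale is fields[0].offset is fields[0].units is None  # PCAP.BITS style
assert (fields[1].scale, fields[1].offset, fields[1].units) == (1.0, 0.0, "")
```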
13 changes: 5 additions & 8 deletions src/pandablocks/hdf.py
@@ -111,16 +111,13 @@ def __init__(
def create_dataset(self, field: FieldCapture, raw: bool):
# Data written in a big stack, growing in that dimension
assert self.hdf_file, "File not open yet"
if raw and (field.capture == "Mean" or field.scale != 1 or field.offset != 0):
# Processor outputs a float
dtype = np.dtype("float64")
else:
# No processor, datatype passed through
dtype = field.type

dataset_name = self.capture_record_hdf_names.get(field.name, {}).get(
field.capture, f"{field.name}.{field.capture}"
)

dtype = field.raw_mode_dataset_dtype if raw else field.type

return self.hdf_file.create_dataset(
f"/{dataset_name}",
dtype=dtype,
@@ -201,7 +198,7 @@ def mean_callable(data):
return (data[column_name] * field.scale / gate_duration) + field.offset

return mean_callable
elif raw and (field.scale != 1 or field.offset != 0):
elif raw and field.has_scale_or_offset:
return lambda data: data[column_name] * field.scale + field.offset
else:
return lambda data: data[column_name]
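
A rough sketch of what the `has_scale_or_offset` branch above returns for a raw column; the field values, column name, and data are invented for illustration.

```python
import numpy as np

from pandablocks.responses import FieldCapture

field = FieldCapture(
    name="COUNTER1.OUT", type=np.dtype("double"), capture="Value",
    scale=2.0, offset=1.5, units="",
)
column_name = f"{field.name}.{field.capture}"

# A tiny structured array standing in for one raw FrameData block.
data = np.array([(0.0,), (1.0,), (2.0,)], dtype=[(column_name, "<f8")])

assert field.has_scale_or_offset
scaled = data[column_name] * field.scale + field.offset  # what the returned lambda computes
print(scaled)  # [1.5 3.5 5.5]
```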
@@ -289,7 +286,7 @@ async def write_hdf_files(
try:
async for data in client.data(scaled=False, flush_period=flush_period):
pipeline[0].queue.put_nowait(data)
if type(data) == EndData:
if isinstance(data, EndData):
end_data = data
counter += 1
if counter == num:
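And a short sketch of the dtype selection that `raw_mode_dataset_dtype` now encodes for `create_dataset` (field values invented): in raw mode anything carrying a scale and offset is stored as float64, while a `PCAP.BITS`-style field keeps its transmitted dtype.

```python
import numpy as np

from pandablocks.responses import FieldCapture

bits = FieldCapture(
    name="PCAP.BITS0", type=np.dtype("uint32"), capture="Value",
    scale=None, offset=None, units=None,
)
counter = FieldCapture(
    name="COUNTER1.OUT", type=np.dtype("double"), capture="Value",
    scale=1.0, offset=0.0, units="",
)

raw = True
for f in (bits, counter):
    # Mirrors the new one-liner in create_dataset().
    dtype = f.raw_mode_dataset_dtype if raw else f.type
    print(f.name, dtype)
# PCAP.BITS0 uint32     -> passed through untouched
# COUNTER1.OUT float64  -> always double, even with scale=1 and offset=0
```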
43 changes: 34 additions & 9 deletions src/pandablocks/responses.py
@@ -1,4 +1,4 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Tuple

@@ -223,21 +223,46 @@ class EndReason(Enum):
class FieldCapture:
"""Information about a field that is being captured

If `scale`, `offset`, and `units` are all `None`, then the field is a
``PCAP.BITS`` field.

Attributes:
name: Name of captured field
type: Numpy data type of the field as transmitted
capture: Value of CAPTURE field used to enable this field
scale: Scaling factor, default 1.0
offset: Offset, default 0.0
units: Units string, default ""
scale: Scaling factor
offset: Offset
units: Units string
"""

name: str
type: np.dtype
capture: str
scale: float = 1.0
offset: float = 0.0
units: str = ""
scale: Optional[float] = field(default=None)
offset: Optional[float] = field(default=None)
units: Optional[str] = field(default=None)

def __post_init__(self):
sou = (self.scale, self.offset, self.units)
if sou != (None, None, None) and None in sou:
raise ValueError(
f"If any of `scale={self.scale}`, `offset={self.offset}`"
f", or `units={self.units}` is set, all must be set."
)

@property
def raw_mode_dataset_dtype(self) -> np.dtype:
"""We use double for all dtypes that have scale and offset."""
if self.scale is not None and self.offset is not None:
return np.dtype("float64")
return self.type

@property
def has_scale_or_offset(self) -> bool:
"""Return True if this field is a PCAP.BITS or PCAP.SAMPLES field"""
return (self.scale is not None and self.offset is not None) and (
self.scale != 1 or self.offset != 0
)


class Data:
@@ -289,8 +314,8 @@ class FrameData(Data):
... (2, 12)],
... dtype=[('COUNTER1.OUT.Value', '<f8'), ('COUNTER2.OUT.Value', '<f8')])
>>> fdata = FrameData(data)
>>> fdata.data[0] # Row view
(0., 10.)
>>> (fdata.data[0]['COUNTER1.OUT.Value'], fdata.data[0]['COUNTER2.OUT.Value'])
(np.float64(0.0), np.float64(10.0))
>>> fdata.column_names # Column names
('COUNTER1.OUT.Value', 'COUNTER2.OUT.Value')
>>> fdata.data['COUNTER1.OUT.Value'] # Column view
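A brief usage sketch of the new all-or-none constraint (names invented): either all of `scale`, `offset`, and `units` are given, or none are.

```python
import numpy as np

from pandablocks.responses import FieldCapture

# All three unset: accepted, and treated as a PCAP.BITS-style field.
bits = FieldCapture(
    name="PCAP.BITS1", type=np.dtype("uint32"), capture="Value",
    scale=None, offset=None, units=None,
)
assert not bits.has_scale_or_offset

# Partially set: rejected by __post_init__.
try:
    FieldCapture(
        name="BAD.FIELD", type=np.dtype("uint32"), capture="Value",
        scale=1.0, offset=None, units="",
    )
except ValueError as err:
    print(err)  # If any of `scale=1.0`, `offset=None`, or `units=` is set, all must be set.
```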
4 changes: 3 additions & 1 deletion src/pandablocks/utils.py
@@ -62,7 +62,9 @@ def words_to_table(

if field_info.subtype == "int":
# First convert from 2's complement to offset, then add in offset.
temp = (value ^ (1 << (bit_length - 1))) + (-1 << (bit_length - 1))
temp = (value.astype(np.int64) ^ (1 << (bit_length - 1))) + (
-1 << (bit_length - 1)
)
packing_value = temp.astype(np.int32)
elif field_info.subtype == "enum" and convert_enum_indices:
assert field_info.labels, f"Enum field {field_name} has no labels"
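For reference, a worked example of the sign-extension identity used above, on a hypothetical 4-bit signed sub-field; widening to int64 first (as the diff does) means adding the negative offset to an unsigned word stays well-defined.

```python
import numpy as np

# Hypothetical 4-bit signed values packed into unsigned words:
# 0b1111 -> -1, 0b0111 -> 7, 0b1000 -> -8.
bit_length = 4
value = np.array([0b1111, 0b0111, 0b1000], dtype=np.uint32)

# Same identity as above: XOR flips the sign bit (giving offset-binary form),
# then adding -(2**(bit_length - 1)) shifts the range back to signed values.
temp = (value.astype(np.int64) ^ (1 << (bit_length - 1))) + (-1 << (bit_length - 1))
print(temp.astype(np.int32))  # [-1  7 -8]
```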
6 changes: 3 additions & 3 deletions tests/conftest.py
@@ -88,9 +88,9 @@ def overrun_dump():
name="PCAP.BITS2",
type=np.dtype("uint32"),
capture="Value",
scale=1,
offset=0,
units="",
scale=None,
offset=None,
units=None,
),
FieldCapture(
name="COUNTER1.OUT",
78 changes: 78 additions & 0 deletions tests/test_hdf.py
@@ -61,6 +61,84 @@ def __init__(self):
stop_pipeline(pipeline)


def test_field_capture_pcap_bits():
pcap_bits_frame_data = FieldCapture(
name="PCAP.BITS",
type=np.dtype("uint32"),
capture="Value",
scale=None,
offset=None,
units=None,
)

assert not pcap_bits_frame_data.has_scale_or_offset
assert pcap_bits_frame_data.raw_mode_dataset_dtype is np.dtype("uint32")

frame_data_without_scale_offset = FieldCapture(
name="frame_data_without_scale_offset",
type=np.dtype("uint32"),
capture="Value",
scale=1.0,
offset=0.0,
units="",
)

assert not frame_data_without_scale_offset.has_scale_or_offset
assert frame_data_without_scale_offset.raw_mode_dataset_dtype is np.dtype("float64")

with pytest.raises(
ValueError,
match=(
"If any of `scale=None`, `offset=0.0`, or "
"`units=` is set, all must be set"
),
):
_ = FieldCapture(
name="malformed_frame_data",
type=np.dtype("uint32"),
capture="Value",
scale=None,
offset=0.0,
units="",
)

frame_data_with_offset = FieldCapture(
name="frame_data_with_offset",
type=np.dtype("uint32"),
capture="Value",
scale=1.0,
offset=1.0,
units="",
)
frame_data_with_scale = FieldCapture(
name="frame_data_with_scale",
type=np.dtype("uint32"),
capture="Value",
scale=1.1,
offset=0.0,
units="",
)

assert frame_data_with_offset.has_scale_or_offset
assert frame_data_with_offset.raw_mode_dataset_dtype is np.dtype("float64")
assert frame_data_with_scale.has_scale_or_offset
assert frame_data_with_scale.raw_mode_dataset_dtype is np.dtype("float64")

frame_data_with_scale_and_offset = FieldCapture(
name="frame_data_with_scale_and_offset",
type=np.dtype("uint32"),
capture="Value",
scale=1.1,
offset=0.0,
units="",
)

assert frame_data_with_scale_and_offset.has_scale_or_offset
assert frame_data_with_scale_and_offset.raw_mode_dataset_dtype is np.dtype(
"float64"
)


@pytest.mark.parametrize(
"capture_record_hdf_names,expected_names",
[