Merge pull request #4 from nawendt/fix-text

Fixed truncated text data on surface and soundings
nawendt · Jan 28, 2022 · 6f53e4b
2 parents cf7088e + a89f442
commit 6f53e4b
Show file tree

Hide file tree

Showing 10 changed files with 121 additions and 60 deletions.
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause License
 
-Copyright (c) 2021, Nathan Wendt
+Copyright (c) 2022, Nathan Wendt
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without

diff --git a/gempakio/decode.py b/gempakio/decode.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Nathan Wendt.
+# Copyright (c) 2022 Nathan Wendt.
 # Distributed under the terms of the BSD 3-Clause License.
 # SPDX-License-Identifier: BSD-3-Clause
 """Classes for decoding various GEMPAK file formats."""
@@ -1139,6 +1139,9 @@ def _unpack_merged(self, sndno):
                     if fmt_code is None:
                         raise NotImplementedError('No methods for data type {}'
                                                   .format(part.data_type))
+                    if fmt_code == 's':
+                        lendat *= BYTES_PER_WORD
+
                     packed_buffer = (
                         self._buffer.read_struct(
                             struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
@@ -1202,6 +1205,9 @@ def _unpack_unmerged(self, sndno):
                     if fmt_code is None:
                         raise NotImplementedError('No methods for data type {}'
                                                   .format(part.data_type))
+                    if fmt_code == 's':
+                        lendat *= BYTES_PER_WORD
+
                     packed_buffer = (
                         self._buffer.read_struct(
                             struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
@@ -1765,18 +1771,17 @@ def _merge_sounding(self, parts):
                     sped = parts['PPDD']['SPED'][i]
                 skip = False
 
-                if (hght == self.prod_desc.missing_float
+                if ((hght == self.prod_desc.missing_float
                    and drct == self.prod_desc.missing_float
-                   and sped == self.prod_desc.missing_float):
+                   and sped == self.prod_desc.missing_float)
+                   or hght <= zold):
                     skip = True
                 elif abs(zold - hght) < 1:
                     skip = True
                     if (merged['DRCT'][ilev - 1] == self.prod_desc.missing_float
                        or merged['SPED'][ilev - 1] == self.prod_desc.missing_float):
                         merged['DRCT'][ilev - 1] = drct
                         merged['SPED'][ilev - 1] = sped
-                elif hght <= zold:
-                    skip = True
                 elif hght >= znxt:
                     while more and hght > znxt:
                         zold = znxt
@@ -2033,9 +2038,36 @@ def __init__(self, file, *args, **kwargs):
         if self.surface_type == 'standard':
             for irow, row_head in enumerate(self.row_headers):
                 for icol, col_head in enumerate(self.column_headers):
+                    for iprt in range(len(self.parts)):
+                        pointer = (self.prod_desc.data_block_ptr
+                                   + (irow * self.prod_desc.columns * self.prod_desc.parts)
+                                   + (icol * self.prod_desc.parts + iprt))
+
+                        self._buffer.jump_to(self._start, _word_to_position(pointer))
+                        data_ptr = self._buffer.read_int(4, self.endian, False)
+
+                        if data_ptr:
+                            self._sfinfo.append(
+                                Surface(
+                                    irow,
+                                    icol,
+                                    datetime.combine(row_head.DATE, row_head.TIME),
+                                    col_head.STID + col_head.STD2,
+                                    col_head.STNM,
+                                    col_head.SLAT,
+                                    col_head.SLON,
+                                    col_head.SELV,
+                                    col_head.STAT,
+                                    col_head.COUN,
+                                )
+                            )
+        elif self.surface_type == 'ship':
+            irow = 0
+            for icol, col_head in enumerate(self.column_headers):
+                for iprt in range(len(self.parts)):
                     pointer = (self.prod_desc.data_block_ptr
                                + (irow * self.prod_desc.columns * self.prod_desc.parts)
-                               + (icol * self.prod_desc.parts))
+                               + (icol * self.prod_desc.parts + iprt))
 
                     self._buffer.jump_to(self._start, _word_to_position(pointer))
                     data_ptr = self._buffer.read_int(4, self.endian, False)
@@ -2045,7 +2077,7 @@ def __init__(self, file, *args, **kwargs):
                             Surface(
                                 irow,
                                 icol,
-                                datetime.combine(row_head.DATE, row_head.TIME),
+                                datetime.combine(col_head.DATE, col_head.TIME),
                                 col_head.STID + col_head.STD2,
                                 col_head.STNM,
                                 col_head.SLAT,
@@ -2055,56 +2087,32 @@ def __init__(self, file, *args, **kwargs):
                                 col_head.COUN,
                             )
                         )
-        elif self.surface_type == 'ship':
-            irow = 0
-            for icol, col_head in enumerate(self.column_headers):
-                pointer = (self.prod_desc.data_block_ptr
-                           + (irow * self.prod_desc.columns * self.prod_desc.parts)
-                           + (icol * self.prod_desc.parts))
-
-                self._buffer.jump_to(self._start, _word_to_position(pointer))
-                data_ptr = self._buffer.read_int(4, self.endian, False)
-
-                if data_ptr:
-                    self._sfinfo.append(
-                        Surface(
-                            irow,
-                            icol,
-                            datetime.combine(col_head.DATE, col_head.TIME),
-                            col_head.STID + col_head.STD2,
-                            col_head.STNM,
-                            col_head.SLAT,
-                            col_head.SLON,
-                            col_head.SELV,
-                            col_head.STAT,
-                            col_head.COUN,
-                        )
-                    )
         elif self.surface_type == 'climate':
             for icol, col_head in enumerate(self.column_headers):
                 for irow, row_head in enumerate(self.row_headers):
-                    pointer = (self.prod_desc.data_block_ptr
-                               + (irow * self.prod_desc.columns * self.prod_desc.parts)
-                               + (icol * self.prod_desc.parts))
-
-                    self._buffer.jump_to(self._start, _word_to_position(pointer))
-                    data_ptr = self._buffer.read_int(4, self.endian, False)
-
-                    if data_ptr:
-                        self._sfinfo.append(
-                            Surface(
-                                irow,
-                                icol,
-                                datetime.combine(col_head.DATE, col_head.TIME),
-                                row_head.STID + row_head.STD2,
-                                row_head.STNM,
-                                row_head.SLAT,
-                                row_head.SLON,
-                                row_head.SELV,
-                                row_head.STAT,
-                                row_head.COUN,
+                    for iprt in range(len(self.parts)):
+                        pointer = (self.prod_desc.data_block_ptr
+                                   + (irow * self.prod_desc.columns * self.prod_desc.parts)
+                                   + (icol * self.prod_desc.parts + iprt))
+
+                        self._buffer.jump_to(self._start, _word_to_position(pointer))
+                        data_ptr = self._buffer.read_int(4, self.endian, False)
+
+                        if data_ptr:
+                            self._sfinfo.append(
+                                Surface(
+                                    irow,
+                                    icol,
+                                    datetime.combine(col_head.DATE, col_head.TIME),
+                                    row_head.STID + row_head.STD2,
+                                    row_head.STNM,
+                                    row_head.SLAT,
+                                    row_head.SLON,
+                                    row_head.SELV,
+                                    row_head.STAT,
+                                    row_head.COUN,
+                                )
                             )
-                        )
         else:
             raise TypeError('Unknown surface type {}'.format(self.surface_type))
 
@@ -2183,6 +2191,9 @@ def _unpack_climate(self, sfcno):
                     if fmt_code is None:
                         raise NotImplementedError('No methods for data type {}'
                                                   .format(part.data_type))
+                    if fmt_code == 's':
+                        lendat *= BYTES_PER_WORD
+
                     packed_buffer = (
                         self._buffer.read_struct(
                             struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
@@ -2250,6 +2261,9 @@ def _unpack_ship(self, sfcno):
                 if fmt_code is None:
                     raise NotImplementedError('No methods for data type {}'
                                               .format(part.data_type))
+                if fmt_code == 's':
+                    lendat *= BYTES_PER_WORD
+
                 packed_buffer = (
                     self._buffer.read_struct(
                         struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
@@ -2304,6 +2318,11 @@ def _unpack_standard(self, sfcno):
                     self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
                     self.data_header_length = self._buffer.read_int(4, self.endian, False)
                     data_header = self._buffer.set_mark()
+                    # if part.header_length == 1:
+                    #     ihhmm = self._buffer.read_int(4, self.endian, False)
+                    # if part.header_length == 2:
+                    #     nreps = self._buffer.read_int(4, self.endian, False)
+                    #     ihhmm = self._buffer.read_int(4, self.endian, False)
                     self._buffer.jump_to(data_header,
                                          _word_to_position(part.header_length + 1))
                     lendat = self.data_header_length - part.header_length
@@ -2317,6 +2336,9 @@ def _unpack_standard(self, sfcno):
                     if fmt_code is None:
                         raise NotImplementedError('No methods for data type {}'
                                                   .format(part.data_type))
+                    if fmt_code == 's':
+                        lendat *= BYTES_PER_WORD
+
                     packed_buffer = (
                         self._buffer.read_struct(
                             struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')

diff --git a/gempakio/tools.py b/gempakio/tools.py
@@ -1,3 +1,4 @@
+# Copyright (c) 2009,2016,2019 MetPy Developers.
 # Copyright (c) 2021 Nathan Wendt.
 # Distributed under the terms of the BSD 3-Clause License.
 # SPDX-License-Identifier: BSD-3-Clause

diff --git a/setup.py b/setup.py
@@ -1,12 +1,12 @@
-# Copyright (c) 2021 Nathan Wendt.
+# Copyright (c) 2022 Nathan Wendt.
 # Distributed under the terms of the BSD 3-Clause License.
 # SPDX-License-Identifier: BSD-3-Clause
 """Setup script for installing gempakIO."""
 
 from setuptools import find_packages, setup
 
 NAME = 'gempakio'
-VERSION = '0.6'
+VERSION = '0.6.2'
 DESCR = 'Read GEMPAK data with pure Python.'
 URL = 'https://github.com/nawendt/gempakio'
 REQUIRES = ['pyproj', 'xarray']

diff --git a/tests/data/msn_std_sfc.csv b/tests/data/msn_std_sfc.csv
@@ -0,0 +1,2 @@
+TEXT,SPCL
+"KMSN 062353Z 01003KT 10SM CLR 21/10 A2986 RMK AO2 SLP111 T02110100 10261 20211 58011 $","KMSN 071604Z 23009G18KT 2SM R36/3000VP6000FT -TSRA BKN027CB OVC080 22/18 A2966"
diff --git a/tests/data/msn_std_sfc.sfc b/tests/data/msn_std_sfc.sfc
diff --git a/tests/data/unmerged_with_text.csv b/tests/data/unmerged_with_text.csv
@@ -0,0 +1,2 @@
+TXTA,TXTB,TXTC,TXPB
+"TTAA 57001 72357 99977 22458 15007 00142 ///// ///// 92818 18056 15518 85535 11834 17514 70148 06271 05504 50582 07599 07047 40751 20598 06036 30957 36187 04025 25081 46375 35531 20226 55172 34047 15406 62574 33036 10649 74363 29022 88105 73963 27521 77999 31313 51108 82302","TTBB 57008 72357 00977 22458 11976 22259 22847 11028 33825 09632 44821 09846 55817 12064 66812 12267 77786 10467 88777 11475 99752 10872 11565 05181 22517 05199 33415 19581 44341 30382 55320 31991 66220 53565 77190 55578 88165 61574 99145 61775 11105 73963 22101 74163 31313 51108 82302 41414 00900 51515 10164 00091 10194 16015 13005","TTCC 57002 72357 70859 73164 28512 50060 66371 28511 30379 51586 22516 20642 50986 29508 88999 77999 31313 51108 82302","PPBB 57008 72357 90012 15007 15007 15018 90345 15518 17016 17512 90678 16012 13509 10005 909// 04007 91012 03003 06505 08005 91345 10509 11010 10515 9168/ 09524 07547 92059 06546 06036 06032 93024 05028 03524 00530 9357/ 36030 34037 94257 34055 33049 33009 9489/ 25505 21512 9504/ 22017 29020"
diff --git a/tests/data/unmerged_with_text.snd b/tests/data/unmerged_with_text.snd
diff --git a/tests/test_soundings.py b/tests/test_soundings.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Nathan Wendt.
+# Copyright (c) 2022 Nathan Wendt.
 # Distributed under the terms of the BSD 3-Clause License.
 # SPDX-License-Identifier: BSD-3-Clause
 """Tests for decoding GEMPAK grid files."""
@@ -74,6 +74,22 @@ def test_merged():
     np.testing.assert_allclose(gdtar, ddtar, rtol=1e-10, atol=1e-2)
 
 
+@pytest.mark.parametrize('text_type', ['txta', 'txtb', 'txtc', 'txpb'])
+def test_radat_text(text_type):
+    """Test for proper decoding of RADAT text."""
+
+    g = Path(__file__).parent / 'data' / 'unmerged_with_text.snd'
+    d = Path(__file__).parent / 'data' / 'unmerged_with_text.csv'
+
+    gso = GempakSounding(g).snxarray(station_id='OUN')[0]
+    gempak = pd.read_csv(d)
+
+    text = gso.attrs['RADAT'][text_type]
+    gem_text = gempak.loc[:, text_type.upper()][0]
+
+    assert text == gem_text
+
+
 @pytest.mark.parametrize('gem,gio,station', [
     ('top_sigw_hght_unmrg.csv', 'top_sigw_hght_unmrg.snd', 'TOP'),
     ('waml_sigw_pres_unmrg.csv', 'waml_sigw_pres_unmrg.snd', 'WAML')

diff --git a/tests/test_surface.py b/tests/test_surface.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Nathan Wendt.
+# Copyright (c) 2022 Nathan Wendt.
 # Distributed under the terms of the BSD 3-Clause License.
 # SPDX-License-Identifier: BSD-3-Clause
 """Tests for decoding GEMPAK surface files."""
@@ -18,7 +18,7 @@ def test_standard_surface():
     def dtparse(string):
         return datetime.strptime(string, '%y%m%d/%H%M')
 
-    skip = ['text']
+    skip = ['text', 'spcl']
 
     g = Path(__file__).parent / 'data' / 'lwc_std_sfc.sfc'
     d = Path(__file__).parent / 'data' / 'lwc_std_sfc.csv'
@@ -47,7 +47,7 @@ def test_ship_surface():
     def dtparse(string):
         return datetime.strptime(string, '%y%m%d/%H%M')
 
-    skip = ['text']
+    skip = ['text', 'spcl']
 
     g = Path(__file__).parent / 'data' / 'ship_sfc.sfc'
     d = Path(__file__).parent / 'data' / 'ship_sfc.csv'
@@ -74,3 +74,21 @@ def dtparse(string):
                 decoded_vals = [d['values'][param.lower()] for d in gstns]
                 actual_vals = ugem.loc[:, param].values
                 np.testing.assert_allclose(decoded_vals, actual_vals)
+
+
+@pytest.mark.parametrize('text_type,date_time', [
+    ('text', '202109070000'), ('spcl', '202109071600')
+])
+def test_surface_text(text_type, date_time):
+    """Test text decoding of surface hourly and special observations."""
+
+    g = Path(__file__).parent / 'data' / 'msn_std_sfc.sfc'
+    d = Path(__file__).parent / 'data' / 'msn_std_sfc.csv'
+
+    gsf = GempakSurface(g)
+    text = gsf.nearest_time(date_time, station_id='MSN')[0]['values'][text_type]
+
+    gempak = pd.read_csv(d)
+    gem_text = gempak.loc[:, text_type.upper()][0]
+
+    assert text == gem_text
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		TEXT,SPCL
		"KMSN 062353Z 01003KT 10SM CLR 21/10 A2986 RMK AO2 SLP111 T02110100 10261 20211 58011 $","KMSN 071604Z 23009G18KT 2SM R36/3000VP6000FT -TSRA BKN027CB OVC080 22/18 A2966"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		TXTA,TXTB,TXTC,TXPB
		"TTAA 57001 72357 99977 22458 15007 00142 ///// ///// 92818 18056 15518 85535 11834 17514 70148 06271 05504 50582 07599 07047 40751 20598 06036 30957 36187 04025 25081 46375 35531 20226 55172 34047 15406 62574 33036 10649 74363 29022 88105 73963 27521 77999 31313 51108 82302","TTBB 57008 72357 00977 22458 11976 22259 22847 11028 33825 09632 44821 09846 55817 12064 66812 12267 77786 10467 88777 11475 99752 10872 11565 05181 22517 05199 33415 19581 44341 30382 55320 31991 66220 53565 77190 55578 88165 61574 99145 61775 11105 73963 22101 74163 31313 51108 82302 41414 00900 51515 10164 00091 10194 16015 13005","TTCC 57002 72357 70859 73164 28512 50060 66371 28511 30379 51586 22516 20642 50986 29508 88999 77999 31313 51108 82302","PPBB 57008 72357 90012 15007 15007 15018 90345 15518 17016 17512 90678 16012 13509 10005 909// 04007 91012 03003 06505 08005 91345 10509 11010 10515 9168/ 09524 07547 92059 06546 06036 06032 93024 05028 03524 00530 9357/ 36030 34037 94257 34055 33049 33009 9489/ 25505 21512 9504/ 22017 29020"