spectraphilic · mkrupcale · Mar 14, 2017 · jdavid · Mar 24, 2017 · mkrupcale
diff --git a/.gitignore b/.gitignore
@@ -11,8 +11,9 @@ doc/build/*
 .project
 .pydev*
 f2py_build/f90/*
-FortFlex.so
+FortFlex*.so
 .idea/*
 
 /.cache/
 .*.swp
+
diff --git a/reflexible/conv2netcdf4/flexpart_read.py b/reflexible/conv2netcdf4/flexpart_read.py
@@ -5,6 +5,7 @@
 import os
 import datetime
 import re
+import string
 import numpy as np
 import bcolz
 from math import pi, sqrt, cos
@@ -40,67 +41,121 @@ def read_releases_v10(pathname):
 
     Returns
     -------
-    A ctable object from bcolz package.
+    A dictionary with the release data
+
+    .. tabularcolumns::  |l|L|
+
+    ===================      ========================================
+    Keys                     Description
+    ===================      ========================================
+    releases                 release data object from bcolz package
+    spec_ids                 array of species id integers
+    ===================      ========================================
     """
-    # Setup the container for the data
-    dtype = [('IDATE1', np.int32), ('ITIME1', np.int32),
-             ('IDATE2', np.int32), ('ITIME2', np.int32),
-             ('LON1', np.float32), ('LON2', np.float32),
-             ('LAT1', np.float32), ('LAT2', np.float32),
-             ('Z1', np.float32), ('Z2', np.float32),
-             ('ZKIND', np.int8), ('MASS', np.float32),
-             ('PARTS', np.int32), ('COMMENT', 'S32')]
+    # Read the input
+    input_str = open(pathname, 'r').read()
+    group_names = ['&RELEASES_CTRL\n', '&RELEASE\n']
+    end_group = '/\n'
+    dtypes = [[('NSPEC', np.int32), ('SPECNUM_REL', object)],
+              [('IDATE1', np.int32), ('ITIME1', np.int32),
+               ('IDATE2', np.int32), ('ITIME2', np.int32),
+               ('LON1', np.float32), ('LON2', np.float32),
+               ('LAT1', np.float32), ('LAT2', np.float32),
+               ('Z1', np.float32), ('Z2', np.float32),
+               ('ZKIND', np.int8), ('MASS', np.float32),
+               ('PARTS', np.int32), ('COMMENT', 'S32')]]
+
+    # Matches a key = value pair to obtain the value
+    namelist_group_re = r'[^\s,\=]+?\s*=\s*((?:[^=]+)|(?:(?:\"|\').+))(?=[\s,][^\s,\=]+\s*\=|$)'
+
     cparams = bcolz.cparams(cname="lz4", clevel=6, shuffle=1)
-    ctable = bcolz.zeros(0, dtype=dtype, cparams=cparams)
-    nrecords = ctable['IDATE1'].chunklen
-    releases = np.zeros(nrecords, dtype=dtype)
+
+    for k, group_name in enumerate(group_names):
+        len_marker = len(group_name)
+        ctable = bcolz.zeros(0, dtype=dtypes[k], cparams=cparams)
+        nrecords = ctable[ctable.names[0]].chunklen
+        values = np.zeros(nrecords, dtype=dtypes[k])
+        if k == 0:
+            releases_species = ctable
+        elif k == 1:
+            releases = ctable
+        # Loop over all the marker groups
+        i, n = 0, 0
+        while True:
+            i = input_str.find(group_name, i)
+            if i == -1:
+                break  # group marker is not found anymore
+            j = input_str.find(end_group, i + 1)
+            n += 1
+            group_block = input_str[i + len_marker: j]
+            i = j
+            values_tuple = tuple([s.strip(',"\''+string.whitespace) for s in re.findall(namelist_group_re, group_block)])
 
-    # Prepare for reading the input
-    input_str = open(pathname, 'r').read()
-    marker = "&RELEASE\n"
-    len_marker = len(marker)
-    release_re = r'\S+=\s+[\"|\s](\S+)[,|\"|\w]'
-
-    # Loop over all the marker groups
-    i, n = 0, 0
-    while True:
-        i = input_str.find(marker, i)
-        j = input_str.find(marker, i + 1)
-        n += 1
-        group_block = input_str[i + len_marker: j]
-        i = j
-        values = tuple(re.findall(release_re, group_block))
-        try:
-            releases[(n - 1) % nrecords] = values
-        except ValueError:
-            print("Problem at: group: %d, %s" % (n, group_block))
-            print("values:", values)
-            raise
-        if (n % nrecords) == 0:
-            ctable.append(releases)
-        if (i == -1) or (j == -1):
-            break  # marker is not found anymore
-    # Remainder
-    ctable.append(releases[:n % nrecords])
-    ctable.flush()
+            try:
+                values[(n - 1) % nrecords] = values_tuple
+            except ValueError:
+                print("Problem at: group: %d, %s" % (n, group_block))
+                print("values:", values_tuple)
+                raise
+            if n % nrecords == 0:
+                ctable.append(values)
 
-    return ctable
+        # Remainder
+        ctable.append(values[:n % nrecords])
+        ctable.flush()
+
+    return {'releases': releases,
+            'spec_ids': np.array(releases_species['SPECNUM_REL'][0].split(','), dtype=int)}
 
 
-def read_releases_v9(path):
+def read_releases_v9(path, headerrows=11):
     """Read metadata from a RELEASES path and return it as a dict.
 
-    Only 'release_point_names' entry returned.
+    Parameters
+    ----------
+    path : pathname
+      Release file name
+
+    Returns
+    -------
+    A dictionary with the release metadata
+
+    .. tabularcolumns::  |l|L|
+
+    ===================      ========================================
+    Keys                     Description
+    ===================      ========================================
+    release_point_names      array of release point name strings
+    spec_ids                 array of species id integers
+    ===================      ========================================
     """
+    spec_start_line = headerrows+1
+    old_format = False
     rpnames = []
     with open(path) as f:
         prev_line = None
+        lnum = 0
+        nspec = 0
+        spec_ids = list()
         for line in f:
-            if prev_line is not None and "comment" in line:
-                rpnames.append(prev_line.strip())
-            prev_line = line
-    # Return just the release point names for now
-    return {"release_point_names": np.array(rpnames, dtype="S45")}
+            lnum += 1
+            if lnum == spec_start_line:
+                nspec = int(line.split()[0])
+            elif lnum == spec_start_line+1 and 'Total' in line:
+                old_format = True
+                skip_lines = 2
+            if not old_format:
+                if lnum > spec_start_line and lnum <= spec_start_line + nspec:
+                    spec_ids.append(int(line.split()[0]))
+            else:
+                if prev_line is not None and "comment" in line:
+                    rpnames.append(prev_line.strip())
+                shift_lnum = lnum - spec_start_line - skip_lines
+                if shift_lnum > 0 and shift_lnum <= nspec*(skip_lines+1) and shift_lnum % (skip_lines+1) == 1:
+                    spec_ids.append(int(line.split()[0]))
+                prev_line = line
+    return {'release_point_names': np.array(rpnames, dtype='S45'),
+            'spec_ids': np.array(spec_ids, dtype=int)}
 
 
 def read_releases_old(path, headerrows=11):

diff --git a/reflexible/conv2netcdf4/fortflex/build_FortFlex.sh b/reflexible/conv2netcdf4/fortflex/build_FortFlex.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-rm -f FortFlex.pyf
 f2py -m FortFlex -h FortFlex.pyf FortFlex.f
 f2py -c --fcompiler=gfortran FortFlex.pyf FortFlex.f
+rm -f FortFlex.pyf
 mv *.so ..
 
diff --git a/reflexible/conv2netcdf4/fortflex/build_FortFlex_py3.sh b/reflexible/conv2netcdf4/fortflex/build_FortFlex_py3.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+f2py3 -m FortFlex -h FortFlex.pyf FortFlex.f
+f2py3 -c --fcompiler=gfortran FortFlex.pyf FortFlex.f
+rm -f FortFlex.pyf
+mv *.so ..
+
diff --git a/reflexible/conv2netcdf4/grid_read.py b/reflexible/conv2netcdf4/grid_read.py
@@ -613,7 +613,7 @@ def readgridV8(H, **kwargs):
 
     # What species to return?
     nspec_ret = OPS.nspec_ret
-    if isinstance(nspec_ret, int):
+    if isinstance(nspec_ret, (int, np.int64)):
         nspec_ret = [nspec_ret]
     assert iter(nspec_ret), "nspec_ret must be iterable."
 
@@ -622,7 +622,7 @@ def readgridV8(H, **kwargs):
     if OPS.time_ret is not None:
         get_dates = []
         time_ret = OPS.time_ret
-        if isinstance(time_ret, int) == True:
+        if isinstance(time_ret, (int, np.int64)):
             time_ret = [time_ret]
 
         if time_ret[0] < 0:
@@ -861,7 +861,7 @@ def readgridV6(H, **kwargs):
 
     # What species to return?
     nspec_ret = OPS.nspec_ret
-    if isinstance(nspec_ret, int):
+    if isinstance(nspec_ret, (int, np.int64)):
         nspec_ret = [nspec_ret]
     assert iter(nspec_ret), "nspec_ret must be iterable."
 
@@ -870,7 +870,7 @@ def readgridV6(H, **kwargs):
     if OPS.time_ret is not None:
         get_dates = []
         time_ret = OPS.time_ret
-        if isinstance(time_ret, int) == True:
+        if isinstance(time_ret, (int, np.int64)):
             time_ret = [time_ret]
 
         if time_ret[0] < 0:
@@ -1097,7 +1097,7 @@ def fill_grids(H, nspec=0, FD=None):
 
     #    assert H.direction == 'backward', "fill_backward is only valid for backward runs"
     # make sure npsec is iterable
-    if isinstance(nspec, int):
+    if isinstance(nspec, (int, np.int64)):
         species = [nspec]
     else:
         species = nspec

diff --git a/reflexible/conv2netcdf4/tests/test_netcdf4_structure.py b/reflexible/conv2netcdf4/tests/test_netcdf4_structure.py
@@ -179,7 +179,7 @@ def test_LAGE(self):
     def test_species_mr(self):
         attr_names = ('units', 'long_name', 'decay', 'weightmolar',
                       'ohreact', 'kao', 'vsetaver', 'spec_ass')
-        for i in range(0, self.H.nspec):
+        for i in range(self.H.nspec):
             anspec = "%3.3d" % (i + 1)
             if self.ncid.iout in (1, 3, 5):
                 var_name = "spec" + anspec + "_mr"
@@ -194,7 +194,7 @@ def test_species_mr(self):
     def test_species_pptv(self):
         attr_names = ('units', 'long_name', 'decay', 'weightmolar',
                       'ohreact', 'kao', 'vsetaver', 'spec_ass')
-        for i in range(0, self.H.nspec):
+        for i in range(self.H.nspec):
             anspec = "%3.3d" % (i + 1)
             if self.ncid.iout in (2, 3):
                 var_name = "spec" + anspec + "_pptv"
@@ -217,7 +217,7 @@ def setup(self, request, tmpdir):
     def test_WDspecies(self):
         attr_names = ('units', 'weta', 'wetb', 'weta_in', 'wetb_in',
                       'wetc_in', 'wetd_in', 'dquer', 'henry')
-        for i in range(0, self.H.nspec):
+        for i in range(self.H.nspec):
             anspec = "%3.3d" % (i + 1)
             if self.dataset.wetdep:
                 var_name = "WD_spec" + anspec
@@ -232,7 +232,7 @@ def test_WDspecies(self):
     def test_DDspecies(self):
         attr_names = ('units', 'dryvel', 'reldiff', 'henry', 'f0',
                       'dquer', 'density', 'dsigma')
-        for i in range(0, self.H.nspec):
+        for i in range(self.H.nspec):
             anspec = "%3.3d" % (i + 1)
             if self.dataset.drydep:
                 var_name = "DD_spec" + anspec

diff --git a/reflexible/flexpart.py b/reflexible/flexpart.py
@@ -48,7 +48,7 @@ def __init__(self, pathnames, nested=False):
         # Other config files
         self.Command = read_conffiles("COMMAND", self.fp_options, None)
         self.Releases = read_conffiles("RELEASES", self.fp_options, None)
-        self.Species = read_species(self.fp_options, self.Header.nspec)
+        self.Species = read_species(self.fp_options, self.Header.nspec, spec_ids=self.Releases['spec_ids'])
 
     def __str__(self):
         return "Flexpart('%s', nested=%s)" % (self.pathnames, self.nested)

diff --git a/reflexible/scripts/create_ncfile.py b/reflexible/scripts/create_ncfile.py
@@ -41,7 +41,7 @@
 MIN_SIZE = False
 
 
-def read_species(options_dir, nspec):
+def read_species(options_dir, nspec, spec_ids=None):
     """Read metadata from SPECIES dir and return it as a dict.
 
     TODO: maybe the current version is specific of FLEXPART 9.
@@ -72,7 +72,9 @@ def read_species(options_dir, nspec):
         }
 
     dspec = defaultdict(list)
-    for spec in range(1, nspec+1):
+    if spec_ids is None:
+        spec_ids = range(1, nspec+1)
+    for spec in spec_ids:
         path = os.path.join(species_dir, "SPECIES_%03d" % spec)
         with open(path) as fspec:
             for line in fspec:
@@ -387,7 +389,7 @@ def write_header(H, ncid, wetdep, drydep, write_releases, species):
 
     chunksizes = (H.numxgrid, H.numygrid, H.numzgrid, 1, 1, 1)[::-1]
     dep_chunksizes = (H.numxgrid, H.numygrid, 1, 1, 1)[::-1]
-    for i in range(0, H.nspec):
+    for i in range(H.nspec):
         anspec = "%3.3d" % (i + 1)
         # iout: 1 conc. output (ng/m3), 2 mixing ratio (pptv), 3 both,
         # 4 plume traject, 5=1+4
@@ -608,7 +610,7 @@ def create_ncfile(pathnames, nested, wetdep=False, drydep=False,
     if options_dir:
         command = read_conffiles("COMMAND", options_dir, command_path)
         releases = read_conffiles("RELEASES", options_dir, releases_path)
-        species = read_species(options_dir, H.nspec)
+        species = read_species(options_dir, H.nspec, spec_ids=releases['spec_ids'])
 
     if outfile:
         # outfile has priority over previous flags