[WIP] Octree level of detail #122

Draft · wants to merge 6 commits into base: main
43 changes: 25 additions & 18 deletions src/firefly/data_reader/octree.py
@@ -2,23 +2,11 @@
from operator import attrgetter
import os
import numpy as np
from .json_utils import write_to_json

import json

from .binary_writer import OctBinaryWriter

#https://stackoverflow.com/questions/56250514/how-to-tackle-with-error-object-of-type-int32-is-not-json-serializable
#to help with dumping to json
class npEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj,np.ndarray):
return obj.tolist()
if isinstance(obj, np.int32):
return int(obj)
if isinstance(obj, np.float32):
return float(obj)
return json.JSONEncoder.default(self, obj)

octant_offsets = 0.25 * np.array([
[-1,-1,-1], ## x < 0, y < 0, z < 0 -> 000
[ 1,-1,-1], ## x > 0, y < 0, z < 0 -> 100
@@ -195,7 +183,8 @@ def __repr__(self):
def __init__(
self,
particle_group,
max_npart_per_node=1000):
max_npart_per_node=1000,
use_lod=True):
'''
inputFile : path to the file. For now only text files.
NMemoryMax : the maximum number of particles to save in the memory before writing to a file
@@ -316,14 +305,28 @@ def __init__(
max_npart_per_node=max_npart_per_node)
}

## LoD masks should be boolean type, not indices
if use_lod:
    self.lod_masks = []
    for lod_dec in [100]:
        ## reserve a random 1-in-lod_dec subset of particles for the coarse level of detail
        inds = np.arange(self.coordinates.shape[0])
        np.random.default_rng().shuffle(inds)
        mask = np.zeros(self.coordinates.shape[0], dtype=bool)
        mask[inds[::lod_dec]] = True
        self.lod_masks += [mask]
else: self.lod_masks = [np.zeros(self.coordinates.shape[0], dtype=bool)]
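To make the masking scheme concrete, here is a minimal standalone sketch (the particle count and lod_dec value are illustrative): roughly one in every lod_dec particles is flagged True and reserved for the coarse level of detail, and only the unflagged particles are inserted into the tree.

    import numpy as np

    n_particles, lod_dec = 10_000, 100

    ## shuffle an index array so the strided selection below is a uniform random sample
    inds = np.arange(n_particles)
    np.random.default_rng().shuffle(inds)

    ## boolean mask: True marks particles reserved for the coarse LoD
    mask = np.zeros(n_particles, dtype=bool)
    mask[inds[::lod_dec]] = True

    print(mask.sum())    ## ~n_particles/lod_dec particles in the coarse level (100 here)
    print((~mask).sum()) ## the remaining particles get inserted into the tree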

def buildOctree(self,start_octant=''):

node = self.nodes[start_octant]
end = int((~self.lod_masks[0]).sum()) ## only points outside the LoD mask get inserted below
velocity = None
rgba_color = None
print(f"Building octree of {end:d} points")
for i,(point,fields) in enumerate(zip(self.coordinates,self.fieldss)):
keep = ~self.lod_masks[0] ## particles not reserved for the coarse LoD
## mask every per-particle array the same way so the loop index i stays aligned
if self.velocities is not None: masked_velocities = self.velocities[keep]
for i,(point,fields) in enumerate(zip(
    self.coordinates[keep],
    self.fieldss[keep])):

if not (i % 10000): print("%.2f"%(i/end*100)+"%",end='\t')

if self.velocities is not None: velocity = masked_velocities[i]
@@ -335,6 +338,10 @@ def buildOctree(self,start_octant=''):
## if there are any outliers, let's stuff them in the root node
self.__store_outliers_in_root()


## handle level of detail insertion
if len(self.lod_masks) > 1: raise NotImplementedError("Only base LoD is implemented.")

## we want the nodelist to be sorted s.t. the highest refinement levels are first
## so that if we decide to prune the tree all children will get added to their parent before
## the parent is itself pruned. we'll do that with a "complex sort" (google it)
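For reference, the "complex sort" the comment alludes to packs two sort keys into a single complex number; NumPy orders complex values lexicographically, by real part first and imaginary part second. A small sketch with hypothetical node names (each character is an octant, so the string length encodes the refinement level):

    import numpy as np

    ## hypothetical node names: '' is the root, '013' is three levels deep
    names = ['', '0', '01', '1', '013']

    ## real part: negative refinement level (deepest first);
    ## imaginary part: a tiebreaker that keeps the order deterministic
    keys = np.array([-len(name) + 1j*i for i, name in enumerate(names)])
    order = np.argsort(keys) ## complex values sort by real part, then imaginary

    ## children now appear before their parents, as the pruning logic requires
    print([names[i] for i in order]) ## ['013', '01', '0', '1', '']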
@@ -553,7 +560,8 @@ def write_octree_json(
#'header':flag_dict,
##'node_arrays':node_arrays,
'octree':{},
'Coordinates_flat':np.zeros(3*num_nodes)
'Coordinates_flat':np.zeros(3*num_nodes),
'use_lod':bool(np.sum(self.lod_masks[0])>0)
}

if self.velocities is not None: json_dict['Velocities_flat'] = np.zeros(3*num_nodes)
@@ -639,8 +647,7 @@ def write_octree_json(

print('done!',flush=True)

with open(octree_fname, 'w') as f:
json.dump(json_dict, f, cls=npEncoder)
write_to_json(json_dict, octree_fname)

octree_fname = octree_fname.split(os.path.join('static','data',''))[1]
return octree_fname,num_nodes
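The json.dump call with the custom encoder is replaced by the shared write_to_json helper. Its real implementation lives in json_utils.py and is not shown in this diff; a minimal sketch of a numpy-aware serializer along the lines of the deleted npEncoder would be:

    import json
    import numpy as np

    def write_to_json(json_dict, fname):
        ## sketch only: the actual helper in json_utils.py may differ
        class NumpyEncoder(json.JSONEncoder):
            def default(self, obj):
                if isinstance(obj, np.ndarray): return obj.tolist()
                if isinstance(obj, np.integer): return int(obj)
                if isinstance(obj, np.floating): return float(obj)
                return super().default(obj)
        with open(fname, 'w') as handle:
            json.dump(json_dict, handle, cls=NumpyEncoder)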
84 changes: 36 additions & 48 deletions src/firefly/data_reader/particlegroup.py
@@ -83,7 +83,6 @@ def __init__(
field_colormap_flags=None,
field_radius_flags=None,
decimation_factor=1,
filenames_and_nparts=None,
attached_settings=None,
loud=True,
**settings_kwargs):
@@ -121,19 +120,6 @@ def __init__(
:param decimation_factor: factor by which to reduce the data randomly
i.e. :code:`data=data[::decimation_factor]`, defaults to 1
:type decimation_factor: int, optional
:param filenames_and_nparts: Allows you to manually control how the particles
are distributed among the sub-JSON files, it is
**highly recommended that you leave this to** None such that particles are equally
distributed among the :code:`.jsons` but if for whatever reason you need fine-tuning
you should pass a list of tuples in the form

:code:`[("json_name0.json",nparts_this_file0),("json_name1.json",nparts_this_file1) ... ]`

where the sum of :code:`nparts_this_file%d` is exactly :code:`nparts`. These files
will automatically be added to :code:`filenames.json` if you use
an attached :class:`firefly.data_reader.Reader` and
its :class:`~firefly.data_reader.Reader.writeToDisk` method, defaults to None
:type filenames_and_nparts: list of tuple of (str,int), optional
:param attached_settings: :class:`~firefly.data_reader.Settings` instance that should be linked
to this particle group such that GUI elements are connected correctly. If not provided here
can be attached after-the-fact using the
@@ -235,19 +221,6 @@ def __init__(
self.field_colormap_flags = np.array(field_colormap_flags)
self.field_radius_flags = np.array(field_radius_flags)

## validate filenames and nparts if anyone was so foolhardy to
## send it in themselves
if filenames_and_nparts is not None:
try:
assert type(filenames_and_nparts[0]) == tuple
assert type(filenames_and_nparts[0][0]) == str
assert type(filenames_and_nparts[0][1]) == int
except AssertionError:
ValueError("filenames_and_nparts should be a list of tuples of strings and ints")

self.filenames_and_nparts = filenames_and_nparts


## TODO how do these interface with javascript code?
self.radiusFunction = None
self.weightFunction = None
@@ -483,6 +456,13 @@ def outputToDict(

def createOctree(self,npart_min_node=2e2,npart_max_node=1e3):

shuffle_indices = np.arange(self.coordinates.shape[0])
## rng.shuffle permutes shuffle_indices in-place
np.random.default_rng().shuffle(shuffle_indices)
## apply the same permutation to every per-particle array so they stay aligned
self.coordinates = self.coordinates[shuffle_indices]
if self.velocities is not None: self.velocities = self.velocities[shuffle_indices]
if len(self.field_names) > 0: self.field_arrays = self.field_arrays[...,shuffle_indices]

## initialize the octree
self.octree = Octree(self,npart_max_node)
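Why shuffle before building the tree? Once the particle arrays are randomly permuted, any stride or contiguous slice is a uniform random subsample, which is what makes the simple inds[::lod_dec] selection in the Octree constructor statistically representative. A quick self-contained check (size and seed are illustrative):

    import numpy as np

    rng = np.random.default_rng(42) ## seeded here only for reproducibility
    coords = rng.normal(size=(10_000, 3))

    shuffle_indices = np.arange(coords.shape[0])
    rng.shuffle(shuffle_indices) ## in-place permutation
    coords = coords[shuffle_indices]

    ## after shuffling, a strided slice behaves like a random subsample,
    ## so its mean should track the mean of the full set
    print(coords[::100].mean(axis=0))
    print(coords.mean(axis=0))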

@@ -561,7 +541,7 @@ def outputToJSON(
if "json" in fname:
os.remove(os.path.join(full_path,fname))

filenames_and_nparts = self.filenames_and_nparts
filenames_and_nparts = None ## must be None (not []) so the equal-partition branch below runs
## if the user did not specify how we should partition the data between
## sub-JSON files then we'll just do it equally
if filenames_and_nparts is None:
@@ -644,31 +624,39 @@ def outputToFFLY(
self.UIname,
max_npart_per_file)
## mimic return signature: file_array, filenames_and_nparts
return [tree_filename],[(tree_filename,num_nodes)]
octree_file_array = [tree_filename]

## output the lowest level of detail using code below
if np.sum(self.octree.lod_masks[0]) > 0:
octree_filenames_and_nparts = [(tree_filename,0)]
self.dec_inds = self.octree.lod_masks[0]
## don't need to output the lowest level of detail
else:
octree_filenames_and_nparts = [(tree_filename,num_nodes)]
return octree_file_array,octree_filenames_and_nparts ## file_array isn't built yet in this branch
else:
## shuffle particles and decimate as necessary, save the output in dec_inds
self.getDecimationIndexArray()
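The reuse above deserves a note: when LoD particles exist, the tree file is registered with a particle count of 0 and the boolean LoD mask is dropped into self.dec_inds, so the ordinary decimation path below writes exactly the reserved coarse subset with no new writer code. A toy version of that hand-off (names simplified):

    import numpy as np

    coords = np.arange(30).reshape(10, 3) ## ten toy particles
    lod_mask = np.zeros(10, dtype=bool)
    lod_mask[[2, 7]] = True ## pretend these were reserved for the coarse LoD

    dec_inds = lod_mask ## hand the mask to the decimation machinery
    if dec_inds.dtype == bool:
        nparts = int(np.sum(dec_inds)) ## 2 particles to write
        dec_inds = np.where(dec_inds)[0] ## flat index array: [2 7]
    print(coords[dec_inds]) ## only the reserved particles are written out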

## shuffle particles and decimate as necessary, save the output in dec_inds
self.getDecimationIndexArray()
octree_filenames_and_nparts = []
octree_file_array = []

filenames_and_nparts = self.filenames_and_nparts
## if the user did not specify how we should partition the data between
## sub-JSON files then we'll just do it equally
if filenames_and_nparts is None:
## determine if we were passed a boolean mask or a index array
if self.dec_inds.dtype == bool:
nparts = np.sum(self.dec_inds)
self.dec_inds = np.argwhere(self.dec_inds) ## convert to an index array
else: nparts = self.dec_inds.shape[0]
## determine if we were passed a boolean mask or a index array
if self.dec_inds.dtype == bool:
nparts = np.sum(self.dec_inds)
self.dec_inds = np.argwhere(self.dec_inds) ## convert to an index array
else: nparts = self.dec_inds.shape[0]
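One caveat on the boolean-to-index conversion above: np.argwhere on a 1-D mask returns a column vector of shape (N, 1), so fancy indexing picks up an extra axis; np.where(mask)[0] (or argwhere(...).flatten()) gives the flat index array downstream indexing expects. A quick demonstration:

    import numpy as np

    mask = np.array([True, False, True, False])
    print(np.argwhere(mask).shape) ## (2, 1) -- a column vector
    print(np.where(mask)[0]) ## [0 2] -- flat indices, shape (2,)

    coords = np.arange(12).reshape(4, 3)
    print(coords[np.where(mask)[0]].shape) ## (2, 3), as intended
    print(coords[np.argwhere(mask)].shape) ## (2, 1, 3), an easy-to-miss surprise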

## how many sub-files are we going to need?
nfiles = int(nparts/max_npart_per_file + ((nparts%max_npart_per_file)!=0))
## how many sub-files are we going to need?
nfiles = int(nparts/max_npart_per_file + ((nparts%max_npart_per_file)!=0))

## how many particles will each file have and what are they named?
filenames = [os.path.join(short_data_path,"%s%s%03d.ffly"%(file_prefix,self.UIname,i_file)) for i_file in range(nfiles)]
nparts = [min(max_npart_per_file,nparts-(i_file)*(max_npart_per_file)) for i_file in range(nfiles)]
## how many particles will each file have and what are they named?
filenames = [os.path.join(short_data_path,"%s%s%03d.ffly"%(file_prefix,self.UIname,i_file)) for i_file in range(nfiles)]
nparts = [min(max_npart_per_file,nparts-(i_file)*(max_npart_per_file)) for i_file in range(nfiles)]

filenames_and_nparts = list(zip(filenames,nparts))

filenames_and_nparts = list(zip(filenames,nparts))
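The nfiles expression is ceiling division written without math.ceil: add one extra file exactly when there is a nonzero remainder. A worked example with illustrative numbers:

    ## illustrative values
    nparts = 2500
    max_npart_per_file = 1000

    ## ceiling division: 2500/1000 = 2.5, and the nonzero remainder adds the +1
    nfiles = int(nparts/max_npart_per_file + ((nparts % max_npart_per_file) != 0))
    assert nfiles == 3

    ## each file takes up to max_npart_per_file; the last takes the remainder
    counts = [min(max_npart_per_file, nparts - i*max_npart_per_file) for i in range(nfiles)]
    assert counts == [1000, 1000, 500]
    assert sum(counts) == nparts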
file_array = []

## loop through the sub-files
cur_index = 0

@@ -703,4 +691,4 @@ def outputToFFLY(
## move onto the next file
cur_index += nparts_this_file

return file_array,filenames_and_nparts
return file_array+octree_file_array,filenames_and_nparts+octree_filenames_and_nparts