Merge pull request #1237 from SEED-platform/v2-release-hotfix

Release hotfix (matching integrity)
SEED-platform · Feb 3, 2017 · 2ceea20 · 2ceea20
2 parents 3daf0f8 + 41923d5
commit 2ceea20
Show file tree

Hide file tree

Showing 2 changed files with 92 additions and 39 deletions.
diff --git a/seed/data_importer/tasks.py b/seed/data_importer/tasks.py
@@ -24,6 +24,7 @@
 from celery import chord
 from celery import shared_task
 from celery.utils.log import get_task_logger
+from django.db import IntegrityError
 from django.db.models import Q
 from unidecode import unidecode
 
@@ -989,10 +990,28 @@ def union_lol(lol):
     return (canonical_states, noncanonical_states)
 
 
+# NJACHECK - Check function for accuracy
 class EquivalencePartitioner(object):
+    """ TODO: Document purpose of class. Move this class into its own location.
+    TODO: Fix lowerCamelCase methods"""
+
+    def __init__(self, equivalence_class_description, identity_fields):
+        # If identity fields are not equal between two classes then we
+        # force the class to not be equivalent.
+
+        # self.equiv_compare_func = self.makeKeyEquivalenceFunction(equivalence_class_description)
+        self.equiv_comparison_key_func = self.makeResolvedKeyCalculationFunction(
+            equivalence_class_description)
+        self.equiv_canonical_key_func = self.makeCanonicalKeyCalculationFunction(
+            equivalence_class_description)
+
+        self.identity_key_func = self.makeCanonicalKeyCalculationFunction(
+            [(x,) for x in identity_fields])
+
+        return
 
     @classmethod
-    def makeDefaultStateEquivalence(kls, equivalence_type):
+    def make_default_state_equivalence(kls, equivalence_type):
         if equivalence_type == PropertyState:
             return kls.makePropertyStateEquivalence()
         elif equivalence_type == TaxLotState:
@@ -1046,45 +1065,54 @@ def calculate_key_equivalence(key1, key2):
 
     @classmethod
     def makePropertyStateEquivalence(kls):
-        property_equivalence_fields = [("pm_property_id", "custom_id_1"),
-                                       ("custom_id_1",),
-                                       ("normalized_address",)]
-        return kls(property_equivalence_fields)
+        property_equivalence_fields = [
+            ("pm_property_id", "custom_id_1"),
+            ("custom_id_1",),
+            ("normalized_address",)
+        ]
+        property_noequivalence_fields = ["pm_property_id"]
+
+        return kls(property_equivalence_fields, property_noequivalence_fields)
 
     @classmethod
     def makeTaxLotStateEquivalence(kls):
-        tax_lot_equivalence_fields = [("jurisdiction_tax_lot_id", "custom_id_1"),
-                                      ("custom_id_1",),
-                                      ("normalized_address",)]
-        return kls(tax_lot_equivalence_fields)
-
-    def __init__(self, equivalence_class_description):
-        # self.equiv_compare_func = self.makeKeyEquivalenceFunction(equivalence_class_description)
-        self.equiv_comparison_key_func = self.makeResolvedKeyCalculationFunction(
-            equivalence_class_description)
-        self.equiv_canonical_key_func = self.makeCanonicalKeyCalculationFunction(
-            equivalence_class_description)
-        return
+        tax_lot_equivalence_fields = [
+            ("jurisdiction_tax_lot_id", "custom_id_1"),
+            ("custom_id_1",),
+            ("normalized_address",)
+        ]
+        tax_lot_noequivalence_fields = ["jurisdiction_tax_lot_id"]
+        return kls(tax_lot_equivalence_fields, tax_lot_noequivalence_fields)
 
     def calculate_comparison_key(self, obj):
         return self.equiv_comparison_key_func(obj)
 
     def calculate_canonical_key(self, obj):
         return self.equiv_canonical_key_func(obj)
 
-    # def calculate_object_equivalence(self, key, obj):
-    #     return self.equiv_compare_func(key, obj)
+    def calculate_identity_key(self, obj):
+        return self.identity_key_func(obj)
 
     def key_needs_merging(self, original_key, new_key):
         return True in [not a and b for (a, b) in zip(original_key, new_key)]
 
     def merge_keys(self, key1, key2):
         return [a if a else b for (a, b) in zip(key1, key2)]
 
+    def identities_are_different(self, key1, key2):
+        for (x, y) in zip(key1, key2):
+            if x is None or y is None:
+                continue
+            if x != y:
+                return True
+        else:
+            return False
+
     def calculate_equivalence_classes(self, list_of_obj):
         # TODO: Finish writing the equivalence class code.
 
         equivalence_classes = collections.defaultdict(list)
+        identities_for_equivalence = {}
 
         # There is some subtlety with whether we use "comparison" keys
         # or "canonical" keys.  This reflects the difference between
@@ -1098,17 +1126,24 @@ def calculate_equivalence_classes(self, list_of_obj):
         for (ndx, obj) in enumerate(list_of_obj):
             cmp_key = self.calculate_comparison_key(obj)
             can_key = self.calculate_canonical_key(obj)
+            identity_key = self.calculate_identity_key(obj)
 
             for class_key in equivalence_classes:
-                if self.calculate_key_equivalence(class_key, cmp_key):
+                if self.calculate_key_equivalence(class_key,
+                                                  cmp_key) and not self.identities_are_different(
+                        identities_for_equivalence[class_key], identity_key):
+
+                    # Must check the identities to make sure all is a-ok.
                     equivalence_classes[class_key].append(ndx)
 
                     if self.key_needs_merging(class_key, cmp_key):
                         merged_key = self.merge_keys(class_key, cmp_key)
                         equivalence_classes[merged_key] = equivalence_classes.pop(class_key)
+                        identities_for_equivalence[merged_key] = identity_key
                     break
             else:
                 equivalence_classes[can_key].append(ndx)
+                identities_for_equivalence[can_key] = identity_key
         return equivalence_classes  # TODO: Make sure return is correct on this.
 
 
@@ -1117,9 +1152,24 @@ def match_and_merge_unmatched_objects(unmatched_states, partitioner, org, import
     unmatched_tax_lot_states and returns a set of states that
     correspond to unmatched states."""
 
+    _log.debug("Starting to map_and_merge_unmatched_objects")
+
+    # Sort unmatched states by pk. (This shouldn't be happening!)
+    unmatched_states.sort(key=lambda state: state.pk)
+
     # current_match_cycle = import_file.cycle
     # current_match_cycle = Cycle.objects.filter(organization = org).order_by('-start').first()
 
+    def getattrdef(obj, attr, default):
+        if hasattr(obj, attr):
+            return getattr(obj, attr)
+        else:
+            return default
+
+    keyfunction = lambda ndx: (getattrdef(unmatched_states[ndx], "release_date", None),
+                               getattrdef(unmatched_states[ndx], "generation_date", None),
+                               getattrdef(unmatched_states[ndx], "pk", None))
+
     # This removes any states that are duplicates,
     equivalence_classes = partitioner.calculate_equivalence_classes(unmatched_states)
 
@@ -1128,6 +1178,8 @@ def match_and_merge_unmatched_objects(unmatched_states, partitioner, org, import
     merged_objects = []
 
     for (class_key, class_ndxs) in equivalence_classes.items():
+        class_ndxs.sort(key=keyfunction)
+
         if len(class_ndxs) == 1:
             merged_objects.append(unmatched_states[class_ndxs[0]])
             continue
@@ -1147,6 +1199,7 @@ def match_and_merge_unmatched_objects(unmatched_states, partitioner, org, import
         else:
             merged_objects.append(merged_result)
 
+    _log.debug("DONE with map_and_merge_unmatched_objects")
     return merged_objects, equivalence_classes.keys()
 
 
@@ -1168,7 +1221,7 @@ def merge_unmatched_into_views(unmatched_states, partitioner, org, import_file):
         ParentAttrName = "property"
     elif isinstance(unmatched_states[0], TaxLotState):
         ObjectViewClass = TaxLotView
-        ParentAttrName = "tax_lot"
+        ParentAttrName = "taxlot"
     else:
         raise ValueError("Unknown class '{}' passed to merge_unmatched_into_views".format(
             type(unmatched_states[0])))
@@ -1208,12 +1261,17 @@ def merge_unmatched_into_views(unmatched_states, partitioner, org, import_file):
                 else:
                     # Grab another view that has the same parent as
                     # the one we belong to.
-                    cousin_view = existing_view_states[key].values()[0].values()[0][0]
+                    cousin_view = existing_view_states[key].values()[0]
                     view_parent = getattr(cousin_view, ParentAttrName)
                     new_view = type(cousin_view)()
                     setattr(new_view, ParentAttrName, view_parent)
-                    new_view.save()
-                    matched_views.append(new_view)
+                    new_view.cycle = current_match_cycle
+                    new_view.state = unmatched
+                    try:
+                        new_view.save()
+                        matched_views.append(new_view)
+                    except IntegrityError:
+                        _log.warn("Unable to save the new view as it already exists in the db")
 
                 break
         else:
@@ -1244,7 +1302,7 @@ def _match_properties_and_taxlots(file_pk, user_pk):
         unmatched_properties, duplicate_property_states = filter_duplicated_states(
             all_unmatched_properties)
 
-        property_partitioner = EquivalencePartitioner.makeDefaultStateEquivalence(PropertyState)
+        property_partitioner = EquivalencePartitioner.make_default_state_equivalence(PropertyState)
 
         # Merge everything together based on the notion of equivalence
         # provided by the partitioner.
@@ -1263,17 +1321,15 @@ def _match_properties_and_taxlots(file_pk, user_pk):
 
     # Do the same process with the TaxLots.
     all_unmatched_tax_lots = import_file.find_unmatched_tax_lot_states()
+
     if all_unmatched_tax_lots:
         unmatched_tax_lots, duplicate_tax_lot_states = filter_duplicated_states(
             all_unmatched_tax_lots)
-
-        taxlot_partitioner = EquivalencePartitioner.makeDefaultStateEquivalence(TaxLotState)
-
+        taxlot_partitioner = EquivalencePartitioner.make_default_state_equivalence(TaxLotState)
         unmatched_tax_lots, taxlot_equivalence_keys = match_and_merge_unmatched_objects(
             unmatched_tax_lots,
             taxlot_partitioner, org,
             import_file)
-
         merged_taxlot_views = merge_unmatched_into_views(unmatched_tax_lots, taxlot_partitioner,
                                                          org, import_file)
     else:
@@ -1472,6 +1528,7 @@ def save_state_match(state1, state2, confidence=None, user=None,
     assert AuditLogClass.objects.filter(state=state1).count() >= 1
     assert AuditLogClass.objects.filter(state=state2).count() >= 1
 
+    # NJACHECK - is this logic correct?
     state_1_audit_log = AuditLogClass.objects.filter(state=state1).first()
     state_2_audit_log = AuditLogClass.objects.filter(state=state2).first()
 
@@ -1534,19 +1591,14 @@ def pair_new_states(merged_property_views, merged_taxlot_views):
     global taxlot_m2m_keygen
     global property_m2m_keygen
 
-    taxlot_m2m_keygen = EquivalencePartitioner(tax_cmp_fmt)
-    property_m2m_keygen = EquivalencePartitioner(prop_cmp_fmt)
+    taxlot_m2m_keygen = EquivalencePartitioner(tax_cmp_fmt, ["jurisdiction_tax_lot_id"])
+    property_m2m_keygen = EquivalencePartitioner(prop_cmp_fmt, ["pm_property_id", "jurisdiction_property_id"])
 
-    import time
-    st = time.time()
     property_views = PropertyView.objects.filter(state__organization=org, cycle=cycle).values_list(
         *prop_comparison_field_names)
     taxlot_views = TaxLotView.objects.filter(state__organization=org, cycle=cycle).values_list(
         *tax_comparison_field_names)
 
-    et = time.time()
-    print "{} seconds.".format(et - st)
-
     # For each of the view objects, make an
     prop_type = namedtuple("Prop", prop_comparison_fields)
     taxlot_type = namedtuple("TL", tax_comparison_fields)
@@ -1595,7 +1647,7 @@ def pair_new_states(merged_property_views, merged_taxlot_views):
         #     pdb.set_trace()
 
         pv_key = property_m2m_keygen.calculate_comparison_key(pv.state)
-        # TODO: Refactor pronto.  This iterating over the tax lot is totally bogus and I hate it.
+        # TODO: Refactor pronto.  Iterating over the tax lot is a bad implementation.
         for tlk in taxlot_keys:
             if pv_key[0] and ";" in pv_key[0]:
                 for lotnum in map(lambda x: x.strip(), pv_key[0].split(";")):

diff --git a/seed/data_importer/tests/test_demo_v2.py b/seed/data_importer/tests/test_demo_v2.py
@@ -162,8 +162,9 @@ def test_demo_v2(self):
             organization=self.org,
             import_file=self.import_file_property,
         )
-        # There will be 4 properties that will still be in the DATA_STATE_MAPPING
-        self.assertEqual(len(ps), 4)
+
+        # there shouldn't be any properties left in the mapping state
+        self.assertEqual(len(ps), 0)
 
         # psv = PropertyView.objects.filter(state__organization=self.org)
         # self.assertEqual(len(psv), 12)