Geospatial-Python · gdeluca1 · Apr 16, 2016 · Apr 17, 2016 · Apr 25, 2016
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
+# Ignore the temp folder
+tmp/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/PointPatternAnalysis.ipynb b/PointPatternAnalysis.ipynb
diff --git a/exit-24.png b/exit-24.png
diff --git a/openFolder.png b/openFolder.png
diff --git a/src/__init__.py b/src/__init__.py
diff --git a/src/analytics.py b/src/analytics.py
@@ -0,0 +1,108 @@
+from . import utils
+
+
+def find_largest_city(gj):
+    """
+    Scan a GeoJSON feature collection for the feature with the
+    greatest 'pop_max' property.
+
+    Parameters
+    ----------
+    gj : dict
+         A GeoJSON feature collection read in as a Python dictionary.
+         Every entry of gj['features'] must carry a 'properties' dict
+         holding the keys 'pop_max' and 'adm1name'.
+
+    Returns
+    -------
+    city : str
+           The 'adm1name' of the most populous feature, or None when
+           no feature has a 'pop_max' greater than zero
+
+    population : int
+                 The 'pop_max' of that feature (0 when city is None)
+    """
+    largest_name, largest_population = None, 0
+    for feature in gj["features"]:
+        attributes = feature["properties"]
+        candidate = attributes["pop_max"]
+        # Strictly greater: on a tie the earlier feature is kept.
+        if candidate > largest_population:
+            largest_name = attributes["adm1name"]
+            largest_population = candidate
+
+    return largest_name, largest_population
+
+
+def average_nearest_neighbor_distance(points_list, mark=None):
+    """
+    Given a set of points, compute the average nearest neighbor distance.
+
+    Parameters
+    ----------
+    points_list : list
+             A list of Point objects (anything exposing x, y and a
+             mark dictionary).
+    mark : str
+            An optional color.  When given, only points whose
+            mark['color'] equals it take part; points that carry no
+            'color' mark are left out.
+
+    Returns
+    -------
+    mean_d : float
+             Average nearest neighbor distance
+
+    Raises
+    ------
+    ValueError
+             If fewer than two points remain after filtering, since a
+             nearest neighbor is undefined in that case.
+
+    References
+    ----------
+    Clark and Evans (1954). Distance to Nearest Neighbor as a
+     Measure of Spatial Relationships in Populations. Ecology. 35(4)
+     p. 445-453.
+    """
+    if mark is None:
+        # No mark requested: use every point.
+        points = list(points_list)
+    else:
+        # .get() keeps unmarked points from raising KeyError; they
+        # simply never match the requested color.
+        points = [
+            candidate for candidate in points_list
+            if candidate.mark.get('color') == mark
+        ]
+
+    if len(points) < 2:
+        raise ValueError(
+            'at least two points are required to compute a nearest '
+            'neighbor distance, got {}'.format(len(points))
+        )
+
+    total = 0
+    for i, point in enumerate(points):
+        # Compare by index, not by value, so that a coincident point
+        # still counts as a neighbor at distance zero.
+        total += min(
+            utils.euclidean_distance((point.x, point.y), (other.x, other.y))
+            for j, other in enumerate(points)
+            if i != j
+        )
+
+    return total / len(points)
+
+
+def permutations(p=99, mark=None, n=100):
+    """
+    Compute the average nearest neighbor distance of p randomly
+    generated point sets.
+
+    Parameters
+    ----------
+    p : int
+        Number of random point sets to generate.
+    mark : str
+        Optional color forwarded to average_nearest_neighbor_distance.
+    n : int
+        Size passed to utils.create_random for each point set
+        (presumably the number of points -- confirm against utils).
+
+    Returns
+    -------
+    distances : list
+        One average nearest neighbor distance per generated set,
+        in generation order.
+    """
+    return [
+        average_nearest_neighbor_distance(utils.create_random(n), mark)
+        for _ in range(p)
+    ]
+
+
+def compute_critical(p):
+    """
+    Return the two critical values of a collection of distances.
+
+    Parameters
+    ----------
+    p : list
+        Distances, e.g. the list produced by permutations().
+
+    Returns
+    -------
+    lowest, highest : tuple
+        The smallest and the largest value found in p.
+    """
+    lowest = min(p)
+    highest = max(p)
+    return lowest, highest
diff --git a/src/io_geojson.py b/src/io_geojson.py
@@ -0,0 +1,28 @@
+import json
+
+
+def read_geojson(input_file):
+    """
+    Load a geojson file from disk.
+
+    Parameters
+    ----------
+    input_file : str
+                 The PATH to the data to be read
+
+    Returns
+    -------
+    gj : dict
+         The parsed JSON content of the file
+    """
+    # json.load parses straight from the open file handle.
+    with open(input_file, 'r') as handle:
+        return json.load(handle)
+
+
+def read_tweets(tweet_file):
+    """
+    Load a JSON file of tweets from disk.
+
+    Parameters
+    ----------
+    tweet_file : str
+                 The PATH to the JSON file to be read
+
+    Returns
+    -------
+    tweets : object
+             Whatever json.load produces for the file's content
+    """
+    with open(tweet_file, 'r') as handle:
+        return json.load(handle)
diff --git a/src/point.py b/src/point.py
@@ -0,0 +1,32 @@
+from . import utils
+import random
+
+
+class Point(object):
+    """
+    A 2D point with optional keyword marks (for example color='red').
+
+    Parameters
+    ----------
+    x, y : number
+        Coordinates of the point.
+    **mark
+        Arbitrary keyword marks, stored as the dictionary self.mark.
+    """
+
+    def __init__(self, x, y, **mark):
+        self.x = x
+        self.y = y
+        self.mark = mark
+
+    def __add__(self, other):
+        """Return a new, unmarked Point holding the coordinate-wise sum."""
+        return Point(self.x + other.x, self.y + other.y)
+
+    def __radd__(self, other):
+        """
+        Reflected addition, reached for ``other + point`` when other
+        cannot add a Point itself.
+
+        sum() starts from the integer 0, so 0 is treated as the
+        additive identity and yields an unmarked copy of this point.
+        """
+        if other == 0:
+            return Point(self.x, self.y)
+        # __add__ is already bound to self; pass only the other operand.
+        return self.__add__(other)
+
+    def __str__(self):
+        return "({0}, {1})".format(self.x, self.y)
+
+    def __neg__(self):
+        """Return a new, unmarked Point with both coordinates negated."""
+        return Point(-self.x, -self.y)
+
+    def is_coincident(self, other_point):
+        """Return utils.check_coincident for the two coordinate pairs."""
+        return utils.check_coincident((self.x, self.y), (other_point.x, other_point.y))
+
+    def shift_point(self, delta_x, delta_y):
+        """
+        Move this point in place to the position computed by
+        utils.shift_point for the given offsets.
+        """
+        result = utils.shift_point((self.x, self.y), delta_x, delta_y)
+        self.x = utils.getx(result)
+        self.y = utils.gety(result)
+
+    def get_array(self):
+        """Return the coordinates as the list [x, y]."""
+        return [self.x, self.y]
diff --git a/src/point_pattern.py b/src/point_pattern.py
@@ -0,0 +1,154 @@
+from .point import Point
+from . import analytics
+import random
+import numpy as np
+import scipy.spatial as ss
+
+
+class PointPattern(object):
+    """
+    A mutable collection of Point objects together with
+    nearest-neighbor statistics computed over it.
+    """
+
+    def __init__(self):
+        self.points = []
+
+    def add_point(self, point):
+        """Append point to the pattern."""
+        self.points.append(point)
+
+    def remove_point(self, index):
+        """
+        Delete the point stored at index.  An out-of-range index is
+        reported on stdout instead of being raised.
+        """
+        try:
+            del self.points[index]
+        except IndexError:
+            print('Index {} not in list'.format(index))
+
+    def average_nearest_neighbor_distance(self):
+        """Brute-force average nearest neighbor distance over all points."""
+        return analytics.average_nearest_neighbor_distance(self.points)
+
+    def average_nearest_neighbor_distance_kdtree(self, mark_name=None, mark_value=None):
+        """
+        Average nearest neighbor distance computed with a KD-tree.
+
+        Parameters
+        ----------
+        mark_name : str
+            Optional mark key to filter on.  When None, every point
+            is used.
+        mark_value : object
+            Value the mark must equal.  Points that do not carry
+            mark_name at all are left out.
+
+        Returns
+        -------
+        mean_d : float
+            Mean distance from each selected point to its nearest
+            other selected point.
+        """
+        if mark_name is None:
+            points = self.points
+        else:
+            points = [
+                point for point in self.points
+                if mark_name in point.mark and point.mark[mark_name] == mark_value
+            ]
+        point_stack = np.vstack([point.get_array() for point in points])
+        kdtree = ss.KDTree(point_stack)
+        # Every query point is itself in the tree, so its first hit is
+        # always at distance 0; column 1 holds the nearest other point.
+        distances, _ = kdtree.query(point_stack, k=2)
+        return np.mean(distances[:, 1])
+
+    def average_nearest_neighbor_distance_numpy(self):
+        """
+        Average nearest neighbor distance over all points, comparing
+        every pair with scipy's euclidean distance.
+        """
+        coordinates = np.array([point.get_array() for point in self.points])
+        # Skip by index, not by value, so coincident points still count
+        # as neighbors at distance zero.
+        nearest_neighbors = [
+            min(
+                ss.distance.euclidean(point, other_point)
+                for j, other_point in enumerate(coordinates)
+                if i != j
+            )
+            for i, point in enumerate(coordinates)
+        ]
+        return np.mean(nearest_neighbors)
+
+    def count_coincident(self):
+        """
+        Returns the number of coincident points.
+        If two points are at the same spatial location,
+        that counts as two coincident points. Three coincident points
+        means three points at the same location.
+        """
+        # Indices already counted; a set keeps the membership test cheap.
+        handled_indices = set()
+        for i, point_a in enumerate(self.points):
+            for j, point_b in enumerate(self.points):
+                if i == j or j in handled_indices:
+                    continue
+                if point_a.is_coincident(point_b):
+                    handled_indices.add(j)
+        return len(handled_indices)
+
+    def list_marks(self):
+        """Return the distinct 'color' marks in order of first appearance."""
+        marks = []
+        for point in self.points:
+            if 'color' in point.mark and point.mark['color'] not in marks:
+                marks.append(point.mark['color'])
+        return marks
+
+    def find_subset_with_mark(self, mark):
+        """Return the points whose 'color' mark equals mark."""
+        return [
+            point for point in self.points
+            if 'color' in point.mark and point.mark['color'] == mark
+        ]
+
+    def generate_realizations(self, k):
+        """
+        Return the average nearest neighbor distance of k random
+        patterns of 100 points each.
+        """
+        n = 100
+        return [
+            analytics.average_nearest_neighbor_distance(
+                self.generate_random_points(count=n)
+            )
+            for _ in range(k)
+        ]
+
+    def get_critical_points(self):
+        """Return analytics.compute_critical over 100 random realizations."""
+        return analytics.compute_critical(self.generate_realizations(100))
+
+    def compute_g(self, nsteps):
+        """
+        Mean distance from each of nsteps evenly spaced values on
+        [0, 1] to its closest other value.
+
+        NOTE(review): this works on the generated grid only and never
+        reads self.points -- confirm that this is intended.  nsteps
+        must be at least 2.
+        """
+        ds = np.linspace(0, 1, nsteps)
+        current_sum = 0
+        for o_i in ds:
+            # argsort ranks the values by closeness to o_i.  Rank 0 is
+            # o_i itself, so rank 1 is the nearest other value.
+            nearest = ds[np.argsort(np.abs(ds - o_i))[1]]
+            current_sum += np.abs(nearest - o_i)
+        return current_sum / nsteps
+
+    def generate_random_points(self, count=2, range_min=0, range_max=1, seed=None):
+        """
+        Create random, randomly colored points.
+
+        Parameters
+        ----------
+        count : int
+            Number of points to create.
+        range_min, range_max : number
+            Bounds handed to the uniform generator for both coordinates.
+        seed : int
+            Optional seed for reproducible output.
+
+        Returns
+        -------
+        points : list
+            count new Point objects; the pattern itself is not modified.
+        """
+        if seed is None:
+            rng = np.random
+        else:
+            rng = np.random.RandomState(seed)
+            # Colors are drawn from the stdlib generator, so it is
+            # seeded as well.  This resets the global random state.
+            random.seed(seed)
+        pairs = rng.uniform(range_min, range_max, (count, 2))
+        marks = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet']
+        return [Point(x, y, color=random.choice(marks)) for x, y in pairs]
diff --git a/src/tests/__init__.py b/src/tests/__init__.py
diff --git a/src/tests/point_pattern_test.py b/src/tests/point_pattern_test.py
@@ -0,0 +1,60 @@
+import unittest
+
+from ..point_pattern import PointPattern
+from ..point import Point
+
+
+class TestPointPattern(unittest.TestCase):
+    """Unit tests for PointPattern."""
+
+    def setUp(self):
+        # Three points share the location (5, 6); one of them is unmarked.
+        self.point_pattern = PointPattern()
+        self.point_pattern.add_point(Point(5, 6, color='red'))
+        self.point_pattern.add_point(Point(6, 5, color='orange'))
+        self.point_pattern.add_point(Point(5, 6, color='orange'))
+        self.point_pattern.add_point(Point(5, 6))
+
+    def test_coincident(self):
+        self.assertEqual(self.point_pattern.count_coincident(), 3)
+
+    def test_list_marks(self):
+        self.assertEqual(self.point_pattern.list_marks(), ['red', 'orange'])
+
+    def test_find_subset_with_mark(self):
+        self.assertEqual(len(self.point_pattern.find_subset_with_mark('orange')), 2)
+        self.assertEqual(len(self.point_pattern.find_subset_with_mark('red')), 1)
+
+    def test_generate_random_count(self):
+        # Given its own name so the seeded test below no longer shadows it.
+        # Without arguments the generator falls back to its default count of 2.
+        self.assertEqual(len(self.point_pattern.generate_random_points()), 2)
+        # An explicit count is honored.
+        self.assertEqual(len(self.point_pattern.generate_random_points(10)), 10)
+
+    def test_generate_realizations(self):
+        self.assertEqual(len(self.point_pattern.generate_realizations(100)), 100)
+
+    def test_compute_g(self):
+        self.assertAlmostEqual(self.point_pattern.compute_g(10), 0.111, places=3)
+        self.assertAlmostEqual(self.point_pattern.compute_g(50), 0.020, places=3)
+        self.assertAlmostEqual(self.point_pattern.compute_g(100), 0.010, places=3)
+        self.assertAlmostEqual(self.point_pattern.compute_g(1000), 0.001, places=3)
+
+    def test_nearest_neighbor(self):
+        # Test the KDTree implementation against the original implementation.
+        # Two different float code paths: compare approximately, not exactly.
+        self.assertAlmostEqual(
+            self.point_pattern.average_nearest_neighbor_distance_kdtree(),
+            self.point_pattern.average_nearest_neighbor_distance())
+        self.assertAlmostEqual(self.point_pattern.average_nearest_neighbor_distance_kdtree(), 0.354, places=3)
+        self.assertAlmostEqual(self.point_pattern.average_nearest_neighbor_distance_numpy(), 0.354, places=3)
+
+    def test_generate_random(self):
+        # A fixed seed must reproduce both the coordinates and the colors.
+        points_list = []
+        marks_list = []
+        for point in self.point_pattern.generate_random_points(count=3, seed=1234):
+            points_list.append(point.get_array())
+            marks_list.append(point.mark['color'])
+        self.assertAlmostEqual(points_list[0][0], 0.19, places=2)
+        self.assertAlmostEqual(points_list[0][1], 0.62, places=2)
+        self.assertAlmostEqual(points_list[1][0], 0.44, places=2)
+        self.assertAlmostEqual(points_list[1][1], 0.79, places=2)
+        self.assertAlmostEqual(points_list[2][0], 0.78, places=2)
+        self.assertAlmostEqual(points_list[2][1], 0.27, places=2)
+        self.assertEqual(marks_list, ['violet', 'green', 'red'])