diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4888f84 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.xml +*.pyc diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..9aa63d2 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,6 @@ +language: python +python: + - "3.5" + +#command to run tests +script: nosetests diff --git a/__pycache__/__init__.cpython-35.pyc b/__pycache__/__init__.cpython-35.pyc new file mode 100644 index 0000000..1f25cac Binary files /dev/null and b/__pycache__/__init__.cpython-35.pyc differ diff --git a/__pycache__/analytics.cpython-35.pyc b/__pycache__/analytics.cpython-35.pyc new file mode 100644 index 0000000..a6d9bdc Binary files /dev/null and b/__pycache__/analytics.cpython-35.pyc differ diff --git a/__pycache__/io_geojson.cpython-35.pyc b/__pycache__/io_geojson.cpython-35.pyc new file mode 100644 index 0000000..74d4c6b Binary files /dev/null and b/__pycache__/io_geojson.cpython-35.pyc differ diff --git a/__pycache__/point.cpython-35.pyc b/__pycache__/point.cpython-35.pyc new file mode 100644 index 0000000..0eb0e30 Binary files /dev/null and b/__pycache__/point.cpython-35.pyc differ diff --git a/__pycache__/utils.cpython-35.pyc b/__pycache__/utils.cpython-35.pyc new file mode 100644 index 0000000..c03f32a Binary files /dev/null and b/__pycache__/utils.cpython-35.pyc differ diff --git a/analytics.py b/analytics.py index e69de29..0a2544f 100644 --- a/analytics.py +++ b/analytics.py @@ -0,0 +1,108 @@ +from . import utils + + +def find_largest_city(gj): + """ + Iterate through a geojson feature collection and + find the largest city. Assume that the key + to access the maximum population is 'pop_max'. 
+ + Parameters + ---------- + gj : dict + A GeoJSON file read in as a Python dictionary + + Returns + ------- + city : str + The largest city + + population : int + The population of the largest city + """ + city = None + max_population = 0 + for item in gj["features"]: + props = item["properties"] + if props["pop_max"] > max_population: + max_population = props["pop_max"] + city = props["adm1name"] + + return city, max_population + + +def average_nearest_neighbor_distance(points_list, mark = None): + """ + Given a set of points, compute the average nearest neighbor distance. + + Parameters + ---------- + points_list : list + A list of Point objects. + mark : str + An optional string to filter the inputs by a certain color. + + Returns + ------- + mean_d : float + Average nearest neighbor distance + + References + ---------- + Clark and Evans (1954). Distance to Nearest Neighbor as a + Measure of Spatial Relationships in Populations. Ecology. 35(4) + p. 445-453. + """ + points = None + if mark is None: + # User passed in no mark, we will use the entire points_list. + points = points_list + else: + points = list(filter(lambda current_point: current_point.mark['color'] == mark, points_list)) + + mean_d = 0 + temp_nearest_neighbor = None + # Average the nearest neighbor distance of all points. + for i, point in enumerate(points): + # Find the nearest neighbor to this point. + for j, otherPoint in enumerate(points): + # You are not your own neighbor. + if i == j: + continue + # To avoid multiple calculations, we'll cache the result. + current_distance = utils.euclidean_distance((point.x, point.y), (otherPoint.x, otherPoint.y)) + # nearest neighbor will be None if this is the first neighbor we have iterated over. + if temp_nearest_neighbor is None: + temp_nearest_neighbor = current_distance + elif temp_nearest_neighbor > current_distance: + temp_nearest_neighbor = current_distance + # At this point, we've found point's nearest neighbor distance. + # Add in that distance. 
+ mean_d += temp_nearest_neighbor + temp_nearest_neighbor = None + + # Divide by number of points. + mean_d /= len(points) + + return mean_d + + +def permutations(p=99, mark=None): + n = 100 + to_return = [] + for i in range(p): + to_return.append( + average_nearest_neighbor_distance( + utils.create_random(n), + mark + ) + ) + return to_return + + +def compute_critical(p): + """ + Calculates the critical points (lowest distance and greatest distance) in a set of + randomly generated permutations (created using permutations(p)). + """ + return min(p), max(p) \ No newline at end of file diff --git a/io_geojson.py b/io_geojson.py index e69de29..fd4b2ab 100644 --- a/io_geojson.py +++ b/io_geojson.py @@ -0,0 +1,23 @@ +import json + + +def read_geojson(input_file): + """ + Read a geojson file + + Parameters + ---------- + input_file : str + The PATH to the data to be read + + Returns + ------- + gj : dict + An in memory version of the geojson + """ + # Please use the python json module (imported above) + # to solve this one. + with open(input_file, 'r') as fp: + gj = json.load(fp) + return gj + diff --git a/point.py b/point.py index e69de29..f41ba0d 100644 --- a/point.py +++ b/point.py @@ -0,0 +1,18 @@ +from . 
import utils +import random + + +class Point(object): + def __init__(self, x, y, **mark): + self.x = x + self.y = y + self.mark = mark + + def is_coincident(self, other_point): + return utils.check_coincident((self.x, self.y), (other_point.x, other_point.y)) + + def shift_point(self, delta_x, delta_y): + result = utils.shift_point((self.x, self.y), delta_x, delta_y) + self.x = utils.getx(result) + self.y = utils.gety(result) + diff --git a/tests/__pycache__/__init__.cpython-35.pyc b/tests/__pycache__/__init__.cpython-35.pyc new file mode 100644 index 0000000..3692af1 Binary files /dev/null and b/tests/__pycache__/__init__.cpython-35.pyc differ diff --git a/tests/__pycache__/functional_test.cpython-35.pyc b/tests/__pycache__/functional_test.cpython-35.pyc new file mode 100644 index 0000000..8e3c366 Binary files /dev/null and b/tests/__pycache__/functional_test.cpython-35.pyc differ diff --git a/tests/__pycache__/point_test.cpython-35.pyc b/tests/__pycache__/point_test.cpython-35.pyc new file mode 100644 index 0000000..504c79e Binary files /dev/null and b/tests/__pycache__/point_test.cpython-35.pyc differ diff --git a/tests/__pycache__/test_analytics.cpython-35.pyc b/tests/__pycache__/test_analytics.cpython-35.pyc new file mode 100644 index 0000000..d235e70 Binary files /dev/null and b/tests/__pycache__/test_analytics.cpython-35.pyc differ diff --git a/tests/__pycache__/test_io_geojson.cpython-35.pyc b/tests/__pycache__/test_io_geojson.cpython-35.pyc new file mode 100644 index 0000000..932fa18 Binary files /dev/null and b/tests/__pycache__/test_io_geojson.cpython-35.pyc differ diff --git a/tests/__pycache__/test_utils.cpython-35.pyc b/tests/__pycache__/test_utils.cpython-35.pyc new file mode 100644 index 0000000..2f03b5e Binary files /dev/null and b/tests/__pycache__/test_utils.cpython-35.pyc differ diff --git a/tests/functional_test.py b/tests/functional_test.py index 596af78..692dfb3 100644 --- a/tests/functional_test.py +++ b/tests/functional_test.py @@ -4,6 +4,7 
@@ from .. import analytics from .. import io_geojson from .. import utils +from .. import point class TestFunctionalPointPattern(unittest.TestCase): @@ -12,9 +13,14 @@ def setUp(self): random.seed(12345) i = 0 self.points = [] + marks = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet'] while i < 100: seed = (round(random.random(),2), round(random.random(),2)) - self.points.append(seed) + self.points.append(point.Point( + seed[0], # Random x coordinate + seed[1], # Random y coordinate + color=random.choice(marks))) # Random mark + n_additional = random.randint(5,10) i += 1 c = random.choice([0,1]) @@ -23,7 +29,7 @@ def setUp(self): x_offset = random.randint(0,10) / 100 y_offset = random.randint(0,10) / 100 pt = (round(seed[0] + x_offset, 2), round(seed[1] + y_offset,2)) - self.points.append(pt) + self.points.append(point.Point(pt[0], pt[1], color=random.choice(marks))) i += 1 if i == 100: break @@ -39,29 +45,72 @@ def test_point_pattern(self): the point process. """ random.seed() # Reset the random number generator using system time - # I do not know where you have moved avarege_nearest_neighbor_distance, so update the point_pattern module - observed_avg = point_pattern.average_nearest_neighbor_distance(self.points) - self.assertAlmostEqual(0.027, observed_avg, 3) + + observed_avg = analytics.average_nearest_neighbor_distance(self.points) + + self.assertAlmostEqual(0.033, observed_avg, 3) # Again, update the point_pattern module name for where you have placed the point_pattern module # Also update the create_random function name for whatever you named the function to generate # random points - rand_points = point_pattern.create_random(100) + rand_points = utils.create_random(100) self.assertEqual(100, len(rand_points)) # As above, update the module and function name. 
- permutations = point_pattern.permutations(99) + permutations = analytics.permutations(99) self.assertEqual(len(permutations), 99) self.assertNotEqual(permutations[0], permutations[1]) # As above, update the module and function name. - lower, upper = point_pattern.compute_critical(permutations) + lower, upper = analytics.compute_critical(permutations) self.assertTrue(lower > 0.03) self.assertTrue(upper < 0.07) self.assertTrue(observed_avg < lower or observed_avg > upper) # As above, update the module and function name. - significant = point_pattern.check_significant(lower, upper, observed) + significant = utils.check_significant(lower, upper, observed_avg) self.assertTrue(significant) - self.assertTrue(False) \ No newline at end of file + def test_marked_point_pattern(self): + """ + Performs the same tests as test_point_pattern, but takes into consideration + different marks. + """ + marks = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet'] + + random.seed() # Reset the random number generator using system time + + expected_results = { + 'red': 0.116, + 'orange': 0.055, + 'yellow': 0.085, + 'green': 0.087, + 'blue': 0.126, + 'indigo': 0.179, + 'violet': 0.150 + }; + for mark in marks: + observed_avg = analytics.average_nearest_neighbor_distance(self.points, mark) + self.assertAlmostEqual(expected_results[mark], observed_avg, 3) + + # Again, update the point_pattern module name for where you have placed the point_pattern module + # Also update the create_random function name for whatever you named the function to generate + # random points + rand_points = utils.create_random(100) + self.assertEqual(100, len(rand_points)) + + # As above, update the module and function name. + for mark in marks: + # As above, update the module and function name. 
+ permutations = analytics.permutations(99) + self.assertEqual(len(permutations), 99) + self.assertNotEqual(permutations[0], permutations[1]) + + lower, upper = analytics.compute_critical(permutations) + self.assertTrue(lower > 0.03) + self.assertTrue(upper < 0.07) + self.assertTrue(observed_avg < lower or observed_avg > upper) + + # As above, update the module and function name. + significant = utils.check_significant(lower, upper, observed_avg) + self.assertTrue(significant) \ No newline at end of file diff --git a/tests/point_test.py b/tests/point_test.py new file mode 100644 index 0000000..c2542b1 --- /dev/null +++ b/tests/point_test.py @@ -0,0 +1,65 @@ +import unittest +import random + +from ..point import Point + + +class TestPointClass(unittest.TestCase): + def setUp(self): + pass + + def coordinates_properly_set(self, x, y): + """ + This test checks if the Point constructor correctly + assigns the x and y coordinates to the appropriate variables. + """ + test_point = Point(x, y) + self.assertEqual(test_point.x, x) + self.assertEqual(test_point.y, y) + + def test_coincident(self): + """ + This test checks if the is_coincident method works properly. + """ + point_a = Point(10, 37) + point_b = Point(10, 37) + point_c = Point(10, 36) + point_d = Point(0, 37) + self.assertTrue(point_a.is_coincident(point_b)) + self.assertFalse(point_a.is_coincident(point_c)) + self.assertFalse(point_a.is_coincident(point_d)) + + def test_shift(self): + """ + This test checks if the shift_point method works properly. + """ + test_point = Point(10, 37) + test_point.shift_point(5, 10) + self.assertEqual(test_point.x, 15) + self.assertEqual(test_point.y, 47) + + def test_marking(self): + """ + This test verifies that marked points can be created properly. + """ + + def get_occurrence_count(points, mark): + """ + This is a helper method for test_marking. + Returns the number of occurrences of a certain mark in a list of points. 
+ """ + return len(list(filter(lambda current_point: current_point.mark['color'] == mark, points))) + + random.seed(9631) + marks = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet'] + points = [] + for i in range(20): + points.append(Point(0, 0, color=random.choice(marks))) + + self.assertEqual(get_occurrence_count(points, 'red'), 5) + self.assertEqual(get_occurrence_count(points, 'orange'), 1) + self.assertEqual(get_occurrence_count(points, 'yellow'), 2) + self.assertEqual(get_occurrence_count(points, 'green'), 3) + self.assertEqual(get_occurrence_count(points, 'blue'), 1) + self.assertEqual(get_occurrence_count(points, 'indigo'), 5) + self.assertEqual(get_occurrence_count(points, 'violet'), 3) diff --git a/tests/test_analytics.py b/tests/test_analytics.py new file mode 100644 index 0000000..a174409 --- /dev/null +++ b/tests/test_analytics.py @@ -0,0 +1,19 @@ +import os +import sys +import unittest +sys.path.insert(0, os.path.abspath('..')) + +from .. import analytics + + +class TestAnalytics(unittest.TestCase): + + def setUp(self): + pass + + def test_permutations(self): + self.assertEqual(len(analytics.permutations(300)), 300) + + def test_critical(self): + criticals = analytics.compute_critical([0.5, 1.0, 0.99, 3.14, 0.987, 0.102]) + self.assertTrue(criticals[0] == 0.102 and criticals[1] == 3.14) \ No newline at end of file diff --git a/tests/test_io_geojson.py b/tests/test_io_geojson.py new file mode 100644 index 0000000..9aeaf06 --- /dev/null +++ b/tests/test_io_geojson.py @@ -0,0 +1,12 @@ +import os +import sys +import unittest +sys.path.insert(0, os.path.abspath('..')) + +from .. import io_geojson + + +class TestIoGeoJson(unittest.TestCase): + + def setUp(self): + pass \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..e7095a9 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,19 @@ +import os +import sys +import unittest +sys.path.insert(0, os.path.abspath('..')) + +from .. 
import utils + + +class TestUtils(unittest.TestCase): + + def setUp(self): + pass + + def test_create_random(self): + self.assertEqual(len(utils.create_random(1000)), 1000) + + def test_check_significant(self): + self.assertTrue(utils.check_significant(10, 30, 9.9)) + self.assertFalse(utils.check_significant(9.9, 30, 10)) \ No newline at end of file diff --git a/utils.py b/utils.py index e69de29..b82d4de 100644 --- a/utils.py +++ b/utils.py @@ -0,0 +1,288 @@ +import math +import random +from .point import Point + + +def create_random(n): + """ + Generates n random points. Coordinates are between 0 and 1.00 inclusive. + """ + rng = random.Random() + marks = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet'] + to_return = [] + for i in range(n): + to_return.append(Point( + round(rng.uniform(0, 1), 2), + round(rng.uniform(0, 1), 2), + color=rng.choice(marks))) + return to_return + + +def check_significant(lower, upper, distance): + return (distance < lower) or (distance > upper) + + +def mean_center(points): + """ + Given a set of points, compute the mean center + + Parameters + ---------- + points : list + A list of points in the form (x,y) + + Returns + ------- + x : float + Mean x coordinate + + y : float + Mean y coordinate + """ + x = 0 + y = 0 + n = 0 + for point in points: + x += point[0] + y += point[1] + n += 1 + + x /= n + y /= n + + return x, y + + +def minimum_bounding_rectangle(points): + """ + Given a set of points, compute the minimum bounding rectangle. + + Parameters + ---------- + points : list + A list of points in the form (x,y) + + Returns + ------- + : list + Corners of the MBR in the form [xmin, ymin, xmax, ymax] + """ + + mbr = [None,None,None,None] + for point in points: + # First iteration, everything is None. The point will + # form the initial boundaries for the rectangle. + if mbr[0] is None: + mbr[0] = point[0] + mbr[1] = point[1] + mbr[2] = point[0] + mbr[3] = point[1] + else: + # Verify that each edge is far enough. 
If not, extend the rectangle. + if point[0] < mbr[0]: + mbr[0] = point[0] + if point[1] < mbr[1]: + mbr[1] = point[1] + if point[0] > mbr[2]: + mbr[2] = point[0] + if point[1] > mbr[3]: + mbr[3] = point[1] + + return mbr + + +def mbr_area(mbr): + """ + Compute the area of a minimum bounding rectangle + """ + area = (mbr[3] - mbr[1]) * (mbr[2] - mbr[0]) + + return area + + +def expected_distance(area, n): + """ + Compute the expected mean distance given + some study area. + + This makes lots of assumptions and is not + necessarily how you would want to compute + this. This is just an example of the full + analysis pipe, e.g. compute the mean distance + and the expected mean distance. + + Parameters + ---------- + area : float + The area of the study area + + n : int + The number of points + """ + + expected = 0.5 * ((area / n) ** 0.5) + return expected + + +""" +Below are the functions that you created last week. +Your syntax might have been different (which is awesome), +but the functionality is identical. 
No need to touch +these unless you are interested in another way of solving +the assignment +""" + + +def manhattan_distance(a, b): + """ + Compute the Manhattan distance between two points + + Parameters + ---------- + a : tuple + A point in the form (x,y) + + b : tuple + A point in the form (x,y) + + Returns + ------- + distance : float + The Manhattan distance between the two points + """ + distance = abs(a[0] - b[0]) + abs(a[1] - b[1]) + return distance + + +def euclidean_distance(a, b): + """ + Compute the Euclidean distance between two points + + Parameters + ---------- + a : tuple + A point in the form (x,y) + + b : tuple + A point in the form (x,y) + + Returns + ------- + + distance : float + The Euclidean distance between the two points + """ + distance = math.sqrt((a[0] - b[0])**2 + (a[1] - b[1])**2) + return distance + + +def shift_point(point, x_shift, y_shift): + """ + Shift a point by some amount in the x and y directions + + Parameters + ---------- + point : tuple + in the form (x,y) + + x_shift : int or float + distance to shift in the x direction + + y_shift : int or float + distance to shift in the y direction + + Returns + ------- + new_x : int or float + shifted x coordinate + + new_y : int or float + shifted y coordinate + + Note that the new_x and new_y elements are returned as a tuple + + Example + ------- + >>> point = (0,0) + >>> shift_point(point, 1, 2) + (1, 2) + """ + x = getx(point) + y = gety(point) + + x += x_shift + y += y_shift + + return x, y + + +def check_coincident(a, b): + """ + Check whether two points are coincident + Parameters + ---------- + a : tuple + A point in the form (x,y) + + b : tuple + A point in the form (x,y) + + Returns + ------- + equal : bool + Whether the points are equal + """ + return a == b + + +def check_in(point, point_list): + """ + Check whether point is in the point list + + Parameters + ---------- + point : tuple + In the form (x,y) + + point_list : list + in the form [point, point_1, point_2, ..., 
point_n] + """ + return point in point_list + + +def getx(point): + """ + A simple method to return the x coordinate of + a tuple in the form (x,y). We will look at + sequences in a coming lesson. + + Parameters + ---------- + point : tuple + in the form (x,y) + + Returns + ------- + : int or float + x coordinate + """ + return point[0] + + +def gety(point): + """ + A simple method to return the y coordinate of + a tuple in the form (x,y). We will look at + sequences in a coming lesson. + + Parameters + ---------- + point : tuple + in the form (x,y) + + Returns + ------- + : int or float + y coordinate + """ + return point[1] \ No newline at end of file