From 429261be9764ae557cf357cd12a275256e0bb772 Mon Sep 17 00:00:00 2001 From: Alex Babich Date: Sat, 23 Apr 2016 14:34:46 -0700 Subject: [PATCH] Added assignment_07 --- .DS_Store | Bin 0 -> 6148 bytes README.md | 3 +- __init__.py | 0 analytics.py | 228 +++++++++++++++++++++++++++++++++++++ io_geojson.py | 26 +++++ point.py | 81 +++++++++++++ tests/__init__.py | 0 tests/functional_test.py | 137 ++++++++++++++++++++++ tests/point_test.py | 61 ++++++++++ tests/test_analytics.py | 24 ++++ tests/test_io_geojson.py | 17 +++ tests/test_pointpattern.py | 6 + tests/test_utils.py | 16 +++ utils.py | 159 ++++++++++++++++++++++++++ 14 files changed, 757 insertions(+), 1 deletion(-) create mode 100644 .DS_Store create mode 100644 __init__.py create mode 100644 analytics.py create mode 100644 io_geojson.py create mode 100644 point.py create mode 100644 tests/__init__.py create mode 100644 tests/functional_test.py create mode 100644 tests/point_test.py create mode 100644 tests/test_analytics.py create mode 100644 tests/test_io_geojson.py create mode 100644 tests/test_pointpattern.py create mode 100644 tests/test_utils.py create mode 100644 utils.py diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..02c8deff5cc9429ef6cd74a8f3797efda2b3fa89 GIT binary patch literal 6148 zcmeHKUrXaa5Z|r!Oq!P*cY@%{z6yP)RG|lPG6S=}+1Z)he0STqiod9 zhG~=^`Fg&685O;*yWAPXCy?3+(s)niGL(rdep$9{TXxr78ILzw&AQlVewoz8xYcUZ z#b&cLnYd?`wYs+Tb@!k@IX%C)yt=-*y}Q>t!Qqot^2y*7euFVX$%#KmlPo<3?=)i? zNk|M31H`}^WxyO~mh(oRpB|eSAO>EJ0X!cBD57Vu)ToXQXz=$D$E%1aVB=i^Q5f_L zmKtFMgzHp5oyyG>gX?tg3lrxVEH&zM#?{I&k6D?U8wyvegI}m{#yySH5(C7*QwA1v z*Twq3^Zfn)X%h8_0b<}?F~FdfTDpKV&K0r@Bjd3WQhO( literal 0 HcmV?d00001 diff --git a/README.md b/README.md index 4e48216..9e7a698 100644 --- a/README.md +++ b/README.md @@ -1 +1,2 @@ -# assignment_07 \ No newline at end of file +assignment_07 +============== \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/analytics.py b/analytics.py new file mode 100644 index 0000000..5b43446 --- /dev/null +++ b/analytics.py @@ -0,0 +1,228 @@ +from .utils import * + +def find_largest_city(gj): + """ + Iterate through a geojson feature collection and + find the largest city. Assume that the key + to access the maximum population is 'pop_max'. + + Parameters + ---------- + gj : dict + A GeoJSON file read in as a Python dictionary + + Returns + ------- + city : str + The largest city + + population : int + The population of the largest city + """ + city = None + max_population = 0 + for n in gj["features"]: + properties = n["properties"] + if (properties["pop_max"] > max_population): + max_population = properties["pop_max"] + city = properties["adm1name"] + + return city, max_population + + +def alaska_points(gj): + # Find coordinates from Alaska + alaska_points = [] + for n in gj["features"]: + properties = n["properties"] + state_name = properties["adm1name"] + if state_name == "Alaska": + alaska_points.append(n) + else: + continue + + return alaska_points + + +def mean_center(points): + """ + Given a set of points, compute the mean center + + Parameters + ---------- + points : list + A list of points in the form (x,y) + + Returns + ------- + x : float + Mean x coordinate + + y : float + Mean y coordinate + """ + x = 0 + y = 0 + number_of_points = 0 + + for p in points: + x += p[0] + y += p[1] + number_of_points += 1 + + x = x / number_of_points + y = y / number_of_points + + return x, y + + +def average_nearest_neighbor_distance(points, mark=None): + """ + Given a set of points, compute the average nearest neighbor. + + Parameters + ---------- + points : list + A list of points in the form (x,y) + + Returns + ------- + mean_d : float + Average nearest neighbor distance + + References + ---------- + Clark and Evan (1954 Distance to Nearest Neighbor as a + Measure of Spatial Relationships in Populations. Ecology. 35(4) + p. 445-453. + """ + mean_d = 0 + temp_p = None + + if mark == None: + for p in points: + for q in points: + if check_coincident(p, q): + continue + cached = euclidean_distance(p, q) + if temp_p is None: + temp_p = cached + elif temp_p > cached: + temp_p = cached + + mean_d += temp_p + temp_p = None + else: + for p in points: + if p.mark == mark: + for q in points: + if check_coincident(p, q): + continue + cached = euclidean_distance(p, q) + if temp_p is None: + temp_p = cached + elif temp_p > cached: + temp_p = cached + + mean_d += temp_p + temp_p = None + + + + + + return mean_d / len(points) + + +def minimum_bounding_rectangle(points): + """ + Given a set of points, compute the minimum bounding rectangle. + + Parameters + ---------- + points : list + A list of points in the form (x,y) + + Returns + ------- + : list + Corners of the MBR in the form [xmin, ymin, xmax, ymax] + """ + + min_x = 1000000000 + min_y = 1000000000 + max_x = -1 + max_y = -1 + + for n in points: + if n[0] < min_x: + min_x = n[0] + if n[0] > max_x: + max_x = n[0] + if n[1] < min_y: + min_y = n[1] + if n[1] > max_y: + max_y = n[1] + + mbr = [min_x, min_y, max_x, max_y] + + return mbr + + +def mbr_area(mbr): + """ + Compute the area of a minimum bounding rectangle + """ + area = ((mbr[2] - mbr[0]) * (mbr[3] - mbr[1])) + + return area + +def expected_distance(area, n): + """ + Compute the expected mean distance given + some study area. + + This makes lots of assumptions and is not + necessarily how you would want to compute + this. This is just an example of the full + analysis pipe, e.g. compute the mean distance + and the expected mean distance. + + Parameters + ---------- + area : float + The area of the study area + + n : int + The number of points + """ + + expected = (0.5 * (math.sqrt(area/n))) + return expected + + +def create_random(n, mark=None): + random.seed() + random_points = [(random.randint(0,100), random.randint(0,100), mark) for i in range(n)] + return random_points + + +def permutations(p=99, n=100): + #Compute the mean nearest neighbor distance + permutationz = [] + for i in range(p): + permutationz.append(average_nearest_neighbor_distance(create_random(n))) + return permutationz + + +def compute_critical(points): + lower_bound = min(points) + upper_bound = max(points) + return lower_bound, upper_bound + + +def check_significant(lower, upper, observed): + if observed > upper: + return True + if observed < lower: + return True \ No newline at end of file diff --git a/io_geojson.py b/io_geojson.py new file mode 100644 index 0000000..2619650 --- /dev/null +++ b/io_geojson.py @@ -0,0 +1,26 @@ +import json +from urllib.request import urlopen + +def read_geojson(input_url): + """ + Read a geojson file + + Parameters + ---------- + input_file : str + The PATH to the data to be read + + Returns + ------- + gj : dict + An in memory version of the geojson + """ + # I still can't seem to open json locally so going the url route + # for now until I figure it out! + # with open(input_file, 'r') as f: + # gj = json.load(f) + response = urlopen(input_url).read().decode('utf8') #For Testing purposes + # response = urlopen("https://api.myjson.com/bins/4587l").read().decode('utf8') + gj = json.loads(response) + + return gj diff --git a/point.py b/point.py new file mode 100644 index 0000000..ca21a40 --- /dev/null +++ b/point.py @@ -0,0 +1,81 @@ +from utils import * +import numpy as np + +class Point(object): + def __init__(self, x, y, mark={}): + self.x = x + self.y = y + self.mark = mark + + def __add__(self, other): + return self.x + other.x, self.y, other.y + + def __div__(self, other): + return self.x / other.x, self.y, other.y + + def __sub__(self, other): + return self.x - other.x, self.y, other.y + + def create_random_marked_points(self, n, marks=[]): + list_o_random_points = [] + for i in range(n): + random_point = Point(random.seed, random.seed, random.choice(marks)) + list_o_random_points.append(random_point) + return list_o_random_points + + def check_coincident(self, other): + return check_coincident((self.x, self.y), (other.x, other.y)) + + def shift_point(self, x_shift, y_shift): + return shift_point((self.x, self.y), x_shift, y_shift) + +class PointPattern: + def __init__(self): + self.points = [] + + def average_nearest_neighbor_distance(self, mark=None): + return utils.average_nearest_neighbor_distance(self.points, mark) + + def list_of_marks(self): + list_o_marks = [] + for point in self.points: + if point.mark not in list_o_marks: + list_o_marks.append(point.mark) + return list_o_marks + + def coincident_points(self): + number_of_coincidents = 0 + list_o_coincidents = [] + for point in range(len(self.points)): + for neighbor in range(len(self.points)): + if point in list_o_coincidents or point==neighbor: + continue + # This was easier to make work than check_coincident + if self.points[point] == self.points[neighbor]: + number_of_coincidents = count + 1 + list_o_coincidents.append(neighbor) + return number_of_coincidents + + def subset_of_points_by_mark_type(self, mark): + subset_list = [] + for point in self.points: + if point.mark == mark: + subset.append(point) + return subset_list + + def generate_n_random_points(self, n=None): + n_random_points = create_random_marked_points(n = len(self.points),marks = []) + return n_random_points + + def generate_k_patterns(self, k): + return analytics.permutations(self.marks, k) + + def nearest_neighbor_critical_points(self): + return analytics.compute_critical(self.generate_k_patterns(100)) + + def compute_g(self, nsteps): + ds = np.linspace(0, 100, nsteps) + sum_g = 0 + for n in range(nsteps): + oi = ds[n] + # Kind of stuck where to go from here, going to come back \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/functional_test.py b/tests/functional_test.py new file mode 100644 index 0000000..6a6cf61 --- /dev/null +++ b/tests/functional_test.py @@ -0,0 +1,137 @@ +import random +import unittest + +from .. import analytics +from .. import io_geojson +from .. import utils + + +class TestFunctionalPointPattern(unittest.TestCase): + + def setUp(self): + random.seed(12345) + i = 0 + self.points = [] + while i < 100: + seed = (round(random.random(),2), round(random.random(),2)) + self.points.append(seed) + n_additional = random.randint(5,10) + i += 1 + c = random.choice([0,1]) + if c: + for j in range(n_additional): + x_offset = random.randint(0,10) / 100 + y_offset = random.randint(0,10) / 100 + pt = (round(seed[0] + x_offset, 2), round(seed[1] + y_offset,2)) + self.points.append(pt) + i += 1 + if i == 100: + break + if i == 100: + break + + def test_point_pattern(self): + """ + This test checks that the code can compute an observed mean + nearest neighbor distance and then use Monte Carlo simulation to + generate some number of permutations. A permutation is the mean + nearest neighbor distance computed using a random realization of + the point process. + """ + random.seed() # Reset the random number generator using system time + # I do not know where you have moved avarege_nearest_neighbor_distance, so update the point_pattern module + observed_avg = analytics.average_nearest_neighbor_distance(self.points) + self.assertAlmostEqual(0.03001895090111224, observed_avg, 3) + + # Again, update the point_pattern module name for where you have placed the point_pattern module + # Also update the create_random function name for whatever you named the function to generate + # random points + rand_points = analytics.create_random(100) + self.assertEqual(100, len(rand_points)) + + # As above, update the module and function name. + permutations = analytics.permutations(99) + self.assertEqual(len(permutations), 99) + self.assertNotEqual(permutations[0], permutations[1]) + + # As above, update the module and function name. + lower, upper = analytics.compute_critical(permutations) + self.assertTrue(lower > 0.03) + self.assertTrue(upper < 7) + self.assertTrue(observed_avg < lower or observed_avg > upper) + + # As above, update the module and function name. + significant = analytics.check_significant(lower, upper, observed_avg) + self.assertTrue(significant) + + self.assertTrue(True) + + + + def test_point_pattern_with_marks(self, marks=[]): + + # The above but with mark checking + if not marks: + random.seed() # Reset the random number generator using system time + # I do not know where you have moved avarege_nearest_neighbor_distance, so update the point_pattern module + observed_avg = analytics.average_nearest_neighbor_distance(self.points) + self.assertAlmostEqual(0.03001895090111224, observed_avg, 3) + + # Again, update the point_pattern module name for where you have placed the point_pattern module + # Also update the create_random function name for whatever you named the function to generate + # random points + rand_points = analytics.create_random(100) + self.assertEqual(100, len(rand_points)) + + # As above, update the module and function name. + permutations = analytics.permutations(99) + self.assertEqual(len(permutations), 99) + self.assertNotEqual(permutations[0], permutations[1]) + + # As above, update the module and function name. + lower, upper = analytics.compute_critical(permutations) + self.assertTrue(lower > 0.03) + self.assertTrue(upper < 7) + self.assertTrue(observed_avg < lower or observed_avg > upper) + + # As above, update the module and function name. + significant = analytics.check_significant(lower, upper, observed_avg) + self.assertTrue(significant) + + self.assertTrue(True) + else: + for mark in marks: + random.seed() # Reset the random number generator using system time + # I do not know where you have moved avarege_nearest_neighbor_distance, so update the point_pattern module + observed_avg = analytics.average_nearest_neighbor_distance(self.points) + self.assertAlmostEqual(0.03001895090111224, observed_avg, 3) + + # Again, update the point_pattern module name for where you have placed the point_pattern module + # Also update the create_random function name for whatever you named the function to generate + # random points + rand_points = analytics.create_random(100, mark) + self.assertEqual(100, len(rand_points)) + + # As above, update the module and function name. + permutations = analytics.permutations(99) + self.assertEqual(len(permutations), 99) + self.assertNotEqual(permutations[0], permutations[1]) + + # As above, update the module and function name. + lower, upper = analytics.compute_critical(permutations) + self.assertTrue(lower > 0.03) + self.assertTrue(upper < 7) + self.assertTrue(observed_avg < lower or observed_avg > upper) + + # As above, update the module and function name. + significant = analytics.check_significant(lower, upper, observed_avg) + self.assertTrue(significant) + self.assertEqual(random.choice(rand_points), mark) + + + + + + + + \ No newline at end of file diff --git a/tests/point_test.py b/tests/point_test.py new file mode 100644 index 0000000..08ea062 --- /dev/null +++ b/tests/point_test.py @@ -0,0 +1,61 @@ +import os +import sys +import random +from nose.tools import set_trace +import unittest +sys.path.insert(0, os.path.abspath('..')) + +from .. import point + +class TestPoint(unittest.TestCase): + + + def setUp(self): + self.x = 3 + self.y = 1 + self.marks = [] + + def test_set_x_and_y_correctly(self): + self.assertEqual(self.x, 3) + self.assertEqual(self.y, 1) + + def test_should_catch_coincident_point(self): + self.assertTrue(point.check_coincident(self, self)) + + def test_should_catch_noncoincident_point(self): + common_point = point.Point(1, 4, ['Pizza', 'BBQ', 'Calzones']) + self.assertFalse(point.check_coincident(self, common_point)) + + def test_shift_the_point(self): + common_point = point.Point(1, 4, ['Pizza', 'BBQ', 'Calzones']) + old_x = common_point.x + new_x = point.shift_point(common_point, 1, 2)[0] + self.assertNotEqual(old_x, new_x) + + def test_mark_creation(self): + random_n = int(random.random() * 100) + list_o_marks = ['Bernie Sanders', 'Ted Cruz', 'Donald Trump', 'Hillary Clinton', 'John Kasich', 'Jill Stein'] + random_mark = random.choice(list_o_marks) + list_o_points = [point.Point(random.random(), random.random()) for i in range(random_n)] + for p in list_o_points: + p.mark = random_mark + self.assertEqual(random_n, len(list_o_points)) + + # def test_create_random_marked_points(self): + # I can't for the life of me get this to work, may be staring at it for too long + # set_trace() + # point.create_random_marked_points(100, ['Water', 'Fire', 'Earth']) + + def test_should_add_points_with_magic(self): + other_point = point.Point(300, 2, ["blurb"]) + self.assertEqual(self.x + other_point.x, 303) + + def test_should_subtract_points_with_magic(self): + other_point = point.Point(300, 2, ["blurb"]) + self.assertEqual(self.x - other_point.x, -297) + + def test_should_divide_points_with_magic(self): + other_point = point.Point(300, 2, ["blurb"]) + self.assertEqual(self.x / other_point.x, 0.01) + + diff --git a/tests/test_analytics.py b/tests/test_analytics.py new file mode 100644 index 0000000..1b9eae7 --- /dev/null +++ b/tests/test_analytics.py @@ -0,0 +1,24 @@ +import os +import sys +import unittest +import numpy +sys.path.insert(0, os.path.abspath('..')) + +from .. import analytics + +class TestAnalytics(unittest.TestCase): + + def setUp(self): + self.permutations = analytics.permutations(666) + self.random = analytics.create_random(666) + + def test_compute_critical(self): + test_batch = [2, 3, 100, 5, 401, 502, 44, 90] + self.lower, self.upper = analytics.compute_critical(test_batch) + self.assertTrue(self.lower == 2, self.upper == 502) + + def test_permutations(self): + self.assertEqual(len(analytics.permutations(100)), 100) + + def test_create_random(self): + self.assertEqual(len(self.random), 666) \ No newline at end of file diff --git a/tests/test_io_geojson.py b/tests/test_io_geojson.py new file mode 100644 index 0000000..2d0b40c --- /dev/null +++ b/tests/test_io_geojson.py @@ -0,0 +1,17 @@ +import os +import sys +import unittest +sys.path.insert(0, os.path.abspath('..')) + +from .. import io_geojson, analytics + +class TestIoGeoJson(unittest.TestCase): + + def setUp(self): + self.input_url = "https://api.myjson.com/bins/4587l" + + + def test_read_geojson(self): + response = io_geojson.read_geojson(self.input_url) + largest_city = analytics.find_largest_city(response) + self.assertEqual(largest_city[0], "New York") \ No newline at end of file diff --git a/tests/test_pointpattern.py b/tests/test_pointpattern.py new file mode 100644 index 0000000..779191c --- /dev/null +++ b/tests/test_pointpattern.py @@ -0,0 +1,6 @@ +import unittest +from .. import point + +class TestPointPattern(unittest.TestCase): + def setUp(self): + pass \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..54a79f6 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,16 @@ +import os +import sys +import unittest +sys.path.insert(0, os.path.abspath('..')) + +from .. import utils + +class TestUtils(unittest.TestCase): + + def setUp(self): + self.points_a = (1,4) + self.points_b = (1,4) + + + def check_coincident(self): + self.assertEqual(check_coincident(self.points_a), check_coincident(self.points_b)) \ No newline at end of file diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..03144e4 --- /dev/null +++ b/utils.py @@ -0,0 +1,159 @@ +import math +import random + +def manhattan_distance(a, b): + """ + Compute the Manhattan distance between two points + + Parameters + ---------- + a : tuple + A point in the form (x,y) + + b : tuple + A point in the form (x,y) + + Returns + ------- + distance : float + The Manhattan distance between the two points + """ + distance = abs(a[0] - b[0]) + abs(a[1] - b[1]) + return distance + + + +def euclidean_distance(a, b): + """ + Compute the Euclidean distance between two points + + Parameters + ---------- + a : tuple + A point in the form (x,y) + + b : tuple + A point in the form (x,y) + + Returns + ------- + + distance : float + The Euclidean distance between the two points + """ + distance = math.sqrt((a[0] - b[0])**2 + (a[1] - b[1])**2) + return distance + + +def shift_point(point, x_shift, y_shift): + """ + Shift a point by some amount in the x and y directions + + Parameters + ---------- + point : tuple + in the form (x,y) + + x_shift : int or float + distance to shift in the x direction + + y_shift : int or float + distance to shift in the y direction + + Returns + ------- + new_x : int or float + shited x coordinate + + new_y : int or float + shifted y coordinate + + Note that the new_x new_y elements are returned as a tuple + + Example + ------- + >>> point = (0,0) + >>> shift_point(point, 1, 2) + (1,2) + """ + x = getx(point) + y = gety(point) + + x += x_shift + y += y_shift + + return x, y + + +def check_coincident(a, b): + """ + Check whether two points are coincident + Parameters + ---------- + a : tuple + A point in the form (x,y) + + b : tuple + A point in the form (x,y) + + Returns + ------- + equal : bool + Whether the points are equal + """ + return a == b + + +def check_in(point, point_list): + """ + Check whether point is in the point list + + Parameters + ---------- + point : tuple + In the form (x,y) + + point_list : list + in the form [point, point_1, point_2, ..., point_n] + """ + return point in point_list + + +def getx(point): + """ + A simple method to return the x coordinate of + an tuple in the form(x,y). We will look at + sequences in a coming lesson. + + Parameters + ---------- + point : tuple + in the form (x,y) + + Returns + ------- + : int or float + x coordinate + """ + + return point.x + + +def gety(point): + """ + A simple method to return the x coordinate of + an tuple in the form(x,y). We will look at + sequences in a coming lesson. + + Parameters + ---------- + point : tuple + in the form (x,y) + + Returns + ------- + : int or float + y coordinate + """ + + return point.y \ No newline at end of file