Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assignment 10 submission #1

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Ignore the temp folder
tmp/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
159 changes: 159 additions & 0 deletions PointPatternAnalysis.ipynb

Large diffs are not rendered by default.

Binary file added exit-24.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added openFolder.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file added src/__init__.py
Empty file.
108 changes: 108 additions & 0 deletions src/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from . import utils


def find_largest_city(gj):
"""
Iterate through a geojson feature collection and
find the largest city. Assume that the key
to access the maximum population is 'pop_max'.

Parameters
----------
gj : dict
A GeoJSON file read in as a Python dictionary

Returns
-------
city : str
The largest city

population : int
The population of the largest city
"""
city = None
max_population = 0
for item in gj["features"]:
props = item["properties"]
if props["pop_max"] > max_population:
max_population = props["pop_max"]
city = props["adm1name"]

return city, max_population


def average_nearest_neighbor_distance(points_list, mark = None):
"""
Given a set of points, compute the average nearest neighbor.

Parameters
----------
points_list : list
A list of Point objects.
mark : str
An optional string to filter the inputs by a certain color.

Returns
-------
mean_d : float
Average nearest neighbor distance

References
----------
Clark and Evan (1954 Distance to Nearest Neighbor as a
Measure of Spatial Relationships in Populations. Ecology. 35(4)
p. 445-453.
"""
points = None
if mark is None:
# User passed in no mark, we will use the entire points_list.
points = points_list
else:
points = list(filter(lambda current_point: current_point.mark['color'] == mark, points_list))

mean_d = 0
temp_nearest_neighbor = None
# Average the nearest neighbor distance of all points.
for i, point in enumerate(points):
# Find the nearest neighbor to this point.
for j, otherPoint in enumerate(points):
# You are not your own neighbor.
if i == j:
continue
# To avoid multiple calculations, we'll cache the result.
current_distance = utils.euclidean_distance((point.x, point.y), (otherPoint.x, otherPoint.y))
# nearest neighbor will be None if this is the first neighbor we have iterated over.
if temp_nearest_neighbor is None:
temp_nearest_neighbor = current_distance
elif temp_nearest_neighbor > current_distance:
temp_nearest_neighbor = current_distance
# At this point, we've found point's nearest neighbor distance.
# Add in that distance.
mean_d += temp_nearest_neighbor
temp_nearest_neighbor = None

# Divide by number of points.
mean_d /= len(points)

return mean_d


def permutations(p=99, mark=None):
n = 100
to_return = []
for i in range(p):
to_return.append(
average_nearest_neighbor_distance(
utils.create_random(n),
mark
)
)
return to_return


def compute_critical(p):
"""
Calculates the critical points (lowest distance and greatest distance) in a set of
randomly generated permutations (created using permutations(p)).
"""
return min(p), max(p)
28 changes: 28 additions & 0 deletions src/io_geojson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json


def read_geojson(input_file):
"""
Read a geojson file

Parameters
----------
input_file : str
The PATH to the data to be read

Returns
-------
gj : dict
An in memory version of the geojson
"""
# Please use the python json module (imported above)
# to solve this one.
with open(input_file, 'r') as fp:
gj = json.load(fp)
return gj


def read_tweets(tweet_file):
with open(tweet_file, 'r') as fp:
to_return = json.load(fp)
return to_return
32 changes: 32 additions & 0 deletions src/point.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from . import utils
import random


class Point(object):
def __init__(self, x, y, **mark):
self.x = x
self.y = y
self.mark = mark

def __add__(self, other):
return Point(self.x + other.x, self.y + other.y)

def __radd__(self, other):
return self.__add__(self, other)

def __str__(self):
return "({0}, {1})".format(self.x, self.y)

def __neg__(self):
return Point(-self.x, -self.y)

def is_coincident(self, other_point):
return utils.check_coincident((self.x, self.y), (other_point.x, other_point.y))

def shift_point(self, delta_x, delta_y):
result = utils.shift_point((self.x, self.y), delta_x, delta_y)
self.x = utils.getx(result)
self.y = utils.gety(result)

def get_array(self):
return [self.x, self.y]
154 changes: 154 additions & 0 deletions src/point_pattern.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from .point import Point
from . import analytics
import random
import numpy as np
import scipy.spatial as ss


class PointPattern(object):
def __init__(self):
self.points = []

def add_point(self, point):
self.points.append(point)

def remove_point(self, index):
try:
del(self.points[index])
except IndexError:
print('Index {} not in list'.format(index))

def average_nearest_neighbor_distance(self):
return analytics.average_nearest_neighbor_distance(self.points)

def average_nearest_neighbor_distance_kdtree(self, mark_name=None, mark_value=None):
point_list = []
points = None
if mark_name is None:
points = self.points
else:
points = list(filter(lambda current_point: current_point.mark[mark_name] == mark_value, self.points))
for point in points:
point_list.append(point.get_array())
point_stack = np.vstack(point_list)
kdtree = ss.KDTree(point_stack)
distances = []
for p in point_stack:
nearest_neighbor_distance, _ = kdtree.query(p, k=2)
distances.append(nearest_neighbor_distance[1])
nn_distances = np.array(distances)
return np.mean(distances)

def average_nearest_neighbor_distance_numpy(self):
point_list = []
for point in self.points:
point_list.append(point.get_array())
ndarray = np.array(point_list)
nearest_neighbors = []
temp_nearest_neighbor = None
# Average the nearest neighbor distance of all points.
for i, point in enumerate(ndarray):
# Find the nearest neighbor to this point.
for j, otherPoint in enumerate(ndarray):
# You are not your own neighbor.
if i == j:
continue
current_distance = ss.distance.euclidean(point, otherPoint)
# nearest neighbor will be None if this is the first neighbor we have iterated over.
if temp_nearest_neighbor is None:
temp_nearest_neighbor = current_distance
elif temp_nearest_neighbor > current_distance:
temp_nearest_neighbor = current_distance
# At this point, we've found point's nearest neighbor distance.
# Add in that distance.
nearest_neighbors.append(temp_nearest_neighbor)
temp_nearest_neighbor = None

return np.mean(nearest_neighbors)

def count_coincident(self):
"""
Returns the number of coincident points.
If two points are at the same spatial location,
that counts as two coincident points. Three coincident points
means three points at the same location.
"""
to_return = 0
handled_indices = []
for i, point_a in enumerate(self.points):
for j, point_b in enumerate(self.points):
if j in handled_indices:
continue
if i == j:
continue
if point_a.is_coincident(point_b):
to_return += 1
handled_indices.append(j)
return to_return

def list_marks(self):
marks = []
for point in self.points:
if 'color' in point.mark and point.mark['color'] not in marks:
marks.append(point.mark['color'])
return marks

def find_subset_with_mark(self, mark):
return list(filter(lambda current_point: 'color' in current_point.mark and current_point.mark['color'] == mark, self.points))

def generate_random_points(self, n=None):
if n is None:
n = len(self.points)
to_return = []
marks = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet']

for i in range(n):
to_return.append(Point(
round(random.random(), 2),
round(random.random(), 2),
color=random.choice(marks)
))
return to_return

def generate_realizations(self, k):
n = 100
to_return = []
for i in range(k):
to_return.append(
analytics.average_nearest_neighbor_distance(
self.generate_random_points(count=n)
)
)
return to_return

def get_critical_points(self):
return analytics.compute_critical(self.generate_realizations(100))

def compute_g(self, nsteps):
ds = np.linspace(0, 1, nsteps)
current_sum = 0
for i in range(nsteps):
o_i = ds[i]
# argsort puts the points in order. We want to ignore the same point each time,
# so we get element at index 1. An array minus a number performs a scalar
# operation on all elements.
current_sum += np.abs(ds[np.argsort(np.abs(ds - o_i))[1]] - o_i)
return current_sum / nsteps

def generate_random_points(self, count = 2, range_min = 0, range_max = 1, seed = None):
rng = None
if seed is None:
rng = np.random
else:
rng = np.random.RandomState(seed)
random.seed(seed)
pairs = rng.uniform(range_min, range_max, (count, 2))
to_return = []
marks = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet']
for i in range(len(pairs)):
to_return.append(Point(
pairs[i][0],
pairs[i][1],
color=random.choice(marks)
))
return to_return
Empty file added src/tests/__init__.py
Empty file.
60 changes: 60 additions & 0 deletions src/tests/point_pattern_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import unittest

from ..point_pattern import PointPattern
from ..point import Point


class TestPointPattern(unittest.TestCase):
def setUp(self):
self.point_pattern = PointPattern()
self.point_pattern.add_point(Point(5, 6, color='red'))
self.point_pattern.add_point(Point(6, 5, color='orange'))
self.point_pattern.add_point(Point(5, 6, color='orange'))
self.point_pattern.add_point(Point(5, 6))

def test_coincident(self):
self.assertEqual(self.point_pattern.count_coincident(), 3)

def test_list_marks(self):
self.assertEqual(self.point_pattern.list_marks(), ['red', 'orange'])

def test_find_subset_with_mark(self):
self.assertEqual(len(self.point_pattern.find_subset_with_mark('orange')), 2)
self.assertEqual(len(self.point_pattern.find_subset_with_mark('red')), 1)

def test_generate_random(self):
# First test does not pass in n, making n = length of current point pattern.
self.assertEqual(len(self.point_pattern.generate_random_points()), 4)
# Second test explicitly passes in n.
self.assertEqual(len(self.point_pattern.generate_random_points(10)), 10)

def test_generate_realizations(self):
self.assertEqual(len(self.point_pattern.generate_realizations(100)), 100)

def test_compute_g(self):
self.assertAlmostEqual(self.point_pattern.compute_g(10), 0.111, places=3)
self.assertAlmostEqual(self.point_pattern.compute_g(50), 0.020, places=3)
self.assertAlmostEqual(self.point_pattern.compute_g(100), 0.010, places=3)
self.assertAlmostEqual(self.point_pattern.compute_g(1000), 0.001, places=3)

def test_nearest_neighbor(self):
# Test the KDTree implementation against the original implementation.
self.assertEqual(
self.point_pattern.average_nearest_neighbor_distance_kdtree(),
self.point_pattern.average_nearest_neighbor_distance())
self.assertAlmostEqual(self.point_pattern.average_nearest_neighbor_distance_kdtree(), 0.354, places=3)
self.assertAlmostEqual(self.point_pattern.average_nearest_neighbor_distance_numpy(), 0.354, places=3)

def test_generate_random(self):
points_list = []
marks_list = []
for point in self.point_pattern.generate_random_points(count = 3, seed = 1234):
points_list.append(point.get_array())
marks_list.append(point.mark['color'])
self.assertAlmostEqual(points_list[0][0], 0.19, places=2)
self.assertAlmostEqual(points_list[0][1], 0.62, places=2)
self.assertAlmostEqual(points_list[1][0], 0.44, places=2)
self.assertAlmostEqual(points_list[1][1], 0.79, places=2)
self.assertAlmostEqual(points_list[2][0], 0.78, places=2)
self.assertAlmostEqual(points_list[2][1], 0.27, places=2)
self.assertEqual(marks_list, ['violet', 'green', 'red'])
Loading