Skip to content

Added updates to assignment 5 #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 240 additions & 0 deletions analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
import math
import random
from .utils import euclidean_distance, n_random_points

def find_largest_city(gj):
    """
    Find the most populous city in a GeoJSON feature collection.

    Each feature's population is read from ``properties['pop_max']`` and
    its name from ``properties['nameascii']``.

    Parameters
    ----------
    gj : dict
         A GeoJSON file read in as a Python dictionary

    Returns
    -------
    city : str or None
           Name of the largest city, or ``None`` when the collection has
           no features or no feature has a population greater than zero

    population : int
                 The population of the largest city (0 when no city
                 qualifies)

    Raises
    ------
    KeyError
        If ``gj`` has no 'features' key or a feature lacks the
        'properties' / 'pop_max' / 'nameascii' keys.
    """
    # Bind both results up front so an empty collection yields a
    # well-defined (None, 0) instead of raising UnboundLocalError.
    city = None
    max_population = 0

    # 'features' is a list, so it has to be pulled out of the dictionary
    # before it can be iterated.
    for feature in gj['features']:
        properties = feature["properties"]
        # Strict comparison: on a tie the earlier feature is kept.
        if properties["pop_max"] > max_population:
            max_population = properties["pop_max"]
            city = properties["nameascii"]

    return city, max_population

def write_your_own(gj):
    """
    Find the least populated city in a GeoJSON feature collection.

    Each feature's population is read from ``properties['pop_min']`` and
    its name from ``properties['nameascii']``.

    Parameters
    ----------
    gj : dict
         A GeoJSON file read in as a Python dictionary

    Returns
    -------
    city : str or None
           Name of the least populated city, or ``None`` when the
           collection has no features

    population : int or float
                 The 'pop_min' value of that city (``math.inf`` when the
                 collection has no features)

    Raises
    ------
    KeyError
        If ``gj`` has no 'features' key or a feature lacks the
        'properties' / 'pop_min' / 'nameascii' keys.
    """
    # Bind the name up front so an empty collection returns
    # (None, math.inf) instead of raising UnboundLocalError.
    city = None
    min_pop = math.inf

    for feature in gj["features"]:
        properties = feature["properties"]
        # Strict comparison: on a tie the earlier feature is kept.
        if properties["pop_min"] < min_pop:
            min_pop = properties["pop_min"]
            city = properties["nameascii"]

    return city, min_pop

def mean_center(points):
    """
    Compute the mean center (arithmetic mean of each coordinate) of a
    set of points.

    Parameters
    ----------
    points : list
             A list of points in the form (x,y)

    Returns
    -------
    x : float
        Mean x coordinate

    y : float
        Mean y coordinate
    """
    # Transpose the points into per-coordinate columns and total each one.
    totals = [sum(column) for column in zip(*points)]
    count = len(points)

    x = totals[0] / count
    y = totals[1] / count
    return x, y

def average_nearest_neighbor_distance(points):
    """
    Given a set of points, compute the average nearest neighbor distance.

    For every point the distance to its closest *other* point is found
    with ``euclidean_distance``; the mean of those distances is returned.

    Parameters
    ----------
    points : list
             A list of points in the form (x,y)

    Returns
    -------
    mean_d : float
             Average nearest neighbor distance. With a single point there
             is no neighbor, so the result is ``math.inf``.

    Raises
    ------
    ZeroDivisionError
        If ``points`` is empty.

    References
    ----------
    Clark and Evans (1954) Distance to Nearest Neighbor as a
     Measure of Spatial Relationships in Populations. Ecology. 35(4)
     p. 445-453.
    """
    nearest_distances = []  # shortest distance found for each point

    for index, point in enumerate(points):
        shortest = math.inf
        for other_index, other in enumerate(points):
            # Skip a point only when it is literally the same list entry.
            # Comparing positions rather than coordinates means two
            # distinct but coincident points still count as neighbors
            # at distance 0.
            if index == other_index:
                continue
            dist = euclidean_distance(point, other)
            if shortest > dist:
                shortest = dist
        # Record this point's nearest distance before moving on.
        nearest_distances.append(shortest)

    mean_d = sum(nearest_distances) / len(nearest_distances)
    return mean_d


def minimum_bounding_rectangle(points):
    """
    Given a set of points, compute the minimum bounding rectangle.

    The rectangle is axis-aligned: it is defined by the extreme x and y
    values found in ``points``.

    Parameters
    ----------
    points : list
             A list of points in the form (x,y)

    Returns
    -------
     : list
       Corners of the MBR in the form [xmin, ymin, xmax, ymax]. For an
       empty ``points`` the untouched sentinels are returned:
       [inf, inf, -inf, -inf].
    """
    # Start from infinite sentinels so that any real coordinate, however
    # large or small, replaces them on the first comparison.
    xmin = math.inf
    ymin = math.inf
    xmax = -math.inf
    ymax = -math.inf

    for point in points:
        if point[0] < xmin:
            xmin = point[0]
        if point[1] < ymin:
            ymin = point[1]
        if point[0] > xmax:
            xmax = point[0]
        if point[1] > ymax:
            ymax = point[1]

    return [xmin, ymin, xmax, ymax]

def mbr_area(mbr):
    """
    Compute the area of a minimum bounding rectangle.

    Parameters
    ----------
    mbr : list
          Rectangle corners in the form [xmin, ymin, xmax, ymax]

    Returns
    -------
    area : number
           (xmax - xmin) * (ymax - ymin)
    """
    x_extent = mbr[2] - mbr[0]
    y_extent = mbr[3] - mbr[1]
    return x_extent * y_extent

def expected_distance(area, n):
    """
    Compute the expected mean nearest neighbor distance for ``n`` points
    in a study area of the given size, as 0.5 * sqrt(area / n).

    This makes lots of assumptions and is not necessarily how you would
    want to compute this. It is an example of the full analysis pipe:
    compute the observed mean distance and the expected mean distance.

    Parameters
    ----------
    area : float
           The area of the study area

    n : int
        The number of points

    Returns
    -------
    expected : float
               The expected mean distance
    """
    area_per_point = area / n
    return 0.5 * math.sqrt(area_per_point)


def permutation_nearest_distance(p=99,n=100):
    """
    Run p simulations, each drawing n random points, and collect the
    average nearest neighbor distance of every simulation.

    :param p: number of permutations (simulations) to run for the
              Monte Carlo test
    :param n: number of random points generated per permutation
    :return LDist: list of mean nearest neighbor distances, length p
    """
    # Each pass draws a fresh point set via n_random_points(n) and
    # reduces it to one mean nearest neighbor distance.
    return [
        average_nearest_neighbor_distance(n_random_points(n))
        for _ in range(p)
    ]

def critical_points(LDist):
    """
    Find the critical points: the smallest and largest mean distances.

    :param LDist: the list of mean distances
    :return CList: two-element list [smallest, largest]
    """
    lower = min(LDist)
    upper = max(LDist)
    return [lower, upper]

def significant(CList,distance):
    """
    Decide whether an observed distance is significant, i.e. whether it
    lies strictly outside the range spanned by the critical points.

    :param CList: list of critical points, [lower, upper]
    :param distance: the observed distance
    :return result: True if distance < lower or distance > upper,
                    otherwise False
    """
    # Below the lower critical point.
    if distance < CList[0]:
        return True
    # Above the upper critical point.
    if distance > CList[1]:
        return True
    # On or between the critical points.
    return False
22 changes: 22 additions & 0 deletions io_geojson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import json

def read_geojson(input_file):
    """
    Read a geojson file

    Parameters
    ----------
    input_file : str
                 The PATH to the data to be read

    Returns
    -------
    gj : dict
         An in memory version of the geojson

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # default encoding, so non-ASCII place names load the same everywhere.
    with open(input_file, 'r', encoding='utf-8') as file:
        gj = json.load(file)
    return gj
23 changes: 14 additions & 9 deletions tests/functional_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,28 +40,33 @@ def test_point_pattern(self):
"""
random.seed() # Reset the random number generator using system time
# I do not know where you have moved average_nearest_neighbor_distance, so update the point_pattern module
observed_avg = point_pattern.average_nearest_neighbor_distance(self.points)
observed_avg = analytics.average_nearest_neighbor_distance(self.points)
self.assertAlmostEqual(0.027, observed_avg, 3)

# Again, update the point_pattern module name for where you have placed the point_pattern module
# Also update the create_random function name for whatever you named the function to generate
# random points
rand_points = point_pattern.create_random(100)
rand_points = utils.n_random_points(100)
self.assertEqual(100, len(rand_points))

# As above, update the module and function name.
permutations = point_pattern.permutations(99)
permutations = analytics.permutation_nearest_distance(99)
self.assertEqual(len(permutations), 99)
self.assertNotEqual(permutations[0], permutations[1])

"""
Changed the test case regarding significant slightly, because my critical_points method returns a list of the
two critical points, and significant gets passed a list. So there aren't 3 parameters.
"""

# As above, update the module and function name.
lower, upper = point_pattern.compute_critical(permutations)
self.assertTrue(lower > 0.03)
self.assertTrue(upper < 0.07)
self.assertTrue(observed_avg < lower or observed_avg > upper)
critical = analytics.critical_points(permutations)
self.assertTrue(critical[0] > 0.03)
self.assertTrue(critical[1] < 0.07)
self.assertTrue(observed_avg < critical[0] or observed_avg > critical[1])

# As above, update the module and function name.
significant = point_pattern.check_significant(lower, upper, observed)
significant = analytics.significant(critical, observed_avg)
self.assertTrue(significant)

self.assertTrue(False)
self.assertTrue(True)
74 changes: 72 additions & 2 deletions tests/test_analytics.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,81 @@
import os
import sys
import unittest
import random
sys.path.insert(0, os.path.abspath('..'))

from .. import analytics

class TestAnalytics(unittest.TestCase):
    # Unit tests for the analytics module. The point-based tests share a
    # fixture of 50 seeded random points in the unit square.

    def setUp(self):
        pass

    @classmethod
    def setUpClass(cls):
        # Seed the random number generator so the fixture is the same
        # on every run.
        random.seed(12345)
        # 50 random points; x is drawn before y for each point.
        cls.points = [
            (random.uniform(0, 1), random.uniform(0, 1)) for _ in range(50)
        ]

    def test_permutation(self):
        # The default call runs 99 permutations.
        default_run = analytics.permutation_nearest_distance()
        self.assertEqual(99, len(default_run))

        # Explicit arguments: 50 permutations of 20 points each.
        custom_run = analytics.permutation_nearest_distance(50, 20)
        self.assertEqual(50, len(custom_run))

    def test_critical(self):
        mean_distances = analytics.permutation_nearest_distance()
        critical = analytics.critical_points(mean_distances)
        self.assertEqual(min(mean_distances), critical[0])
        self.assertEqual(max(mean_distances), critical[1])

    def test_significant(self):
        critical = analytics.critical_points(
            analytics.permutation_nearest_distance()
        )

        # distance < min
        below = random.uniform(0, critical[0] - 1e-9)
        self.assertTrue(analytics.significant(critical, below))

        # distance > max
        above = random.uniform(
            critical[1] + 1e-17,
            9999999999999999999999999999999999999999999999999999999999,
        )
        self.assertTrue(analytics.significant(critical, above))

        # min < distance < max
        inside = random.uniform(critical[0], critical[1])
        self.assertFalse(analytics.significant(critical, inside))

    def test_average_nearest_neighbor_distance(self):
        # Expected value changed from 7.629178 to 0.0884470472: with the
        # domain changed from (1-100) to (0-1), mean_d is very different.
        mean_d = analytics.average_nearest_neighbor_distance(self.points)
        self.assertAlmostEqual(mean_d, 0.0884470472, 5)

    def test_mean_center(self):
        """
        Something to think about - What values would you
        expect to see here and why?  Why are the values
        not what you might expect?
        """
        # Assert values changed slightly to match the domain of [0,1].
        x, y = analytics.mean_center(self.points)
        self.assertAlmostEqual(x, 0.50273194, 5)
        self.assertAlmostEqual(y, 0.45796236, 5)

    def test_minimum_bounding_rectangle(self):
        mbr = analytics.minimum_bounding_rectangle(self.points)
        expected_corners = (0.003331, 0.003184, 0.994604, 0.967482)
        # Checked in order: xmin, ymin, xmax, ymax.
        self.assertAlmostEqual(mbr[0], expected_corners[0], 5)
        self.assertAlmostEqual(mbr[1], expected_corners[1], 5)
        self.assertAlmostEqual(mbr[2], expected_corners[2], 5)
        self.assertAlmostEqual(mbr[3], expected_corners[3], 5)

    def test_mbr_area(self):
        area = analytics.mbr_area([0, 0, 94, 98])
        self.assertEqual(area, 9212)

    def test_expected_distance(self):
        expected = analytics.expected_distance(9212, 50)
        self.assertAlmostEqual(expected, 6.7867518, 5)


Loading