diff --git a/Plot b/Plot
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/Plot
@@ -0,0 +1 @@
+
diff --git a/Plot.py b/Plot.py
new file mode 100644
index 0000000..4989e28
--- /dev/null
+++ b/Plot.py
@@ -0,0 +1,24 @@
+import json
+import folium
+def read_geojson(input_file):
+    """
+    Read a geojson file
+
+    Parameters
+    ----------
+    input_file : str
+                 The PATH to the data to be read
+
+    Returns
+    -------
+    gj : dict
+         An in memory version of the geojson
+    """
+
+    with open(input_file, 'r') as f:
+        gj=json.load(f)
+
+    # Please use the python json module (imported above)
+    # to solve this one.
+
+    return gj
diff --git a/analytics.py b/analytics.py
new file mode 100644
index 0000000..36b109e
--- /dev/null
+++ b/analytics.py
@@ -0,0 +1,193 @@
+import random
+import math
+
+import point
+
+
+def p_perms(p=99,n=100,mark=None):
+    """
+    Compute the mean nearest neighbor distance of p random point patterns.
+
+    Each pattern holds n points drawn uniformly from the unit square.
+    mark is accepted for interface compatibility and is not used.
+    """
+    mean_nn_dist = []
+    for i in range(p):
+        # Honor n; the pattern size used to be hard-coded to 100.
+        temp=create_n_rand_pts(n)
+        temp1=average_nearest_neighbor_distance(temp)
+        mean_nn_dist.append(temp1)
+
+    return mean_nn_dist
+
+def create_n_rand_pts(n):
+    """Return n random (x, y) tuples drawn uniformly from the unit square."""
+    n_pts = [(random.uniform(0,1), random.uniform(0,1)) for i in range(n)]
+    return n_pts
+
+def p_perms_marks(p=99,n=100,marks=None):
+    """
+    Compute the mean nearest neighbor distance of p random marked patterns.
+
+    marks defaults to four planet names when it is not supplied.
+    """
+    if marks is None:
+        marks=['mercury', 'venus', 'earth', 'mars']
+    mean_nn_dist = []
+    for i in range(p):
+        temp=create_marked_rand_pts(n,marks)
+        temp1=average_nearest_neighbor_distance(temp,marks)
+        mean_nn_dist.append(temp1)
+
+    return mean_nn_dist
+
+def create_marked_rand_pts(n,marks=None):
+    """Return n random point.Point objects, each marked with a choice from marks."""
+    n_pts=[]
+    for i in range(n):
+        chosen_mark=random.choice(marks)
+        temppt=point.Point(random.uniform(0,1), random.uniform(0,1),chosen_mark)
+        n_pts.append(temppt)
+    return n_pts
+
+def monte_carlo_critical_bound_check(lb,ub,obs):
+    """Return True when obs lies outside the critical bounds lb..ub."""
+    return obs < lb or obs > ub
+
+def critical_pts(distances):
+    """Return the (minimum, maximum) of distances."""
+    return min(distances), max(distances)
+
+def minimum_bounding_rectangle(points):
+    """Return [xmin, ymin, xmax, ymax] for a non-empty sequence of (x, y) points."""
+    # Seed from the first point; seeding from index 1 failed on one point.
+    xmin=points[0][0]
+    ymin=points[0][1]
+    xmax=points[0][0]
+    ymax=points[0][1]
+
+    for i in points:
+        curr_x=i[0]
+        curr_y=i[1]
+        if curr_x < xmin:
+            xmin= curr_x
+        elif curr_x > xmax:
+            xmax= curr_x
+
+        if curr_y < ymin:
+            ymin= curr_y
+        elif curr_y > ymax:
+            ymax= curr_y
+    mbr = [xmin,ymin,xmax,ymax]
+
+    return mbr
+
+def find_largest_city(gj):
+    """Return (nameascii, pop_max) of the feature with the largest pop_max."""
+    maximum=0
+    # Stays None when no feature has a positive pop_max.
+    city=None
+    features=gj['features']
+
+    for i in features:
+        if (i['properties']['pop_max']>maximum):
+            maximum=i['properties']['pop_max']
+            city=i['properties']['nameascii']
+    return city, maximum
+
+
+def write_your_own(gj):
+    """Return the number of features whose name contains a space."""
+    features=gj['features']
+    count = 0
+    for i in features:
+        if(' ' in i['properties']['name']):
+            count= count+1
+
+    return count
+
+def mean_center(points):
+    """Return the (x, y) mean of a non-empty sequence of (x, y) points."""
+    x_tot=0
+    y_tot=0
+
+    for i in points:
+        x_tot+=i[0]
+        y_tot+=i[1]
+
+    x = x_tot/len(points)
+    y = y_tot/len(points)
+
+    return x, y
+
+def average_nearest_neighbor_distance(points,mark=None):
+    """
+    Return the mean distance from each point to its nearest neighbor.
+
+    points may hold (x, y) sequences or objects with x and y attributes.
+    When mark is given, only points whose mark equals it (or is contained
+    in it, when mark is a list, tuple or set) are used. Points that share
+    identical coordinates are not treated as neighbors of each other.
+    Raises ZeroDivisionError when no points are left to measure.
+    """
+    if mark is not None:
+        if isinstance(mark, (list, tuple, set)):
+            points=[p for p in points if p.mark in mark]
+        else:
+            points=[p for p in points if p.mark == mark]
+
+    # Normalize to coordinate tuples so tuple and Point input both work.
+    coords=[]
+    for p in points:
+        if hasattr(p, 'x'):
+            coords.append((p.x, p.y))
+        else:
+            coords.append((p[0], p[1]))
+
+    mean_d = 0
+    for temp_p1 in coords:
+        dist_nearest=math.inf
+        for temp_p2 in coords:
+            if temp_p1 == temp_p2:
+                continue
+            dist = point.euclidean_distance(temp_p1, temp_p2)
+            if dist < dist_nearest:
+                dist_nearest = dist
+        mean_d += dist_nearest
+    mean_d=mean_d/(len(coords))
+
+    return mean_d
+
+
+"""
+def average_nearest_neighbor_distance_marks(points,mark=None):
+    mean_d = 0
+    for i in range(len(points)):
+        dist_nearest=1e9
+        for j in range(len(points)):
+            temp_p1 = (points[i].x, points[i].y)
+            temp_p2 = (points[j].x, points[j].y)
+            dist = utils.euclidean_distance(temp_p1, temp_p2)
+            if temp_p1 == temp_p2:
+                continue
+            elif dist < dist_nearest:
+                dist_nearest = dist;
+        mean_d += dist_nearest;
+    mean_d=mean_d/(len(points))
+    return mean_d
+
+
+def average_nearest_neighbor_distance(points):
+    mean_d = 0
+    for i in points:
+        dist_nearest=1e9
+        for j in points:
+            dist = utils.euclidean_distance(i, j)
+            if i==j:
+                continue
+            elif dist < dist_nearest:
+                dist_nearest = dist;
+        mean_d += dist_nearest;
+    mean_d=mean_d/(len(points))
+    return mean_d
+"""
diff --git a/io_geojson.py b/io_geojson.py
new file mode 100644
index 0000000..4989e28
--- /dev/null
+++ b/io_geojson.py
@@ -0,0 +1,24 @@
+import json
+import folium
+def read_geojson(input_file):
+    """
+    Read a geojson file
+
+    Parameters
+    ----------
+    input_file : str
+                 The PATH to the data to be read
+
+    Returns
+    -------
+    gj : dict
+         An in memory version of the geojson
+    """
+
+    with open(input_file, 'r') as f:
+        gj=json.load(f)
+
+    # Please use the python json module (imported above)
+    # to solve this one.
+
+    return gj
diff --git a/point.py b/point.py
new file mode 100644
index 0000000..b3c3ef9
--- /dev/null
+++ b/point.py
@@ -0,0 +1,154 @@
+import math
+from math import sqrt
+
+
+class Point():
+    """A 2D point with x and y coordinates, a magnitude and a mark."""
+
+    def __init__(self, x=0, y=0, mark=None):
+        self.x = x
+        self.y = y
+        self.magnitude = euclidean_distance((self.x,self.y), (0,0))
+        # Build a fresh list per instance; a mutable default would be shared.
+        self.mark = [] if mark is None else mark
+
+    # The arithmetic operators return a new Point and do not carry the mark.
+    def __add__(self, val):
+        return Point(self.x + val, self.y + val)
+
+    def __radd__(self, val):
+        return Point(self.x + val, self.y + val)
+
+    def __mul__(self,val):
+        return Point(self.x*val, self.y*val)
+
+    def __rmul__(self, val):
+        return Point(self.x*val, self.y*val)
+
+    def __neg__(self):
+        return Point(-self.x, -self.y)
+
+    def check_if_coincident(self,secondPoint):
+        """Return True when secondPoint has the same x and y as this point."""
+        return (self.x == secondPoint.getx()) and (self.y == secondPoint.gety())
+
+    def shiftPoint(self, x_shift, y_shift):
+        """Move the point in place by (x_shift, y_shift)."""
+        self.x += x_shift
+        self.y += y_shift
+        # Keep the cached distance from the origin in step with x and y.
+        self.magnitude = euclidean_distance((self.x,self.y), (0,0))
+
+    def getx(self):
+        return self.x
+
+    def gety(self):
+        return self.y
+
+    def get_mark(self):
+        return self.mark
+
+
+def find_largest_city(gj):
+    """Return (nameascii, pop_max) of the feature with the largest pop_max."""
+    maximum=0
+    # Stays None when no feature has a positive pop_max.
+    city=None
+    features=gj['features']
+
+    for i in features:
+        if (i['properties']['pop_max']>maximum):
+            maximum=i['properties']['pop_max']
+            city=i['properties']['nameascii']
+    return city, maximum
+
+def write_your_own(gj):
+    """Return the number of features whose name contains a space."""
+    features=gj['features']
+    count = 0
+    for i in features:
+        if(' ' in i['properties']['name']):
+            count= count+1
+
+    return count
+
+def mean_center(points):
+    """Return the (x, y) mean of a non-empty sequence of (x, y) points."""
+    x_tot=0
+    y_tot=0
+
+    for i in points:
+        x_tot+=i[0]
+        y_tot+=i[1]
+
+    x = x_tot/len(points)
+    y = y_tot/len(points)
+
+    return x, y
+
+
+
+def euclidean_distance(a, b):
+    """Return the straight-line distance between (x, y) points a and b."""
+    distance = math.sqrt((a[0] - b[0])**2 + (a[1] - b[1])**2)
+    return distance
+
+def minimum_bounding_rectangle(points):
+    """Return [xmin, ymin, xmax, ymax] for a non-empty sequence of (x, y) points."""
+    # Seed from the first point; seeding from index 1 failed on one point.
+    xmin=points[0][0]
+    ymin=points[0][1]
+    xmax=points[0][0]
+    ymax=points[0][1]
+
+    for i in points:
+        curr_x=i[0]
+        curr_y=i[1]
+        if curr_x < xmin:
+            xmin= curr_x
+        elif curr_x > xmax:
+            xmax= curr_x
+
+        if curr_y < ymin:
+            ymin= curr_y
+        elif curr_y > ymax:
+            ymax= curr_y
+    mbr = [xmin,ymin,xmax,ymax]
+
+    return mbr
+
+def mbr_area(mbr):
+    """Return the area of an [xmin, ymin, xmax, ymax] rectangle."""
+    return (mbr[3]-mbr[1])*(mbr[2]-mbr[0])
+
+def expected_distance(area, n):
+    """Return 0.5 * sqrt(area / n)."""
+    return 0.5*(sqrt(area/n))
+
+def manhattan_distance(a, b):
+    """Return the sum of absolute x and y differences between a and b."""
+    distance = abs(a[0] - b[0]) + abs(a[1] - b[1])
+    return distance
+
+def shift_point(point, x_shift, y_shift):
+    """Return the (x, y) of point moved by (x_shift, y_shift)."""
+    # Read x through getx; the whole point used to be assigned to x.
+    x = getx(point)
+    y = gety(point)
+
+    x += x_shift
+    y += y_shift
+
+    return x, y
+
+def check_coincident(a, b):
+    return a == b
+
+def check_in(point, point_list):
+    return point in point_list
+
+def getx(point):
+    return point[0]
+
+def gety(point):
+    return point[1]
diff --git a/point_pattern.py b/point_pattern.py
new file mode 100644
index 0000000..e00951f
--- /dev/null
+++ b/point_pattern.py
@@ -0,0 +1,158 @@
+import math # I am guessing that you will need to use the math module
+import random
+from point import Point
+
+import numpy as np
+import scipy.spatial as ss
+
+
+class PointPattern(object):
+    """A collection of points with helpers for nearest neighbor statistics."""
+
+    def __init__(self):
+        self.points = []
+
+    def add_pt (self,point):
+        self.points.append(point)
+
+    def remove_pt (self,index):
+        del(self.points[index])
+
+    def number_coincident_points(self):
+        num=0;
+        coincident_list=[]
+        for i, p1 in enumerate(self.points):
+            for j, p2 in enumerate(self.points):
+                if i!=j:
+                    if p2 not in coincident_list:
+                        if p1==p2:
+                            num+=1
+                            coincident_list.append(p2)
+        return num
+
+
+    def list_marks(self):
+        """Return the distinct marks of the stored points, in first-seen order."""
+        mark_list=[]
+        for i in self.points:
+            if i.mark not in mark_list:
+                mark_list.append(i.mark)
+        return mark_list
+
+    def points_by_mark(self):
+        #TODO: not implemented yet; should return a subset of points by the mark
+        return 0;
+
+    def n_rand_pts(self,n=None,marks=None):
+        """Return n random marked Points; n defaults to the number of stored points."""
+        temp=[]
+        if n is None:
+            n=len(self.points)
+
+        # Fall back to the marks already stored; the class has no marks attribute.
+        if marks is None:
+            marks=self.list_marks()
+
+        for i in range(n):
+            temp.append(Point(random.uniform(0,1),random.uniform(0,1),random.choice(marks)))
+
+        return temp
+
+    def gen_rand_pts(self,upper_bound=1,lower_bound=0,num_pts=100):
+
+        return np.random.uniform(lower_bound,upper_bound, (num_pts,2));
+
+    @staticmethod
+    def critical_pts(distances):
+        """Return the (minimum, maximum) of distances."""
+        return min(distances), max(distances)
+
+    def nearest_neighbor_dist_numpy(self):
+
+        return
+
+    def average_nearest_neighbor_distance_kd(self,pts=None):
+        """Return the mean nearest neighbor distance, found with a KD-tree."""
+        mean=0;
+
+
+        # Identity test: comparing a numpy array with == is elementwise.
+        if pts is None:
+            points = self.points
+        else:
+            points=pts
+
+        kdtree = ss.KDTree(points);
+        for i in points:
+            # k=2 and item(1): the closest hit is the query point itself.
+            dist_nearest, nn_pt = kdtree.query(i, k=2)
+            mean+=dist_nearest.item(1);
+
+        #print(dist_nearest.item(1))
+        return mean/len(points);
+
+    def average_nearest_neighbor_distance_numpy(self,pts=None):
+        '''
+        computing using numpy
+        '''
+        points=[]
+        if pts is None:
+            points = self.points
+        else:
+            points=pts
+
+        nn_dists = np.array([])
+
+        n_dist_current=math.inf
+        #print(pts)
+        for i, point1 in enumerate(points):
+            for j, point2 in enumerate(points):
+
+                if i==j:
+                    continue
+                elif(ss.distance.euclidean(point1, point2) neg_words:
+            return 'Positive'
+        elif pos_words < neg_words:
+            return 'Negative'
+        elif pos_words == neg_words:
+            return 'Neutral'
diff --git a/view.py b/view.py
new file mode 100644
index 0000000..154e7b4
--- /dev/null
+++ b/view.py
@@ -0,0 +1,193 @@
+from PyQt4 import QtGui, QtWebKit, QtCore
+from point_pattern import PointPattern
+import os, sys, folium, tweet, io_geojson, random, Plot
+
+
+class View(QtGui.QMainWindow):
+    """Main window that maps tweets with folium and runs point pattern analytics."""
+
+    def __init__(self):
+        super(View, self).__init__()
+        self.map = None
+        self.web_view = None
+        self.map_dir = 'temp/tweet_map.html'
+        self.full_tweet_list = None
+        self.tweet_pattern = None
+        self.init_ui()
+        self.file = None
+
+    def init_ui(self):
+        """Build the map view, the menus and their actions, then show the window."""
+
+        #Make Directory for map
+        os.makedirs('temp', exist_ok=True)
+        self.web_view = QtWebKit.QWebView()
+
+        #Set location to sky harbor
+        self.map = folium.Map(location=[33.4373, -112.0078])
+
+        #zoom out enough to see the entire greater phoenix area
+        self.map.zoom_start = 9
+        self.map.save('temp/tweet_map.html')
+        self.web_view.load(QtCore.QUrl('temp/tweet_map.html'))
+        self.setCentralWidget(self.web_view)
+
+        #create a tool bar with the option of opening the Json file
+        open_action = QtGui.QAction('Open Json Twitter File', self)
+        negative_action = QtGui.QAction('Show Neg Tweets', self)
+        pos_action = QtGui.QAction('Show Pos Tweets',self)
+        neutral_action = QtGui.QAction('Show Neutral Tweets',self)
+        average_nn_action = QtGui.QAction('Compute Average NN Distance',self)
+        plot_gfunc_action = QtGui.QAction('Plot G Function',self)
+
+
+        menu_bar = self.menuBar()
+        file_menu = menu_bar.addMenu('&File')
+        file_menu.addAction(open_action)
+
+        tweet_menu = menu_bar.addMenu('&Tweet Menu')
+        tweet_menu.addAction(negative_action)
+        tweet_menu.addAction(pos_action)
+        tweet_menu.addAction(neutral_action)
+
+        analytics_menu = menu_bar.addMenu('&Analytics Menu')
+        analytics_menu.addAction(average_nn_action)
+        analytics_menu.addAction(plot_gfunc_action)
+
+        #the action triggered will cause the open function to execute
+        open_action.triggered.connect(self.open)
+        negative_action.triggered.connect(self.show_negative_tweets)
+        pos_action.triggered.connect(self.show_positive_tweets)
+        neutral_action.triggered.connect(self.show_neutral_tweets)
+        average_nn_action.triggered.connect(self.disp_average_nn_dist)
+        plot_gfunc_action.triggered.connect(self.plot_gfunc)
+
+        self.setGeometry(100, 100, 600, 600)
+        self.setWindowTitle('Map of Tweets in Phoenix')
+        self.show()
+
+    def open(self):
+        """Ask for a Json tweet file, load it and map every tweet in it."""
+        file = QtGui.QFileDialog.getOpenFileName(self, caption='Open Json Twitter File')
+        self.file=file
+        if not file:
+            return
+
+        tweets = []
+        tweet_data = io_geojson.read_geojson(file)
+        for _ in tweet_data:
+            tweets.append(tweet.Tweet(_))
+        self.full_tweet_list=tweets
+        self.show_folium_marks(tweets)
+
+
+    def show_negative_tweets(self):
+        self.show_folium_marks(self.full_tweet_list,'Negative')
+
+    def show_positive_tweets(self):
+        self.show_folium_marks(self.full_tweet_list,'Positive')
+
+    def show_neutral_tweets(self):
+        # Filter on 'Neutral'; this handler used to pass 'Negative'.
+        self.show_folium_marks(self.full_tweet_list,'Neutral')
+
+    def _show_message(self, text):
+        """Show text in a modal information box."""
+        msg = QtGui.QMessageBox()
+        msg.setIcon(QtGui.QMessageBox.Information)
+        msg.setText(text)
+        msg.exec_()
+
+    def disp_average_nn_dist(self):
+        """Show the average nearest neighbor distance of the mapped tweets."""
+        if self.tweet_pattern is None:
+            # Tell the user what is missing instead of returning silently.
+            self._show_message("Select Tweet File First")
+            return
+
+        text = "Average nearest neighbor distance: "+str(self.tweet_pattern.average_nearest_neighbor_distance_numpy())
+        self._show_message(text)
+
+    def plot_gfunc(self):
+        """
+        Plot g_func results of the mapped tweets for 5, 10, 20, 30 and 100.
+
+        NOTE(review): the Plot.py added by this patch does not define Window;
+        confirm which module provides it.
+        """
+        if self.tweet_pattern is None:
+            self._show_message("Select Tweet File First")
+            return
+
+        data5=self.tweet_pattern.g_func(5)
+        data10=self.tweet_pattern.g_func(10)
+        data20=self.tweet_pattern.g_func(20)
+        data30 = self.tweet_pattern.g_func(30)
+        data100 = self.tweet_pattern.g_func(100)
+
+        plot = Plot.Window()
+
+        plot.plot(data5[1],data5[0],'G(5)','red')
+        plot.plot(data10[1], data10[0], 'G(10)','blue')
+        plot.plot(data20[1], data20[0], 'G(20)','green')
+        plot.plot(data30[1], data30[0], 'G(30)','yellow')
+        plot.plot(data100[1], data100[0], 'G(100)','pink')
+
+        plot.exec_()
+
+    def show_folium_marks(self, tweets,mark=None):
+        """
+        Map the tweets whose mark equals mark and keep them as the current pattern.
+
+        mark=None keeps every tweet. tweets may be None or empty (no file
+        loaded yet); the map is then redrawn without markers and
+        self.tweet_pattern is left as None. At most 10000 tweets are read.
+        """
+        self.map = folium.Map(location=[33.4373, -112.0078])
+        self.map.zoom_start = 9
+
+        tweet_lat=0
+        tweet_lon=0
+
+        #set a maximum number of tweets to process
+        limiter=10000
+        divisor = 0
+        tweet_pattern = PointPattern()
+        self.tweet_pattern = None
+        for i, tw in enumerate(tweets or []):
+
+            if i>limiter-1:
+                break
+
+            tweet_point = tw.gen_rand_pt()
+            # One test replaces the four copy-pasted per-mark branches.
+            if mark is not None and tweet_point.get_mark()!=mark:
+                continue
+
+            subpoint=[tweet_point.getx(), tweet_point.gety()]
+            tweet_lat+=tweet_point.getx()
+            tweet_lon+=tweet_point.gety()
+            folium.Marker(subpoint,tw.text).add_to(self.map)
+            divisor+=1
+            tweet_pattern.add_pt(subpoint)
+
+        if divisor:
+            self.tweet_pattern=tweet_pattern
+            #recenter the map on the average of the tweet coordinates;
+            #skipped when nothing matched to avoid dividing by zero
+            self.map.location=[tweet_lat/divisor,tweet_lon/divisor]
+        print('folium printed')
+        self.map.save('temp/tweet_map.html')
+        self.web_view.load(QtCore.QUrl('temp/tweet_map.html'))
+        self.setGeometry(100, 100, 600, 600)
+        self.setWindowTitle('Map of Tweets in Phoenix')
+        self.show()
+
+
+def main():
+    app = QtGui.QApplication(sys.argv)
+    view=View()
+    sys.exit(app.exec_())
+
+if __name__ == '__main__':
+    main()