diff --git a/GIS321_All_Tweets.png b/GIS321_All_Tweets.png
new file mode 100644
index 0000000..2224ba0
Binary files /dev/null and b/GIS321_All_Tweets.png differ
diff --git a/GIS321_E11_G_Functions.png b/GIS321_E11_G_Functions.png
new file mode 100644
index 0000000..1b3c374
Binary files /dev/null and b/GIS321_E11_G_Functions.png differ
diff --git a/GIS321_E11_SubsetTweets.png b/GIS321_E11_SubsetTweets.png
new file mode 100644
index 0000000..d4c7329
Binary files /dev/null and b/GIS321_E11_SubsetTweets.png differ
diff --git a/Tweet.py b/Tweet.py
new file mode 100644
index 0000000..2f5f922
--- /dev/null
+++ b/Tweet.py
@@ -0,0 +1,118 @@
+'''
+Created on Apr 19, 2016
+
+@author: Max Ruiz
+'''
+
+import utils
+import random
+
+from nltk.corpus import opinion_lexicon
+from nltk.tokenize import treebank
+
+
+# Tokenizer and word sets, built on first use and shared by every tweet.
+_LEXICON = None
+
+
+def _load_lexicon():
+    """
+    Return (tokenizer, positive_words, negative_words), building them once.
+
+    The opinion lexicon is copied into sets so that each membership test
+    is a hash lookup rather than a scan of the corpus word list.
+    """
+    global _LEXICON
+    if _LEXICON is None:
+        _LEXICON = (treebank.TreebankWordTokenizer(),
+                    frozenset(opinion_lexicon.positive()),
+                    frozenset(opinion_lexicon.negative()))
+    return _LEXICON
+
+
+class Tweet(object):
+    """
+    A tweet decoded from JSON, labelled with a lexicon-based sentiment.
+
+    Parameters
+    ----------
+    tweet_json_obj : dict
+        One decoded tweet. Every key read in __init__ must be present,
+        including a "place" that carries a "bounding_box".
+    """
+
+    def __init__(self, tweet_json_obj):
+        self.twText = tweet_json_obj["text"]
+        # First ring of the bounding box: a list of [longitude, latitude].
+        self.twBoundingBox = tweet_json_obj["place"]["bounding_box"]["coordinates"][0]
+        self.twID = tweet_json_obj["id"]
+        self.twRetweetCount = tweet_json_obj["retweet_count"]
+        self.twRepliedTo = tweet_json_obj["in_reply_to_screen_name"]
+        self.twScreenName = tweet_json_obj["user"]["screen_name"]
+        self.twDate = tweet_json_obj["created_at"]
+
+        self.sentiment = self.classifier(self.twText)
+        self.mark = self.sentiment
+
+    def getLatitude(self, corner):
+        """Return the latitude of bounding-box corner number `corner`."""
+        return self.twBoundingBox[corner][1]
+
+    def getLongitude(self, corner):
+        """Return the longitude of bounding-box corner number `corner`."""
+        return self.twBoundingBox[corner][0]
+
+    def getRandPointInBoundingBox(self):
+        """
+        Draw a uniformly random (latitude, longitude) inside the bounding box.
+
+        The extent is taken over every corner, so the result does not depend
+        on the order in which the corners are listed.
+        """
+        lats = [corner[1] for corner in self.twBoundingBox]
+        lons = [corner[0] for corner in self.twBoundingBox]
+        latitude = random.uniform(min(lats), max(lats))
+        longitude = random.uniform(min(lons), max(lons))
+        return latitude, longitude
+
+    def getMark(self):
+        """Return the mark of this tweet, which is its sentiment label."""
+        return self.mark
+
+    def getPoint(self):
+        """
+        Return a random point of the bounding box as (latitude, longitude).
+
+        Every call draws a new point.
+        """
+        return self.getRandPointInBoundingBox()
+
+    def classifier(self, sentence):
+        """
+        Label a sentence by counting opinion-lexicon words.
+
+        Parameters
+        ----------
+        sentence : str
+            The text to classify
+
+        Returns
+        -------
+        : str
+            'Positive', 'Negative' or 'Neutral' (equal counts)
+        """
+        tokenizer, positive, negative = _load_lexicon()
+        pos_words = 0
+        neg_words = 0
+        for token in tokenizer.tokenize(sentence):
+            word = token.lower()
+            if word in positive:
+                pos_words += 1
+            elif word in negative:
+                neg_words += 1
+
+        if pos_words > neg_words:
+            return 'Positive'
+        if pos_words < neg_words:
+            return 'Negative'
+        return 'Neutral'
diff --git a/analytics.py b/analytics.py
new file mode 100644
index 0000000..3698544
--- /dev/null
+++ b/analytics.py
@@ -0,0 +1,236 @@
+'''
+Created on Feb 23, 2016
+
+@author: Max Ruiz
+'''
+
+import math
+
+from nltk.corpus import opinion_lexicon
+from nltk.tokenize import treebank
+
+
+def compute_critical(p):
+    """
+    Given a list, p, of distances (constants), determine the upper and lower
+    bound (or max and min value) of the set. The values in p are assumed floats.
+
+    Parameter(s): list p
+
+    Return(s): float lower, float upper
+    """
+    return min(p), max(p)
+
+
+def check_significant(lower, upper, observed):
+    """
+    Check if given observed point is outside or within a given lower and upper
+    bound.
+
+    Parameter(s): float lower, float upper, float observed.
+
+    Return(s): boolean, True when observed lies outside [lower, upper]
+    """
+    return observed < lower or observed > upper
+
+
+def find_largest_city(gj):
+    """
+    Iterate through a geojson feature collection and
+    find the largest city.  Assume that the key
+    to access the maximum population is 'pop_max'.
+
+    Parameters
+    ----------
+    gj : dict
+         A GeoJSON file read in as a Python dictionary
+
+    Returns
+    -------
+    city : str
+           The largest city, or None when no feature has a
+           population above zero
+
+    population : int
+                 The population of the largest city
+    """
+    city = None
+    max_population = 0
+    for feat in gj['features']:
+        test_max_pop = feat['properties']['pop_max']
+        if test_max_pop > max_population:
+            max_population = test_max_pop
+            city = feat['properties']['name']
+
+    return city, max_population
+
+
+def mean_center(points):
+    """
+    Given a set of points, compute the mean center
+
+    Parameters
+    ----------
+    points : list
+         A list of points in the form (x,y)
+
+    Returns
+    -------
+    x : float
+        Mean x coordinate
+
+    y : float
+        Mean y coordinate
+    """
+    sumx = 0.0
+    sumy = 0.0
+    for coord in points:
+        sumx += coord[0]
+        sumy += coord[1]
+    x = sumx / len(points)
+    y = sumy / len(points)
+
+    return x, y
+
+
+def average_nearest_neighbor_distance_tuples(points):
+    """
+    Given a set of points, compute the average nearest neighbor.
+
+    Points that share the coordinates of the point under consideration are
+    not counted as its neighbors. A point without any neighbor adds nothing
+    to the sum, and an empty list gives 0.0.
+
+    Parameters
+    ----------
+    points : list
+     A list of points in the form (x,y)
+
+    Returns
+    -------
+    mean_d : float
+        Average nearest neighbor distance
+
+    References
+    ----------
+    Clark and Evans (1954) Distance to Nearest Neighbor as a
+     Measure of Spatial Relationships in Populations. Ecology. 35(4)
+     p. 445-453.
+    """
+    if not points:
+        return 0.0
+
+    min_dist_sum = 0
+    for coord_n in points:
+        distances = [euclidean_distance(coord_n, coord_m)
+                     for coord_m in points if coord_m != coord_n]
+        if distances:
+            min_dist_sum += min(distances)
+
+    mean_d = min_dist_sum / len(points)
+
+    return mean_d
+
+
+def average_nearest_neighbor_distance(points, mark=None):
+    """
+    Compute the average nearest neighbor distance of marked point objects.
+
+    Parameters
+    ----------
+    points : list
+        Objects that provide getPoint() and getMark()
+
+    mark : object
+        When given, only the points whose getMark() equals mark are used
+
+    Returns
+    -------
+    mean_d : float
+        Average nearest neighbor distance
+    """
+    if mark is not None:
+        coords = [pt.getPoint() for pt in points if pt.getMark() == mark]
+    else:
+        coords = [pt.getPoint() for pt in points]
+    return average_nearest_neighbor_distance_tuples(coords)
+
+
+def minimum_bounding_rectangle(points):
+    """
+    Given a set of points, compute the minimum bounding rectangle.
+
+    Parameters
+    ----------
+    points : list
+         A list of points in the form (x,y)
+
+    Returns
+    -------
+     : list
+       Corners of the MBR in the form [xmin, ymin, xmax, ymax];
+       [0, 0, 0, 0] when there are no points
+    """
+    if not points:
+        return [0, 0, 0, 0]
+
+    xs = [coord[0] for coord in points]
+    ys = [coord[1] for coord in points]
+    return [min(xs), min(ys), max(xs), max(ys)]
+
+
+def mbr_area(mbr):
+    """
+    Compute the area of a minimum bounding rectangle
+    """
+    length = mbr[3] - mbr[1]
+    width = mbr[2] - mbr[0]
+    area = length * width
+
+    return area
+
+
+def expected_distance(area, n):
+    """
+    Compute the expected mean distance given
+    some study area.
+
+    This makes lots of assumptions and is not
+    necessarily how you would want to compute
+    this.  This is just an example of the full
+    analysis pipe, e.g. compute the mean distance
+    and the expected mean distance.
+
+    Parameters
+    ----------
+    area : float
+           The area of the study area
+
+    n : int
+        The number of points
+    """
+
+    expected = 0.5 * math.sqrt(area / n)
+    return expected
+
+
+def euclidean_distance(a, b):
+    """
+    Compute the Euclidean distance between two points
+
+    Parameters
+    ----------
+    a : tuple
+        A point in the form (x,y)
+
+    b : tuple
+        A point in the form (x,y)
+
+    Returns
+    -------
+
+    distance : float
+               The Euclidean distance between the two points
+    """
+    distance = math.sqrt((a[0] - b[0])**2 + (a[1] - b[1])**2)
+    return distance
diff --git a/io_geojson.py b/io_geojson.py
new file mode 100644
index 0000000..2bdd3a1
--- /dev/null
+++ b/io_geojson.py
@@ -0,0 +1,44 @@
+'''
+Created on Apr 19, 2016
+
+@author: Max Ruiz
+'''
+import json
+
+
+def read_geojson(input_file):
+    """
+    Read a GeoJSON file with the json module.
+
+    Parameters
+    ----------
+    input_file : str
+        Path of the file to read, decoded as UTF-8
+
+    Returns
+    -------
+    gj : object
+        The decoded content of the file
+    """
+    with open(input_file, 'r', encoding='utf-8') as f:
+        gj = json.load(f)
+    return gj
+
+
+def processTweets(tweets):
+    """
+    Read a JSON file of tweets.
+
+    Parameters
+    ----------
+    tweets : str
+        Path of the file to read, decoded as UTF-8
+
+    Returns
+    -------
+    jfile : object
+        The decoded content of the file
+    """
+    with open(tweets, 'r', encoding='utf-8') as f:
+        jfile = json.load(f)
+    return jfile
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..e71edc5
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,253 @@
+'''
+Created on Feb 23, 2016
+
+@author: Max Ruiz
+'''
+import math
+import random
+#import analytics
+from analytics import average_nearest_neighbor_distance
+from point import Point
+
+
+def g_function(pointsArr, d):
+    """
+    Compute G(d), the share of points whose nearest neighbor lies within
+    distance d.
+
+    Points that share the coordinates of the point under consideration are
+    not counted as its neighbors.
+
+    Parameters
+    ----------
+    pointsArr : list
+        A list of points in the form (x,y)
+
+    d : float
+        The distance threshold, in the units of the coordinates
+
+    Returns
+    -------
+    : float
+        A value between 0 and 1; 0.0 for an empty list
+    """
+    if not pointsArr:
+        return 0.0
+
+    within = 0
+    for x in pointsArr:
+        neighbors = [euclidean_distance(x, y) for y in pointsArr if y != x]
+        if neighbors and min(neighbors) <= d:
+            within += 1
+
+    return within / len(pointsArr)
+
+
+def create_random_marked_points(n, marks=None):
+    """
+    Create n random points with integer coordinates between 0 and 100.
+
+    Parameters
+    ----------
+    n : int
+        The number of points to create
+
+    marks : list
+        Marks to choose from at random, one per point. With None or an
+        empty list the points are created without a mark.
+
+    Returns
+    -------
+    randPoints : list
+        A list of n Point objects
+    """
+    random.seed()
+    randPoints = []
+    for _ in range(n):
+        _x = random.randint(0, 100)
+        _y = random.randint(0, 100)
+        if marks:
+            randPoints.append(Point(_x, _y, random.choice(marks)))
+        else:
+            randPoints.append(Point(_x, _y))
+    return randPoints
+
+
+def permutations(p=99, n=100, marks=None):
+    """
+    Calculate p number of average_nearest_neighbor_distances from n number
+    of randomly generated points. Return list of size p with distance values.
+
+    Parameter(s): integer p, integer n, list marks (passed on to
+    create_random_marked_points)
+
+    Return(s): list perm
+    """
+    perm = []
+    for _ in range(p):
+        points = create_random_marked_points(n, marks)
+        avg_nnd = average_nearest_neighbor_distance(points)
+        perm.append(avg_nnd)
+
+    return perm
+
+
+def manhattan_distance(a, b):
+    """
+    Compute the Manhattan distance between two points
+
+    Parameters
+    ----------
+    a : tuple
+        A point in the form (x,y)
+
+    b : tuple
+        A point in the form (x,y)
+
+    Returns
+    -------
+    distance : float
+               The Manhattan distance between the two points
+    """
+    distance = abs(a[0] - b[0]) + abs(a[1] - b[1])
+    return distance
+
+
+def euclidean_distance(a, b):
+    """
+    Compute the Euclidean distance between two points
+
+    Parameters
+    ----------
+    a : tuple
+        A point in the form (x,y)
+
+    b : tuple
+        A point in the form (x,y)
+
+    Returns
+    -------
+
+    distance : float
+               The Euclidean distance between the two points
+    """
+    distance = math.sqrt((a[0] - b[0])**2 + (a[1] - b[1])**2)
+    return distance
+
+
+def shift_point(point, x_shift, y_shift):
+    """
+    Shift a point by some amount in the x and y directions
+
+    Parameters
+    ----------
+    point : tuple
+            in the form (x,y)
+
+    x_shift : int or float
+              distance to shift in the x direction
+
+    y_shift : int or float
+              distance to shift in the y direction
+
+    Returns
+    -------
+    new_x : int or float
+            shifted x coordinate
+
+    new_y : int or float
+            shifted y coordinate
+
+    Note that the new_x new_y elements are returned as a tuple
+
+    Example
+    -------
+    >>> point = (0,0)
+    >>> shift_point(point, 1, 2)
+    (1, 2)
+    """
+    x = getx(point)
+    y = gety(point)
+
+    x += x_shift
+    y += y_shift
+
+    return x, y
+
+
+def check_coincident(a, b):
+    """
+    Check whether two points are coincident
+
+    Parameters
+    ----------
+    a : tuple
+        A point in the form (x,y)
+
+    b : tuple
+        A point in the form (x,y)
+
+    Returns
+    -------
+    equal : bool
+            Whether the points are equal
+    """
+    return a == b
+
+
+def check_in(point, point_list):
+    """
+    Check whether point is in the point list
+
+    Parameters
+    ----------
+    point : tuple
+            In the form (x,y)
+
+    point_list : list
+                 in the form [point, point_1, point_2, ..., point_n]
+
+    Returns
+    -------
+    : bool
+        Whether point is an element of point_list
+    """
+    return point in point_list
+
+
+def getx(point):
+    """
+    A simple method to return the x coordinate of
+    a tuple in the form (x,y).  We will look at
+    sequences in a coming lesson.
+
+    Parameters
+    ----------
+    point : tuple
+            in the form (x,y)
+
+    Returns
+    -------
+     : int or float
+       x coordinate
+    """
+    return point[0]
+
+
+def gety(point):
+    """
+    A simple method to return the y coordinate of
+    a tuple in the form (x,y).  We will look at
+    sequences in a coming lesson.
+
+    Parameters
+    ----------
+    point : tuple
+            in the form (x,y)
+
+    Returns
+    -------
+     : int or float
+       y coordinate
+    """
+    return point[1]
diff --git a/view.py b/view.py
new file mode 100644
index 0000000..520907d
--- /dev/null
+++ b/view.py
@@ -0,0 +1,252 @@
+from PyQt5 import QtCore, QtWidgets, QtWebKitWidgets
+from analytics import average_nearest_neighbor_distance
+from utils import g_function
+from tkinter import Tk
+import tkinter.messagebox as mb
+import sys
+import folium
+import io_geojson
+import Tweet
+import random
+import matplotlib.pyplot as plt
+
+PHX_COORDS = [33.441957, -112.072913]
+# Miles per unit of longitude near Phoenix; derived in the docstring of
+# Ui_MainWindow.nearestNeighborTweets.
+NORMALIZED_PHX_DISTANCE = 57.68
+# Average nearest neighbor distances in miles, as reported by
+# Ui_MainWindow.nearestNeighborTweets.
+AVG_DIST_ALL = 4.293
+AVG_DIST_POS = 9.113
+AVG_DIST_NEG = 11.12
+AVG_DIST_NEU = 5.367
+# Largest distance sampled by the G function plot, keyed by sentiment.
+AVG_DIST_BY_SENTIMENT = {
+    'All': AVG_DIST_ALL,
+    'Positive': AVG_DIST_POS,
+    'Negative': AVG_DIST_NEG,
+    'Neutral': AVG_DIST_NEU,
+}
+
+
+class Ui_MainWindow(object):
+    """
+    Main window: a map of tweets with menus to load, filter and analyze them.
+
+    Parameters
+    ----------
+    MainWindow : QtWidgets.QMainWindow
+        The window that receives the menu bar, status bar and map view
+    """
+
+    def __init__(self, MainWindow):
+        self.MainWindow = MainWindow
+        self.MainWindow.setObjectName("MainWindow")
+        self.MainWindow.resize(434, 316)
+
+        self.map = folium.Map(location=PHX_COORDS, zoom_start=8)
+        self.mapFile = "osm_map.html"
+        self.map.save(self.mapFile)
+        # Sentiment of the tweets on display; None until a file is loaded.
+        self.sentiment = None
+        self.tweetObjArr = []
+
+        self.setupUi()
+
+    def setupUi(self):
+        """Create the menu bar, the status bar and the central map view."""
+        self.setupMenuBar()
+        self.setupStatusBar()
+
+        # place widgets here
+        self.webView = QtWebKitWidgets.QWebView(self.MainWindow)
+        self._showMapFile()
+
+        self.MainWindow.setCentralWidget(self.webView)
+
+    def _makeAction(self, text, statusTip, slot, shortcut=None):
+        """Create a menu action wired to slot."""
+        action = QtWidgets.QAction(self.MainWindow)
+        action.setText(text)
+        if shortcut is not None:
+            action.setShortcut(shortcut)
+        action.setStatusTip(statusTip)
+        action.triggered.connect(slot)
+        return action
+
+    def setupMenuBar(self):
+        """Build the File, Visualize and Compute menus."""
+        exitAction = self._makeAction(
+            'Exit', 'Exit Application', QtWidgets.qApp.quit, 'Ctrl+Q')
+        openAction = self._makeAction(
+            'Open', 'Open a tweet .json file',
+            lambda x: self.visualizeTweets("All"), 'Ctrl+O')
+        posTwAction = self._makeAction(
+            'Positive Tweets',
+            'Remap tweet locations based on Positive senitmental tweets',
+            lambda x: self.visualizeTweets('Positive'))
+        negTwAction = self._makeAction(
+            'Negative Tweets',
+            'Remap tweet locations based on negative senitmental tweets',
+            lambda x: self.visualizeTweets('Negative'))
+        neuTwAction = self._makeAction(
+            'Neutral Tweets',
+            'Remap tweet locations based on neutral senitmental tweets',
+            lambda x: self.visualizeTweets('Neutral'))
+        nnTwAction = self._makeAction(
+            'Nearest Neighbor', 'Find nearest neighbor of tweets',
+            self.nearestNeighborTweets)
+        gfTwAction = self._makeAction(
+            'G Function', 'Compute the G function on tweets',
+            self.compGFunction)
+
+        menubar = QtWidgets.QMenuBar(self.MainWindow)
+
+        menuFile = QtWidgets.QMenu(menubar)
+        menuFile.setTitle('File')
+        menuFile.addAction(exitAction)
+        menuFile.addAction(openAction)
+
+        menuVisu = QtWidgets.QMenu(menubar)
+        menuVisu.setTitle('Visualize')
+        menuVisu.addAction(posTwAction)
+        menuVisu.addAction(negTwAction)
+        menuVisu.addAction(neuTwAction)
+
+        menuComp = QtWidgets.QMenu(menubar)
+        menuComp.setTitle('Compute')
+        menuComp.addAction(nnTwAction)
+        menuComp.addAction(gfTwAction)
+
+        self.MainWindow.setMenuBar(menubar)
+
+        menubar.addAction(menuFile.menuAction())
+        menubar.addAction(menuVisu.menuAction())
+        menubar.addAction(menuComp.menuAction())
+
+    def setupStatusBar(self):
+        """Attach an empty status bar to the main window."""
+        self.statusbar = QtWidgets.QStatusBar(self.MainWindow)
+        self.MainWindow.setStatusBar(self.statusbar)
+
+    def _showMessage(self, show, title, text):
+        """
+        Show a tkinter message box and release its hidden root window.
+
+        show is one of the tkinter.messagebox functions, e.g. mb.showerror.
+        """
+        root = Tk()
+        root.withdraw()
+        try:
+            show(title, text)
+        finally:
+            root.destroy()
+
+    def _showMapFile(self):
+        """Load the saved map page into the web view."""
+        with open(self.mapFile, 'r', encoding="utf8") as f:
+            self.webView.setHtml(f.read())
+
+    def visualizeTweets(self, sentiment):
+        """
+        Ask for a tweet file and map its tweets of the given sentiment.
+
+        sentiment is 'All', 'Positive', 'Negative' or 'Neutral'. Nothing
+        changes when the dialog is cancelled or the file cannot be read.
+        """
+        jfile = self.openJFile()
+        if not jfile:
+            return
+        try:
+            tweetObjs = self.processTweetFile(jfile)
+        except (OSError, ValueError, KeyError, TypeError):
+            self._showMessage(mb.showinfo, "Error", 'Could not open tweet file.')
+            return
+
+        self.sentiment = sentiment
+        self.tweetObjArr = self.filterTweets(tweetObjs)
+        self.mapTweets(self.tweetObjArr)
+
+    def nearestNeighborTweets(self):
+        """ In this function, I will normalize it to the unit mile
+        by multiplying the result by the distance in miles traveling
+        about 1 unit longitudinally at roughly the same latitude.
+
+        first coord = (33.471798, -112.445462)
+        second coord = (33.451355, -111.442852)
+        Distance (Miles / Spherical Earth) = 57.68 miles
+        """
+        if self.sentiment is None:
+            self._showMessage(mb.showerror, "Error", 'Please Open or Visualize tweets first.')
+            return
+
+        mark = None if self.sentiment == 'All' else self.sentiment
+        nnd = NORMALIZED_PHX_DISTANCE * average_nearest_neighbor_distance(self.tweetObjArr, mark)
+        self._showMessage(mb.showinfo, "Info", 'The average nearest neighbor of\n{0} tweets is {1}'.format(self.sentiment, nnd))
+
+        # Result for All Tweets = 4.293
+        # Result for Positive Tweets = 9.113
+        # Result for Negative Tweets = 11.12
+        # Result for Neutral Tweets = 5.367
+
+    def compGFunction(self):
+        """Plot the G function of the displayed tweets against distance."""
+        if self.sentiment is None:
+            self._showMessage(mb.showerror, "Error", 'Please Open or Visualize tweets first.')
+            return
+
+        pointsArr = [tw.getPoint() for tw in self.tweetObjArr]
+
+        samples = 200
+        step = AVG_DIST_BY_SENTIMENT[self.sentiment] / samples
+        d = [step * x for x in range(samples)]
+        # d is in miles while the points are in coordinate units, so each
+        # threshold is converted back before it is compared.
+        G_d = [g_function(pointsArr, i / NORMALIZED_PHX_DISTANCE) for i in d]
+
+        plt.figure(1)
+        plt.plot(d, G_d, 'bo')
+        plt.title('G function of {} tweets.'.format(self.sentiment))
+        plt.xlabel('Distance d (miles)')
+        plt.ylabel('G(d)')
+        plt.show()
+
+    def openJFile(self):
+        """Ask for a tweet .json file; return its path, '' when cancelled."""
+        return QtWidgets.QFileDialog.getOpenFileName(
+            parent=self.MainWindow, caption='Open a tweet .json file',
+            filter='*.json')[0]
+
+    def processTweetFile(self, jfile):
+        """Read the tweet file at path jfile into a list of Tweet objects."""
+        return [Tweet.Tweet(t) for t in io_geojson.processTweets(jfile)]
+
+    def filterTweets(self, tweetObjs):
+        """Return the tweets whose sentiment matches self.sentiment."""
+        if self.sentiment == "All":
+            return list(tweetObjs)
+        return [tw for tw in tweetObjs if tw.sentiment == self.sentiment]
+
+    def mapTweets(self, tweetObjs):
+        """Redraw the map with one marker per tweet and display it."""
+        # Fixed seed: the marker positions drawn below are reproducible.
+        random.seed(1212)
+
+        # create new map for new file
+        self.map = folium.Map(location=PHX_COORDS, zoom_start=8)
+
+        for tw in tweetObjs:
+            latitude, longitude = tw.getRandPointInBoundingBox()
+            folium.Marker([latitude, longitude], popup=tw.twText).add_to(self.map)
+
+        self.map.save(self.mapFile)
+        self._showMapFile()
+        # used https://nlp.fi.muni.cz/projects/chared/ to find out encoding
+
+
+if __name__ == '__main__':
+    app = QtWidgets.QApplication(sys.argv)
+    MainWindow = QtWidgets.QMainWindow()
+    ui = Ui_MainWindow(MainWindow)
+    MainWindow.show()
+    sys.exit(app.exec_())