From 991eca70382d4c36084319c5abd55fae4261c9d2 Mon Sep 17 00:00:00 2001 From: florpor Date: Tue, 11 Nov 2014 17:55:36 +0200 Subject: [PATCH 01/21] the worker can post to facebook and twitter when new plans are scraped --- requirements.txt | 3 +++ tools/scrapelib.py | 10 +++++++ tools/sociallib.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++ worker.py | 3 +++ 4 files changed, 82 insertions(+) create mode 100644 tools/sociallib.py diff --git a/requirements.txt b/requirements.txt index c4214f8..39bc03c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,6 @@ times==0.6 wsgiref==0.1.2 logging pylibmc==1.3.0 +facepy==1.0.4 +twitter==1.15.0 +bitly_api==0.3 diff --git a/tools/scrapelib.py b/tools/scrapelib.py index 59d2c4c..5ac8601 100644 --- a/tools/scrapelib.py +++ b/tools/scrapelib.py @@ -13,6 +13,7 @@ from conn import * from mmi_scrape import get_mmi_gush_json from mavat_scrape import get_mavat_gush_json +from sociallib import post date_pattern = re.compile(r'(\d+/\d+/\d+)') mmi_bad_plan_number_no_slash_pattern = re.compile(ur'^(.*[0-9]+)([א-ת])$') @@ -234,6 +235,9 @@ def scrape_gush(gush, RUN_FOLDER=False, TESTING=False): plan['gushim'] = [ gush_id ] log.debug("Inserting new plan data: %s", plan) db.plans.insert(plan) + + # post plan to social networks + post(plan) else: # since the plan exists get it's _id and gushim values plan['_id'] = existing_plan['_id'] @@ -245,6 +249,9 @@ def scrape_gush(gush, RUN_FOLDER=False, TESTING=False): # since we are sending an _id value the document will be updated log.debug("Updating modified plan data: %s", plan) db.plans.save(plan) + + # post plan to social networks + post(plan) else: # compare the values. maybe the plan wasn't modified at all plan_copy = deepcopy(plan) @@ -255,6 +262,9 @@ def scrape_gush(gush, RUN_FOLDER=False, TESTING=False): # since we are sending an _id value the document will be updated log.debug("Updating modified plan data: %s", plan) db.plans.save(plan) + + # post plan to social networks + post(plan) # just make sure these are deleted because we will probably have quite a few iterations here del plan_copy diff --git a/tools/sociallib.py b/tools/sociallib.py new file mode 100644 index 0000000..b711da8 --- /dev/null +++ b/tools/sociallib.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +import bitly_api +from facepy import GraphAPI +from twitter import * +import logging +import os + +log = logging.getLogger(__name__) + + +def post(plan): + # generate title and content for posts + title = plan['location_string'] + # 'not title' is not supposed to happen anymore because every plan currently has a location + if not title: + title = plan['number'] + + # special emphasizing for some statuses + if plan['status'] in [u'פרסום ההפקדה', u'פרסום בעיתונות להפקדה']: + status = u'»»%s««' % plan['status'] + else: + status = plan['status'] + + content = plan['essence'] + ' [' + status + ', ' + '%02d/%02d/%04d' % (plan['day'], plan['month'], plan['year']) + \ + ', ' + plan['number'] + ']' + + # if bitly access token is defined shorten the link + if 'BITLY_TOKEN' in os.environ.keys(): + try: + b = bitly_api.Connection(access_token=os.environ['BITLY_TOKEN']) + shortie = b.shorten(plan['details_link']) + url = shortie['url'] + except Exception, e: + log.exception('Could not shorten the link using bit.ly - %s', e) + url = plan['details_link'] + else: + url = plan['details_link'] + + # post to facebook page + if 'FB_TOKEN' in os.environ.keys() and 'FB_PAGE_ID' in os.environ.keys(): + try: + graph = GraphAPI(os.environ['FB_TOKEN']) + 
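# publish to the page's feed; facepy re-attempts the Graph API call (retry=10 below) on transient errors +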
graph.post(
+ path = 'v2.2/%s/feed' % os.environ['FB_PAGE_ID'],
+ message = '%s: %s %s' % (title, content, url),
+ retry = 10
+ )
+ except Exception, e:
+ log.exception('Could not post new plan to facebook page - %s', e)
+
+ # post to twitter feed
+ if 'TW_TOKEN' in os.environ.keys() and 'TW_TOKEN_SECRET' in os.environ.keys() and 'TW_CONSUMER' in os.environ.keys() and 'TW_CONSUMER_SECRET' in os.environ.keys():
+ try:
+ tweet_content = '%s: %s' % (title, content)
+
+ # shorten our content - max size should be 118, not including the link which will be shortened by twitter if bit.ly is not enabled
+ if len(tweet_content) > 118:
+ tweet = '%s... %s' % (tweet_content[0:114], url)
+ else:
+ tweet = '%s %s' % (tweet_content, url)
+
+ t = Twitter(auth=OAuth(consumer_key=os.environ['TW_CONSUMER'], consumer_secret=os.environ['TW_CONSUMER_SECRET'], token=os.environ['TW_TOKEN'], token_secret=os.environ['TW_TOKEN_SECRET']))
+ t.statuses.update(status=tweet)
+ except Exception, e:
+ log.exception('Could not post new plan to twitter feed - %s', e)
diff --git a/worker.py b/worker.py index fa7d2dd..3b612dd 100644 --- a/worker.py +++ b/worker.py @@ -5,12 +5,15 @@ import os import redis from rq import Worker, Queue, Connection
+import logging

listen = ['high', 'default', 'low']
redis_url = os.getenv('REDISTOGO_URL', 'redis://localhost:6379') redis_conn = redis.from_url(redis_url)

if __name__ == '__main__':
+ logging.basicConfig(format='%(asctime)-15s %(name)s %(levelname)s %(message)s', level=logging.WARNING)
+
 with Connection(redis_conn):
 worker = Worker(map(Queue, listen))
 worker.work(burst=True)

From ff176f0d906f23595183c5b8d87bd0d7b8dfb9eb Mon Sep 17 00:00:00 2001 From: florpor Date: Wed, 12 Nov 2014 10:22:17 +0200 Subject: [PATCH 02/21] helper script to get facebook access tokens for posting to pages
--- requirements.txt | 1 + scripts/get_facebook_token.py | 56 +++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 scripts/get_facebook_token.py

diff --git a/requirements.txt b/requirements.txt index 39bc03c..7dc7227 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ pylibmc==1.3.0 facepy==1.0.4 twitter==1.15.0 bitly_api==0.3
+web.py==0.37

diff --git a/scripts/get_facebook_token.py b/scripts/get_facebook_token.py new file mode 100644 index 0000000..d251582 --- /dev/null +++ b/scripts/get_facebook_token.py @@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+
+import web
+from facepy import GraphAPI, utils
+from urlparse import parse_qs
+
+url = ('/', 'index')
+
+app_id = '?'
+app_secret = '?'
+post_login_url = 'http://0.0.0.0:8080/'
+
+class index:
+ def GET(self):
+ user_data = web.input(code=None)
+
+ if not user_data.code:
+ dialog_url = ('http://www.facebook.com/dialog/oauth?' +
+ 'client_id=' + app_id +
+ '&redirect_uri=' + post_login_url +
+ '&scope=manage_pages')
+
+ return '<script>window.location.href = "' + dialog_url + '"</script>'
+ else:
+ try:
+ graph = GraphAPI()
+ response = graph.get(
+ path='oauth/access_token',
+ client_id=app_id,
+ client_secret=app_secret,
+ redirect_uri=post_login_url,
+ code=user_data.code
+ )
+ data = parse_qs(response)
+
+ extended_token = utils.get_extended_access_token(data['access_token'][0], app_id, app_secret)
+ graph = GraphAPI(extended_token[0])
+ accounts = graph.get(path = 'me/accounts')
+ result = u'<html><body>'
+ result += u'<table border="1"><tr><th>Name</th><th>Id</th><th>Access Token</th></tr>'
+
+ for entry in accounts['data']:
+ result += u'<tr><td>' + unicode(entry['name']) + u'</td><td>'
+ result += unicode(entry['id']) + u'</td><td>' + unicode(entry['access_token']) + u'</td></tr>'
+
+ result += '</table></body></html>
' + return result + except Exception, e: + return 'Error: %s' % e + + +if __name__ == '__main__': + print 'Please browse to this address to authorize Taba Publisher:' + + app = web.application(url, globals()) + app.run() From 36cebb83c148877c12372c3767d68ebfe99cb53b Mon Sep 17 00:00:00 2001 From: florpor Date: Wed, 12 Nov 2014 10:25:21 +0200 Subject: [PATCH 03/21] get_facebook_token note --- scripts/get_facebook_token.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/get_facebook_token.py b/scripts/get_facebook_token.py index d251582..a3ade5b 100644 --- a/scripts/get_facebook_token.py +++ b/scripts/get_facebook_token.py @@ -1,5 +1,11 @@ # -*- coding: utf-8 -*- +""" +Adapted from: http://stackoverflow.com/a/16743363 +This script will run a small web server, redirect you to authorize the Taba Publisher facebook +app to manage_pages permission, extend the access token and print out your page tokens +""" + import web from facepy import GraphAPI, utils from urlparse import parse_qs From 80d71b7a881bc8f6e865c4725a588ad63c640094 Mon Sep 17 00:00:00 2001 From: florpor Date: Wed, 12 Nov 2014 11:40:09 +0200 Subject: [PATCH 04/21] formatting and clarity to user --- scripts/get_facebook_token.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/scripts/get_facebook_token.py b/scripts/get_facebook_token.py index a3ade5b..1b2b9a2 100644 --- a/scripts/get_facebook_token.py +++ b/scripts/get_facebook_token.py @@ -9,12 +9,10 @@ import web from facepy import GraphAPI, utils from urlparse import parse_qs - -url = ('/', 'index') -app_id = '?' -app_secret = '?' -post_login_url = 'http://0.0.0.0:8080/' +app_id = '' +app_secret = '' + class index: def GET(self): @@ -23,7 +21,7 @@ def GET(self): if not user_data.code: dialog_url = ('http://www.facebook.com/dialog/oauth?' 
+ 'client_id=' + app_id +
- '&redirect_uri=' + post_login_url +
+ '&redirect_uri=http://0.0.0.0:8080/' +
 '&scope=manage_pages')

 return '<script>window.location.href = "' + dialog_url + '"</script>'
@@ -34,7 +32,7 @@ def GET(self): path='oauth/access_token', client_id=app_id, client_secret=app_secret,
- redirect_uri=post_login_url,
+ redirect_uri='http://0.0.0.0:8080/',
 code=user_data.code ) data = parse_qs(response)
@@ -56,7 +54,10 @@ def GET(self):

if __name__ == '__main__':
- print 'Please browse to this address to authorize Taba Publisher:'
-
- app = web.application(url, globals())
- app.run()
+ if app_id == '' or app_secret == '':
+ print 'Variables app_id and app_secret must be set to your Facebook app\'s values'
+ print 'Also, "http://0.0.0.0:8080" has to be set as a valid OAuth redirect URI in your app\'s advanced settings'
+ else:
+ print 'Please browse to this address to authorize Taba Publisher:'
+ app = web.application(('/', 'index'), globals())
+ app.run()

From 0dd4eb05f54096a685c4ff5f8de738db6402b6a1 Mon Sep 17 00:00:00 2001 From: florpor Date: Wed, 12 Nov 2014 11:42:12 +0200 Subject: [PATCH 05/21] helper script to get twitter access tokens for posting to feed
--- scripts/get_twitter_token.py | 67 ++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 scripts/get_twitter_token.py

diff --git a/scripts/get_twitter_token.py b/scripts/get_twitter_token.py new file mode 100644 index 0000000..1287428 --- /dev/null +++ b/scripts/get_twitter_token.py @@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+"""
+Built using code from: https://code.google.com/p/python-twitter/source/browse/get_access_token.py
+Copyright 2007 The Python-Twitter Developers, licensed under the Apache License, Version 2.0
+
+This script will run a small web server, redirect you to authorize the Taba Publisher twitter
+app to your account and print out your access token and secret
+"""
+
+import os
+from urlparse import parse_qsl
+import oauth2 as oauth
+import web
+from facepy import GraphAPI, utils
+from urlparse import parse_qs
+
+consumer_key = ''
+consumer_secret = ''
+
+
+class index:
+ def GET(self):
+ user_data = web.input(oauth_token=None, oauth_verifier=None)
+
+ if not user_data.oauth_token:
+ oauth_consumer = oauth.Consumer(key=consumer_key, secret=consumer_secret)
+ oauth_client = oauth.Client(oauth_consumer)
+ resp, content = oauth_client.request('https://api.twitter.com/oauth/request_token', 'GET')
+
+ if resp['status'] != '200':
+ return 'Invalid response from Twitter requesting temp token: %s' % resp['status']
+ else:
+ request_token = dict(parse_qsl(content))
+
+ auth_url = ('https://api.twitter.com/oauth/authorize?' +
+ 'oauth_token=' + request_token['oauth_token'] +
+ '&oauth_callback=http://0.0.0.0:8080/')
+
+ return '<script>window.location.href = "' + auth_url + '"</script>'
+ else:
+ token = oauth.Token(user_data.oauth_token, '')
+ token.set_verifier(user_data.oauth_verifier)
+
+ oauth_consumer = oauth.Consumer(key=consumer_key, secret=consumer_secret)
+ oauth_client = oauth.Client(oauth_consumer, token)
+ resp, content = oauth_client.request('https://api.twitter.com/oauth/access_token', method='POST', body='oauth_callback=oob&oauth_verifier=%s' % user_data.oauth_verifier)
+ access_token = dict(parse_qsl(content))
+
+ if resp['status'] != '200':
+ return 'The request for a Token did not succeed: %s' % resp['status']
+ else:
+ result = u'<html><body>'
+ result += u'Access Token: ' + access_token['oauth_token']
+ result += u'<br />
Access Token Secret: ' + access_token['oauth_token_secret']
+ result += u'</body></html>'
+ return result
+
+
+if __name__ == '__main__':
+ if consumer_key == '' or consumer_secret == '':
+ print 'Variables consumer_key and consumer_secret must be set to your Twitter app\'s values'
+ print 'Also, "http://0.0.0.0:8080" has to be set as a valid callback URL in your app\'s settings'
+ else:
+ print 'Please browse to this address to authorize Taba Publisher:'
+ app = web.application(('/', 'index'), globals())
+ app.run()

From 14d05218c324f3347b0cbbdb6bd95d61ddbd034b Mon Sep 17 00:00:00 2001 From: florpor Date: Wed, 12 Nov 2014 11:42:48 +0200 Subject: [PATCH 06/21] separated requirements that are only for scripts from main requirements.txt file
--- requirements.txt | 1 - scripts/requirements.txt | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 scripts/requirements.txt

diff --git a/requirements.txt b/requirements.txt index 7dc7227..39bc03c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,4 +22,3 @@ pylibmc==1.3.0 facepy==1.0.4 twitter==1.15.0 bitly_api==0.3
-web.py==0.37

diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..00fa79c --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,2 @@
+web.py==0.37
+oauth2==1.5.211

From ad65fa54867ece26098b27e5971b41d4b1795b00 Mon Sep 17 00:00:00 2001 From: florpor Date: Wed, 12 Nov 2014 12:19:00 +0200 Subject: [PATCH 07/21] social posting documentation and a bit of space formatting
--- DEPLOYMENT.md | 36 ++++++++++++++++++++++++++++++++++++ scripts/get_twitter_token.py | 6 +++--- 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 227bf65..d903488 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -23,6 +23,42 @@ To deploy a new municipality, run: `fab create_client:holon,"חולון"` after To change client configuration, you can edit `munis.js` manually later on, according to the [Municipality Index File syntax](http://github.com/niryariv/opentaba-client/blob/master/DEPLOYMENT.md#municipality-index-file).

+##Automatic Facebook and Twitter Posting
+The server is able to post a plan's content to a Facebook page and Twitter feed every time a plan is created or updated.
+To enable this feature, environment variables need to be set on the server with things like access tokens, consumer keys etc.
+You can enable Facebook only, Twitter only or both, and can also enable Bit.ly as a link shortener, or have your links posted in their original form.
+
+###Environemnt Variables
+####Facebook
+The needed variables for Facebook posting are `FB_TOKEN` and `FB_PAGE_ID`, which correspond to the page access token after you gave the publisher app the `manage_pages` permission, and the page's id.
+To set them run (opentaba-server-holon is the application name in this example and the ones below):
+`heroku config:set FB_TOKEN="token" --app opentaba-server-holon
+heroku config:set FB_PAGE_ID="page_id" --app opentaba-server-holon`
+####Twitter
+The needed variables for Twitter posting are `TW_TOKEN`, `TW_TOKEN_SECRET`, `TW_CONSUMER` and `TW_CONSUMER_SECRET`, which correspond to the access token after you authorized the publisher app, the access token secret and the publisher app's consumer key and consumer secret.
+To set them run:
+`heroku config:set TW_TOKEN="token" --app opentaba-server-holon
+heroku config:set TW_TOKEN_SECRET="token_secret" --app opentaba-server-holon
+heroku config:set TW_CONSUMER="consumer" --app opentaba-server-holon
+heroku config:set TW_CONSUMER_SECRET="consumer_secret" --app opentaba-server-holon`
+####Bit.ly
+If you want links to be shortened before they are posted, you can enable Bit.ly shortening (not required for either Facebook or Twitter posting).
+The needed variable is only `BITLY_TOKEN`. Set it by running: `heroku config:set BITLY_TOKEN="token" --app opentaba-server-holon`
+
+###Getting The Tokens
+There are two helper scripts made to help you authorize the Facebook and Twitter apps, which require manual web authorization, and get your access tokens easily.
+Before you can run them there are two things you must do:
+ 1. Install their required libraries on your environment, i.e. `pip install -r scripts/requirements.txt`
+ 2. Set the app id and app secret on the Facebook script, or consumer key and consumer secret on the Twitter script. These are obviously not provided with the script, and are attainable at both apps' settings pages.
+####Facebook
+Run the `scripts/get_facebook_token.py` script, and browse to [http://0.0.0.0:8080](http://0.0.0.0:8080).
+After authorizing the app, you will be redirected to a page which will list all your pages, their ids and their access tokens. Our server only posts to one page, so pick one and set the environment variables accordingly.
+####Twitter
+Run the `scripts/get_twitter_token.py` script, and browse to [http://0.0.0.0:8080](http://0.0.0.0:8080).
+After authorizing the app, you will be redirected to a page with your access token and access token secret.
+####Bit.ly
+Simply go to the Bit.ly website's [apps page](https://bitly.com/a/oauth_apps) and generate a generic access token, which you can use.
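For orientation, the worker's use of `BITLY_TOKEN` is fixed by patch 01's `tools/sociallib.py`; the following is just that fallback logic distilled into a sketch (assuming only the pinned `bitly_api==0.3` client), which you can also run standalone to sanity-check a token before setting it on Heroku:

```python
# Sketch of the BITLY_TOKEN logic in tools/sociallib.py (patch 01):
# shorten the plan link if a token is set, fall back to the raw link otherwise.
import os
import bitly_api

def shorten_link(details_link):
    if 'BITLY_TOKEN' not in os.environ:
        return details_link
    try:
        conn = bitly_api.Connection(access_token=os.environ['BITLY_TOKEN'])
        return conn.shorten(details_link)['url']
    except Exception:
        # shortening is best-effort - never block a post on bit.ly errors
        return details_link
```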
+ ##All Fabric Tasks ###Server + `fab create_server:muni_name, "display_name"` diff --git a/scripts/get_twitter_token.py b/scripts/get_twitter_token.py index 1287428..6cc922e 100644 --- a/scripts/get_twitter_token.py +++ b/scripts/get_twitter_token.py @@ -15,7 +15,7 @@ from facepy import GraphAPI, utils from urlparse import parse_qs -consumer_key = '' +consumer_key = '' consumer_secret = '' @@ -43,9 +43,9 @@ def GET(self): token.set_verifier(user_data.oauth_verifier) oauth_consumer = oauth.Consumer(key=consumer_key, secret=consumer_secret) - oauth_client = oauth.Client(oauth_consumer, token) + oauth_client = oauth.Client(oauth_consumer, token) resp, content = oauth_client.request('https://api.twitter.com/oauth/access_token', method='POST', body='oauth_callback=oob&oauth_verifier=%s' % user_data.oauth_verifier) - access_token = dict(parse_qsl(content)) + access_token = dict(parse_qsl(content)) if resp['status'] != '200': return 'The request for a Token did not succeed: %s' % resp['status'] From 029d572ac6607c964f9579135c3a26b9ce041850 Mon Sep 17 00:00:00 2001 From: florpor Date: Wed, 12 Nov 2014 12:24:33 +0200 Subject: [PATCH 08/21] can never get this markdown right the first time --- DEPLOYMENT.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index d903488..9513777 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -32,15 +32,19 @@ You can enable Facebook only, Twitter only or both, and can also enable Bit.ly a ####Facebook The needed variables for Facebook posting are `FB_TOKEN` and `FB_PAGE_ID`, which correspond to the page access token after you gave the publisher app the `manage_pages` permission, and the page's id. To set them run (opentaba-server-holon is the application name in this example and the ones below): -`heroku config:set FB_TOKEN="token" --app opentaba-server-holon -heroku config:set FB_PAGE_ID="page_id" --app opentaba-server-holon` +``` +heroku config:set FB_TOKEN="token" --app opentaba-server-holon +heroku config:set FB_PAGE_ID="page_id" --app opentaba-server-holon +``` ####Twitter The needed variables for Twitter posting are `TW_TOKEN`, `TW_TOKEN_SECRET`, `TW_CONSUMER` and `TW_CONSUMER_SECRET`, which correspond to the access token after you authorized the publiser app, the access token secret and the publisher app's consumer key and consumer secret. To set them run: -`heroku config:set TW_TOKEN="token" --app opentaba-server-holon +``` +heroku config:set TW_TOKEN="token" --app opentaba-server-holon heroku config:set TW_TOKEN_SECRET="token_secret" --app opentaba-server-holon heroku config:set TW_CONSUMER="consumer" --app opentaba-server-holon -heroku config:set TW_CONSUMER_SECRET="consumer_secret" --app opentaba-server-holon` +heroku config:set TW_CONSUMER_SECRET="consumer_secret" --app opentaba-server-holon +``` ####Bit.ly If you want links to be shortened before they are posted, you can enable Bit.ly shortening (not a must for neither Facebook nor Twitter posting). The needed variable is only `BITLY_TOKEN`. Set it by running: `heroku config:set BITLY_TOKEN="token" --app opentaba-server-holon` @@ -50,6 +54,7 @@ There are two helper scripts made to help you authorize the Facebook and Twitter Before you can run them there are two things you must do: 1. Install their required libraries on your environment, ie. `pip install -r scripts/requirements.txt` 2. Set the app id and app secret on the Facebook script, or consumer key and consumer secret on the Twitter script. 
These are obviously not provided with the script, and are attainable at both apps' settings pages. + ####Facebook Run the `scripts/get_facebook_token.py` script, and browse [http://0.0.0.0:8080](http://0.0.0.0:8080). After authorizing the app, you will be redirected to a page which will list all your pages, their ids and their access tokens. Our server only posts to one page, so pick one and set the environment variables accordingly. From 3de0b51b3a99c8f5d0817bd03f7bb17a6bee14df Mon Sep 17 00:00:00 2001 From: florpor Date: Wed, 12 Nov 2014 12:30:28 +0200 Subject: [PATCH 09/21] updated test_requirements.txt for travis --- test_requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test_requirements.txt b/test_requirements.txt index 064fdae..68d1014 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -16,3 +16,6 @@ rq==0.3.2 #wsgiref==0.1.2 logging pylibmc==1.3.0 +facepy==1.0.4 +twitter==1.15.0 +bitly_api==0.3 From fcf6e17c3909b7196182d157288e163e70c8e77a Mon Sep 17 00:00:00 2001 From: florpor Date: Tue, 2 Dec 2014 14:40:49 +0200 Subject: [PATCH 10/21] moved social posting to a detached service. re-ordering of helpers --- app.py | 105 ++++++---------------------------------- requirements.txt | 3 -- tools/helpers.py | 116 +++++++++++++++++++++++++++++++++++++++++++++ tools/scrapelib.py | 11 +++-- tools/sociallib.py | 77 ++++++++---------------------- 5 files changed, 158 insertions(+), 154 deletions(-) create mode 100644 tools/helpers.py diff --git a/app.py b/app.py index c244f60..dfb9e06 100644 --- a/app.py +++ b/app.py @@ -2,89 +2,18 @@ #!/usr/bin/python import os -import datetime -import json -from bson import json_util - -from werkzeug.contrib.atom import AtomFeed -from werkzeug.urls import url_encode from flask import Flask -from flask import abort, make_response, request +from flask import abort, request -from tools.conn import * -from tools.gushim import GUSHIM +from tools.conn import RUNNING_LOCAL from tools.cache import cached, _setup_cache +import tools.helpers as helpers app = Flask(__name__) app.debug = RUNNING_LOCAL # if we're local, keep debug on -#### Helpers #### - -def _get_plans(count=1000, query={}): - return list(db.plans.find(query, limit=count).sort( - [("year", pymongo.DESCENDING), ("month", pymongo.DESCENDING), ("day", pymongo.DESCENDING)])) - - -def _get_gushim(query={}, fields=None): - return list(db.gushim.find(query, fields=fields)) - - -def _create_response_json(data): - """ - Convert dictionary to JSON. 
json_util.default adds automatic mongoDB result support - """ - r = make_response(json.dumps(data, ensure_ascii=False, default=json_util.default)) - r.headers['Access-Control-Allow-Origin'] = "*" - r.headers['Content-Type'] = "application/json; charset=utf-8" - return r - - -def _create_response_atom_feed(request, plans, feed_title=''): - """ - Create an atom feed of plans fetched from the DB based on an optional query - """ - feed = AtomFeed(feed_title, feed_url=request.url, url=request.url_root) - - for p in plans: - url = p['details_link'] - - # special emphasizing for some statuses - if p['status'] in [u'פרסום ההפקדה', u'פרסום בעיתונות להפקדה']: - status = u'»»%s««' % p['status'] - else: - status = p['status'] - - content = p['essence'] + ' [' + status + ', ' + '%02d/%02d/%04d' % (p['day'], p['month'], p['year']) + \ - ', ' + p['number'] + ']' - title = p['location_string'] - # 'not title' is not supposed to happen anymore because every plan currently has a location - if not title: - title = p['number'] - - if p['mavat_code'] == '': - links = [{'href' : 'http://www.mavat.moin.gov.il/MavatPS/Forms/SV3.aspx?tid=4&tnumb=' + p['number'], 'rel': 'related', 'title': u'מבא"ת'}] - else: - links = [{'href': '%splan/%s/mavat' % (request.url_root, p['plan_id']), 'rel': 'related', 'title': u'מבא"ת'}] - - feed.add( - title=title, - content=content, - content_type='html', - author="OpenTABA.info", - # id=url + '&status=' + p['status'], - # ^^ it seems like the &tblView= value keeps changing in the URL, which causes the ID to change and dlvr.it to republish items. - id="%s-%s" % (title, p['status']), - # this is a unique ID (not real URL) so adding status to ensure uniqueness in TBA stages - url=url, - links=links, - updated=datetime.date(p['year'], p['month'], p['day']) - ) - - return feed - - #### Cache Helper #### @app.before_first_request @@ -105,18 +34,14 @@ def get_gushim(): get gush_id metadata """ detailed = request.args.get('detailed', '') == 'true' - gushim = _get_gushim(fields={'gush_id': True, 'last_checked_at': True, '_id': False}) + gushim = helpers._get_gushim(fields={'gush_id': True, 'last_checked_at': True, '_id': False}) if detailed: # Flatten list of gushim into a dict g_flat = dict((g['gush_id'], {"gush_id": g['gush_id'], "last_checked_at": g['last_checked_at'], "plan_stats": {}}) for g in gushim) # Get plan statistics from DB - stats = db.plans.aggregate([ - {"$unwind" : "$gushim" }, - {"$project": {"gush_id": "$gushim", "status": "$status", "_id": 0}}, - {"$group": {"_id": {"gush_id": "$gush_id", "status": "$status"}, "count": {"$sum": 1}}} - ]) + stats = helpers._get_plan_statistics() # Merge stats into gushim dict for g in stats['result']: @@ -132,7 +57,7 @@ def get_gushim(): # De-flatten our dict gushim = g_flat.values() - return _create_response_json(gushim) + return helpers._create_response_json(gushim) @app.route('/gush/.json') @@ -141,10 +66,10 @@ def get_gush(gush_id): """ get gush_id metadata """ - gush = _get_gushim(query={"gush_id": gush_id}) + gush = helpers._get_gushim(query={"gush_id": gush_id}) if gush is None or len(gush) == 0: abort(404) - return _create_response_json(gush[0]) + return helpers._create_response_json(gush[0]) @app.route('/gush//plans.json') @@ -153,11 +78,11 @@ def get_plans(gush_id): """ get plans from gush_id """ - gush = _get_gushim(query={"gush_id": gush_id}) + gush = helpers._get_gushim(query={"gush_id": gush_id}) if gush is None or len(gush) == 0: abort(404) - return _create_response_json(_get_plans(query={"gushim": gush_id})) + return 
helpers._create_response_json(helpers._get_plans(query={"gushim": gush_id})) @app.route('/recent.json') @@ -166,7 +91,7 @@ def get_recent_plans(): """ Get the 10 most recent plans to show on the site's home page """ - return _create_response_json(_get_plans(count=10)) + return helpers._create_response_json(helpers._get_plans(count=10)) @app.route('/plans.atom') @@ -177,7 +102,7 @@ def atom_feed(): else: title = u'תב"ע פתוחה' - return _create_response_atom_feed(request, _get_plans(count=20), feed_title=title).get_response() + return helpers._create_response_atom_feed(request, helpers._get_plans(count=20), feed_title=title).get_response() @app.route('/gush//plans.atom') @@ -192,7 +117,7 @@ def atom_feed_gush(gushim): gushim_query = {'gushim': {'$in': gushim}} else: gushim_query = {'gushim': gushim[0]} - return _create_response_atom_feed(request, _get_plans(query=gushim_query), feed_title=u'תב״ע פתוחה - גוש %s' % ', '.join(gushim)).get_response() + return helpers._create_response_atom_feed(request, helpers._get_plans(query=gushim_query), feed_title=u'תב״ע פתוחה - גוש %s' % ', '.join(gushim)).get_response() @app.route('/plan//mavat') @@ -203,7 +128,7 @@ def redirect_to_mavat(plan_id): mavat website using an auto-sending form """ try: - plans = _get_plans(count=1, query={'plan_id': int(plan_id)}) + plans = helpers._get_plans(count=1, query={'plan_id': int(plan_id)}) except ValueError: # plan_id is not an int abort(400) except: # DB error @@ -233,7 +158,7 @@ def wakeup(): wake up Heroku dyno from idle. perhaps can if >1 dynos used as endpoint for a "wakeup" request when the client inits """ - return _create_response_json({'morning': 'good'}) + return helpers._create_response_json({'morning': 'good'}) #### MAIN #### diff --git a/requirements.txt b/requirements.txt index 39bc03c..c4214f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,6 +19,3 @@ times==0.6 wsgiref==0.1.2 logging pylibmc==1.3.0 -facepy==1.0.4 -twitter==1.15.0 -bitly_api==0.3 diff --git a/tools/helpers.py b/tools/helpers.py new file mode 100644 index 0000000..13c8cb2 --- /dev/null +++ b/tools/helpers.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- + +""" +Helpers for our web and worker (scraper) instances +""" + +from werkzeug.contrib.atom import AtomFeed +from flask import make_response +import json +from bson import json_util +import datetime +import pymongo + +from conn import db + + +def _get_plans(count=1000, query={}): + return list(db.plans.find(query, limit=count).sort( + [("year", pymongo.DESCENDING), ("month", pymongo.DESCENDING), ("day", pymongo.DESCENDING)])) + + +def _get_gushim(query={}, fields=None): + return list(db.gushim.find(query, fields=fields)) + + +def _get_plan_statistics(): + return db.plans.aggregate([ + {"$unwind" : "$gushim" }, + {"$project": {"gush_id": "$gushim", "status": "$status", "_id": 0}}, + {"$group": {"_id": {"gush_id": "$gush_id", "status": "$status"}, "count": {"$sum": 1}}} + ]) + + +def _create_response_json(data): + """ + Convert dictionary to JSON. 
json_util.default adds automatic mongoDB result support + """ + r = make_response(json.dumps(data, ensure_ascii=False, default=json_util.default)) + r.headers['Access-Control-Allow-Origin'] = "*" + r.headers['Content-Type'] = "application/json; charset=utf-8" + return r + + +def _create_response_atom_feed(request, plans, feed_title=''): + """ + Create an atom feed of plans fetched from the DB based on an optional query + """ + feed = AtomFeed(feed_title, feed_url=request.url, url=request.url_root) + + for p in plans: + formatted = _format_plan(p, request.url_root) + + feed.add( + title=formatted['title'], + content=formatted['content'], + content_type='html', + author="OpenTABA.info", + # id=url + '&status=' + p['status'], + # ^^ it seems like the &tblView= value keeps changing in the URL, which causes the ID to change and dlvr.it to republish items. + id="%s-%s" % (formatted['title'], p['status']), + # this is a unique ID (not real URL) so adding status to ensure uniqueness in TBA stages + url=formatted['url'], + links=formatted['links'], + updated=formatted['last_update'] + ) + + return feed + + +def _format_plan(plan, server_root=None): + """ + Take a plan and format it for atom feed and social networks + """ + formatted_plan = {} + + formatted_plan['url'] = plan['details_link'] + + # special emphasizing for some statuses + if plan['status'] in [u'פרסום ההפקדה', u'פרסום בעיתונות להפקדה']: + formatted_plan['status'] = u'»»%s««' % plan['status'] + else: + formatted_plan['status'] = plan['status'] + + # the plan's content + formatted_plan['content'] = plan['essence'] + ' [' + formatted_plan['status'] + ', ' + \ + '%02d/%02d/%04d' % (plan['day'], plan['month'], plan['year']) + ', ' + plan['number'] + ']' + + # the title + formatted_plan['title'] = plan['location_string'] + # 'not title' is not supposed to happen anymore because every plan currently has a location + if not formatted_plan['title']: + formatted_plan['title'] = plan['number'] + + # mavat link - if we have a code and the base url for this server (currently only from the atom feed) we can give a direct link + # (through our server). 
otherwise link to the search page with parameters + if plan['mavat_code'] == '' or server_root is None: + formatted_plan['links'] = [{'href' : 'http://www.mavat.moin.gov.il/MavatPS/Forms/SV3.aspx?tid=4&tnumb=' + plan['number'], 'rel': 'related', 'title': u'מבא"ת'}] + else: + formatted_plan['links'] = [{'href': '%splan/%s/mavat' % (server_root, plan['plan_id']), 'rel': 'related', 'title': u'מבא"ת'}] + + # plan last update + formatted_plan['last_update'] = datetime.date(plan['year'], plan['month'], plan['day']) + + return formatted_plan + + +""" +A small class to enable json-serializing of datetime.date objects +To use it: json.dumps(json_object, cls=helpers.DateTimeEncoder) +""" +class DateTimeEncoder(json.JSONEncoder): + def default(self, obj): + if hasattr(obj, 'isoformat'): + return obj.isoformat() + else: + return json.JSONEncoder.default(self, obj) diff --git a/tools/scrapelib.py b/tools/scrapelib.py index 5ac8601..79c450d 100644 --- a/tools/scrapelib.py +++ b/tools/scrapelib.py @@ -6,14 +6,15 @@ import logging import json import datetime +import os from hashlib import md5 from copy import deepcopy from multiprocessing.pool import ThreadPool -from conn import * +from conn import db, RUNNING_LOCAL from mmi_scrape import get_mmi_gush_json from mavat_scrape import get_mavat_gush_json -from sociallib import post +import sociallib date_pattern = re.compile(r'(\d+/\d+/\d+)') mmi_bad_plan_number_no_slash_pattern = re.compile(ur'^(.*[0-9]+)([א-ת])$') @@ -237,7 +238,7 @@ def scrape_gush(gush, RUN_FOLDER=False, TESTING=False): db.plans.insert(plan) # post plan to social networks - post(plan) + sociallib.post(plan) else: # since the plan exists get it's _id and gushim values plan['_id'] = existing_plan['_id'] @@ -251,7 +252,7 @@ def scrape_gush(gush, RUN_FOLDER=False, TESTING=False): db.plans.save(plan) # post plan to social networks - post(plan) + sociallib.post(plan) else: # compare the values. 
maybe the plan wasn't modified at all plan_copy = deepcopy(plan) @@ -264,7 +265,7 @@ def scrape_gush(gush, RUN_FOLDER=False, TESTING=False): db.plans.save(plan) # post plan to social networks - post(plan) + sociallib.post(plan) # just make sure these are deleted because we will probably have quite a few iterations here del plan_copy diff --git a/tools/sociallib.py b/tools/sociallib.py index b711da8..4ca7f69 100644 --- a/tools/sociallib.py +++ b/tools/sociallib.py @@ -1,66 +1,31 @@ # -*- coding: utf-8 -*- -import bitly_api -from facepy import GraphAPI -from twitter import * +import requests import logging import os +from json import dumps + +import helpers as helpers log = logging.getLogger(__name__) def post(plan): - # generate title and content for posts - title = plan['location_string'] - # 'not title' is not supposed to happen anymore because every plan currently has a location - if not title: - title = plan['number'] + if 'SOCIAL_SERVICE_URL' in os.environ.keys(): + # generate a formatted plan + post_data = {'plan': dumps(helpers._format_plan(plan), cls=helpers.DateTimeEncoder)} + + # if we have facebook posting settings, add them + if 'FB_TOKEN' in os.environ.keys() and 'FB_PAGE_ID' in os.environ.keys(): + post_data['fb_tok'] = os.environ['FB_TOKEN'] + post_data['fb_page'] = os.environ['FB_PAGE_ID'] + + # same for twitter settings + if 'TW_TOKEN' in os.environ.keys() and 'TW_TOKEN_SECRET' in os.environ.keys() and 'TW_CONSUMER' in os.environ.keys() and 'TW_CONSUMER_SECRET' in os.environ.keys(): + post_data['tw_tok'] = os.environ['TW_TOKEN'] + post_data['tw_tsec'] = os.environ['TW_TOKEN_SECRET'] + post_data['tw_con'] = os.environ['TW_CONSUMER'] + post_data['tw_csec'] = os.environ['TW_CONSUMER_SECRET'] - # special emphasizing for some statuses - if plan['status'] in [u'פרסום ההפקדה', u'פרסום בעיתונות להפקדה']: - status = u'»»%s««' % plan['status'] - else: - status = plan['status'] - - content = plan['essence'] + ' [' + status + ', ' + '%02d/%02d/%04d' % (plan['day'], plan['month'], plan['year']) + \ - ', ' + plan['number'] + ']' - - # if bitly access token is defined shorten the link - if 'BITLY_TOKEN' in os.environ.keys(): - try: - b = bitly_api.Connection(access_token=os.environ['BITLY_TOKEN']) - shortie = b.shorten(plan['details_link']) - url = shortie['url'] - except Exception, e: - log.exception('Could not shorten the link using bit.ly - %s', e) - url = plan['details_link'] - else: - url = plan['details_link'] - - # post to facebook page - if 'FB_TOKEN' in os.environ.keys() and 'FB_PAGE_ID' in os.environ.keys(): - try: - graph = GraphAPI(os.environ['FB_TOKEN']) - graph.post( - path = 'v2.2/%s/feed' % os.environ['FB_PAGE_ID'], - message = '%s: %s %s' % (title, content, url), - retry = 10 - ) - except Exception, e: - log.exception('Could not post new plan to facebook page - %s', e) - - # post to twitter feed - if 'TW_TOKEN' in os.environ.keys() and 'TW_TOKEN_SECRET' in os.environ.keys() and 'TW_CONSUMER' in os.environ.keys() and 'TW_CONSUMER_SECRET' in os.environ.keys(): - try: - tweet_content = '%s: %s' % (title, content) - - # shorten our content - max size should be 118, not including the link which will be shortened by twitter if bit.ly is not enabled - if len(tweet_content) > 118: - tweet = '%s... 
%s' % (tweet_content[0:114], url) - else: - tweet = '%s %s' % (tweet_content, url) - - t = Twitter(auth=OAuth(consumer_key=os.environ['TW_CONSUMER'], consumer_secret=os.environ['TW_CONSUMER_SECRET'], token=os.environ['TW_TOKEN'], token_secret=os.environ['TW_TOKEN_SECRET'])) - t.statuses.update(status=tweet) - except Exception, e: - log.exception('Could not post new plan to twitter feed - %s', e) + # send data to social poster service. we just get an ok and continue, it's up to the service to take care of errors and such + requests.post(os.environ['SOCIAL_SERVICE_URL'], data=post_data) From d58c3b6a438554482a0df00214747c3d558daa5b Mon Sep 17 00:00:00 2001 From: florpor Date: Tue, 2 Dec 2014 14:43:36 +0200 Subject: [PATCH 11/21] removed social requirements from test_requirements.txt --- test_requirements.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/test_requirements.txt b/test_requirements.txt index 68d1014..064fdae 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -16,6 +16,3 @@ rq==0.3.2 #wsgiref==0.1.2 logging pylibmc==1.3.0 -facepy==1.0.4 -twitter==1.15.0 -bitly_api==0.3 From 1b0874513fd320b15ae973a6fc7c7a3988050393 Mon Sep 17 00:00:00 2001 From: florpor Date: Tue, 2 Dec 2014 14:55:54 +0200 Subject: [PATCH 12/21] updated deployment readme --- DEPLOYMENT.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 9513777..a0edef7 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -24,11 +24,14 @@ To change client configuration, you can edit `munis.js` manually later on, accor Index File syntax](http://github.com/niryariv/opentaba-client/blob/master/DEPLOYMENT.md#municipality-index-file). ##Automatic Facebook and Twitter Posting -The server is able to post a plan's content to a Facebook page and Twitter feed every time a plan is created or updated. +The server is able to post a plan's content to a Facebook page and Twitter feed every time a plan is created or updated, using a running instance of [opentaba-poster](https://github.com/florpor/opentaba-poster). To enable this feature, environment variables need to be set on the server with things like access tokens, consumer keys etc. -You can enable Facebook only, Twitter only or both, and can also enable Bit.ly as a link shortener, or have your links posted in their original form. +You can enable Facebook only, Twitter only or both. ###Environemnt Variables +####Poster +First and foremost, to enable social posting the address of the [opentaba-poster](https://github.com/florpor/opentaba-poster) we want to work with must be set. +The name of the variable is `SOCIAL_SERVICE_URL`, and can be set this like so: `heroku config:set SOCIAL_SERVICE_URL="http://127.0.0.1/post" --app opentaba-server-holon` ####Facebook The needed variables for Facebook posting are `FB_TOKEN` and `FB_PAGE_ID`, which correspond to the page access token after you gave the publisher app the `manage_pages` permission, and the page's id. To set them run (opentaba-server-holon is the application name in this example and the ones below): @@ -45,9 +48,6 @@ heroku config:set TW_TOKEN_SECRET="token_secret" --app opentaba-server-holon heroku config:set TW_CONSUMER="consumer" --app opentaba-server-holon heroku config:set TW_CONSUMER_SECRET="consumer_secret" --app opentaba-server-holon ``` -####Bit.ly -If you want links to be shortened before they are posted, you can enable Bit.ly shortening (not a must for neither Facebook nor Twitter posting). -The needed variable is only `BITLY_TOKEN`. 
Set it by running: `heroku config:set BITLY_TOKEN="token" --app opentaba-server-holon` ###Getting The Tokens There are two helper scripts made to help you authorize the Facebook and Twitter apps, which require manual web authorization, and get your access tokens easily. From db6af05cd2d52f17455be9f39924d2edb4824bd1 Mon Sep 17 00:00:00 2001 From: florpor Date: Mon, 29 Dec 2014 10:54:43 +0200 Subject: [PATCH 13/21] moved social token storage to poster service --- tools/sociallib.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/tools/sociallib.py b/tools/sociallib.py index 4ca7f69..ecebb82 100644 --- a/tools/sociallib.py +++ b/tools/sociallib.py @@ -11,21 +11,10 @@ def post(plan): - if 'SOCIAL_SERVICE_URL' in os.environ.keys(): - # generate a formatted plan - post_data = {'plan': dumps(helpers._format_plan(plan), cls=helpers.DateTimeEncoder)} - - # if we have facebook posting settings, add them - if 'FB_TOKEN' in os.environ.keys() and 'FB_PAGE_ID' in os.environ.keys(): - post_data['fb_tok'] = os.environ['FB_TOKEN'] - post_data['fb_page'] = os.environ['FB_PAGE_ID'] - - # same for twitter settings - if 'TW_TOKEN' in os.environ.keys() and 'TW_TOKEN_SECRET' in os.environ.keys() and 'TW_CONSUMER' in os.environ.keys() and 'TW_CONSUMER_SECRET' in os.environ.keys(): - post_data['tw_tok'] = os.environ['TW_TOKEN'] - post_data['tw_tsec'] = os.environ['TW_TOKEN_SECRET'] - post_data['tw_con'] = os.environ['TW_CONSUMER'] - post_data['tw_csec'] = os.environ['TW_CONSUMER_SECRET'] + if all(param in os.environ.keys() for param in ['POSTER_SERVICE_URL', 'POSTER_ID']): + # generate a formatted plan and the post data + formatted_plan = helpers._format_plan(plan) + post_data = {'poster_id': os.environ['POSTER_ID'], 'title': formatted_plan['title'], 'content': formatted_plan['content'], 'url': formatted_plan['url']} # send data to social poster service. we just get an ok and continue, it's up to the service to take care of errors and such - requests.post(os.environ['SOCIAL_SERVICE_URL'], data=post_data) + requests.post(os.environ['POSTER_SERVICE_URL'], data=post_data) From 88279af8520fcc4f7fa8dc52925b1d192d63a6db Mon Sep 17 00:00:00 2001 From: florpor Date: Mon, 29 Dec 2014 11:40:33 +0200 Subject: [PATCH 14/21] accidentally dropped import at merge --- app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app.py b/app.py index 914489a..f78b7b0 100644 --- a/app.py +++ b/app.py @@ -2,6 +2,7 @@ #!/usr/bin/python import os +import datetime from flask import Flask from flask import abort, request From 1d4365f44c7e6106e3117f79b3005b20b6bf87c4 Mon Sep 17 00:00:00 2001 From: florpor Date: Mon, 29 Dec 2014 12:04:09 +0200 Subject: [PATCH 15/21] moved social token scripts to opentaba-poster and updated deployment readme --- DEPLOYMENT.md | 38 ++++---------------- scripts/get_facebook_token.py | 63 -------------------------------- scripts/get_twitter_token.py | 67 ----------------------------------- scripts/requirements.txt | 2 -- 4 files changed, 6 insertions(+), 164 deletions(-) delete mode 100644 scripts/get_facebook_token.py delete mode 100644 scripts/get_twitter_token.py delete mode 100644 scripts/requirements.txt diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index a0edef7..cd320af 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -28,41 +28,15 @@ The server is able to post a plan's content to a Facebook page and Twitter feed To enable this feature, environment variables need to be set on the server with things like access tokens, consumer keys etc. 
You can enable Facebook only, Twitter only or both. -###Environemnt Variables +###Environment Variables ####Poster -First and foremost, to enable social posting the address of the [opentaba-poster](https://github.com/florpor/opentaba-poster) we want to work with must be set. -The name of the variable is `SOCIAL_SERVICE_URL`, and can be set this like so: `heroku config:set SOCIAL_SERVICE_URL="http://127.0.0.1/post" --app opentaba-server-holon` -####Facebook -The needed variables for Facebook posting are `FB_TOKEN` and `FB_PAGE_ID`, which correspond to the page access token after you gave the publisher app the `manage_pages` permission, and the page's id. -To set them run (opentaba-server-holon is the application name in this example and the ones below): +To enable social posting, we must be configured to work with an instance of [opentaba-poster](https://github.com/florpor/opentaba-poster). +To do that, we must make sure we are defined as a poster on the opentaba-poster app, and then set two environment variables - +`POSTER_SERVICE_URL` must be set to the url of the opentaba-poster app, and `POSTER_ID` must be set to our assigned id, eg: ``` -heroku config:set FB_TOKEN="token" --app opentaba-server-holon -heroku config:set FB_PAGE_ID="page_id" --app opentaba-server-holon +heroku config:set POSTER_SERVICE_URL="http://poster.service.com/" --app opentaba-server-holon +heroku config:set POSTER_ID="holon_id" --app opentaba-server-holon ``` -####Twitter -The needed variables for Twitter posting are `TW_TOKEN`, `TW_TOKEN_SECRET`, `TW_CONSUMER` and `TW_CONSUMER_SECRET`, which correspond to the access token after you authorized the publiser app, the access token secret and the publisher app's consumer key and consumer secret. -To set them run: -``` -heroku config:set TW_TOKEN="token" --app opentaba-server-holon -heroku config:set TW_TOKEN_SECRET="token_secret" --app opentaba-server-holon -heroku config:set TW_CONSUMER="consumer" --app opentaba-server-holon -heroku config:set TW_CONSUMER_SECRET="consumer_secret" --app opentaba-server-holon -``` - -###Getting The Tokens -There are two helper scripts made to help you authorize the Facebook and Twitter apps, which require manual web authorization, and get your access tokens easily. -Before you can run them there are two things you must do: - 1. Install their required libraries on your environment, ie. `pip install -r scripts/requirements.txt` - 2. Set the app id and app secret on the Facebook script, or consumer key and consumer secret on the Twitter script. These are obviously not provided with the script, and are attainable at both apps' settings pages. - -####Facebook -Run the `scripts/get_facebook_token.py` script, and browse [http://0.0.0.0:8080](http://0.0.0.0:8080). -After authorizing the app, you will be redirected to a page which will list all your pages, their ids and their access tokens. Our server only posts to one page, so pick one and set the environment variables accordingly. -####Twitter -Run the `scripts/get_twitter_token.py` script, and browse [http://0.0.0.0:8080](http://0.0.0.0:8080). -After authorizing the app, you will be redirected to a page with your access token and access token secret. -####Bit.ly -Simply go to the Bit.ly website's [apps page](https://bitly.com/a/oauth_apps) and generate a generic access token, which you can use. 
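With the tokens now held by the poster service, the server side of the hand-off is only a few lines; this sketch mirrors `tools/sociallib.py` as of patch 13 above (the poster service owns the social credentials, retries and error handling):

```python
# Mirrors tools/sociallib.py (patch 13): hand the formatted plan to opentaba-poster.
import os
import requests
import helpers

def post(plan):
    if all(param in os.environ for param in ['POSTER_SERVICE_URL', 'POSTER_ID']):
        formatted = helpers._format_plan(plan)
        requests.post(os.environ['POSTER_SERVICE_URL'], data={
            'poster_id': os.environ['POSTER_ID'],  # the id assigned to this server
            'title': formatted['title'],
            'content': formatted['content'],
            'url': formatted['url'],
        })
```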
##All Fabric Tasks ###Server

diff --git a/scripts/get_facebook_token.py b/scripts/get_facebook_token.py deleted file mode 100644 index 1b2b9a2..0000000 --- a/scripts/get_facebook_token.py +++ /dev/null @@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Adapted from: http://stackoverflow.com/a/16743363
-This script will run a small web server, redirect you to authorize the Taba Publisher facebook
-app to manage_pages permission, extend the access token and print out your page tokens
-"""
-
-import web
-from facepy import GraphAPI, utils
-from urlparse import parse_qs
-
-app_id = ''
-app_secret = ''
-
-
-class index:
- def GET(self):
- user_data = web.input(code=None)
-
- if not user_data.code:
- dialog_url = ('http://www.facebook.com/dialog/oauth?' +
- 'client_id=' + app_id +
- '&redirect_uri=http://0.0.0.0:8080/' +
- '&scope=manage_pages')
-
- return '<script>window.location.href = "' + dialog_url + '"</script>'
- else:
- try:
- graph = GraphAPI()
- response = graph.get(
- path='oauth/access_token',
- client_id=app_id,
- client_secret=app_secret,
- redirect_uri='http://0.0.0.0:8080/',
- code=user_data.code
- )
- data = parse_qs(response)
-
- extended_token = utils.get_extended_access_token(data['access_token'][0], app_id, app_secret)
- graph = GraphAPI(extended_token[0])
- accounts = graph.get(path = 'me/accounts')
- result = u'<html><body>'
- result += u'<table border="1"><tr><th>Name</th><th>Id</th><th>Access Token</th></tr>'
-
- for entry in accounts['data']:
- result += u'<tr><td>' + unicode(entry['name']) + u'</td><td>'
- result += unicode(entry['id']) + u'</td><td>' + unicode(entry['access_token']) + u'</td></tr>'
-
- result += '</table></body></html>
' - return result
- except Exception, e:
- return 'Error: %s' % e
-
-
-if __name__ == '__main__':
- if app_id == '' or app_secret == '':
- print 'Variables app_id and app_secret must be set to your Facebook app\'s values'
- print 'Also, "http://0.0.0.0:8080" has to be set as a valid OAuth redirect URI in your app\'s advanced settings'
- else:
- print 'Please browse to this address to authorize Taba Publisher:'
- app = web.application(('/', 'index'), globals())
- app.run()

diff --git a/scripts/get_twitter_token.py b/scripts/get_twitter_token.py deleted file mode 100644 index 6cc922e..0000000 --- a/scripts/get_twitter_token.py +++ /dev/null @@ -1,67 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Built using code from: https://code.google.com/p/python-twitter/source/browse/get_access_token.py
-Copyright 2007 The Python-Twitter Developers, licensed under the Apache License, Version 2.0
-
-This script will run a small web server, redirect you to authorize the Taba Publisher twitter
-app to your account and print out your access token and secret
-"""
-
-import os
-from urlparse import parse_qsl
-import oauth2 as oauth
-import web
-from facepy import GraphAPI, utils
-from urlparse import parse_qs
-
-consumer_key = ''
-consumer_secret = ''
-
-
-class index:
- def GET(self):
- user_data = web.input(oauth_token=None, oauth_verifier=None)
-
- if not user_data.oauth_token:
- oauth_consumer = oauth.Consumer(key=consumer_key, secret=consumer_secret)
- oauth_client = oauth.Client(oauth_consumer)
- resp, content = oauth_client.request('https://api.twitter.com/oauth/request_token', 'GET')
-
- if resp['status'] != '200':
- return 'Invalid response from Twitter requesting temp token: %s' % resp['status']
- else:
- request_token = dict(parse_qsl(content))
-
- auth_url = ('https://api.twitter.com/oauth/authorize?' +
- 'oauth_token=' + request_token['oauth_token'] +
- '&oauth_callback=http://0.0.0.0:8080/')
-
- return '<script>window.location.href = "' + auth_url + '"</script>'
- else:
- token = oauth.Token(user_data.oauth_token, '')
- token.set_verifier(user_data.oauth_verifier)
-
- oauth_consumer = oauth.Consumer(key=consumer_key, secret=consumer_secret)
- oauth_client = oauth.Client(oauth_consumer, token)
- resp, content = oauth_client.request('https://api.twitter.com/oauth/access_token', method='POST', body='oauth_callback=oob&oauth_verifier=%s' % user_data.oauth_verifier)
- access_token = dict(parse_qsl(content))
-
- if resp['status'] != '200':
- return 'The request for a Token did not succeed: %s' % resp['status']
- else:
- result = u'<html><body>'
- result += u'Access Token: ' + access_token['oauth_token']
- result += u'<br />
Access Token Secret: ' + access_token['oauth_token_secret'] - result += u'' - return result - - -if __name__ == '__main__': - if consumer_key == '' or consumer_secret == '': - print 'Variables consumer_key and consumer_secret must be set to your Twitter app\'s values' - print 'Also, "http://0.0.0.0:8080" has to be set as a valid callback URL in your app\'s settings' - else: - print 'Please browse to this address to authorize Taba Publisher:' - app = web.application(('/', 'index'), globals()) - app.run() diff --git a/scripts/requirements.txt b/scripts/requirements.txt deleted file mode 100644 index 00fa79c..0000000 --- a/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -web.py==0.37 -oauth2==1.5.211 From 03dae801944d98302c8e75f287816adc26ff8000 Mon Sep 17 00:00:00 2001 From: florpor Date: Mon, 29 Dec 2014 19:58:54 +0200 Subject: [PATCH 16/21] changed tools folder to lib. moved create_db and update_db to scripts folder --- .travis.yml | 2 +- DEPLOYMENT.md | 10 +-- Tests/unit_test/test_scrape.py | 2 +- app.py | 104 +++++++++++++++++++++++++++----- {tools => lib}/__init__.py | 0 {tools => lib}/cache.py | 0 {tools => lib}/conn.py | 0 {tools => lib}/gushim.py | 0 {tools => lib}/helpers.py | 0 {tools => lib}/mavat_scrape.py | 0 {tools => lib}/mmi_scrape.py | 0 {tools => lib}/scrapelib.py | 0 {tools => lib}/sociallib.py | 0 readme.md | 2 +- scrape.py | 4 +- {tools => scripts}/create_db.py | 9 ++- scripts/server_fabfile.py | 18 +++--- {tools => scripts}/update_db.py | 5 ++ 18 files changed, 120 insertions(+), 36 deletions(-) rename {tools => lib}/__init__.py (100%) rename {tools => lib}/cache.py (100%) rename {tools => lib}/conn.py (100%) rename {tools => lib}/gushim.py (100%) rename {tools => lib}/helpers.py (100%) rename {tools => lib}/mavat_scrape.py (100%) rename {tools => lib}/mmi_scrape.py (100%) rename {tools => lib}/scrapelib.py (100%) rename {tools => lib}/sociallib.py (100%) rename {tools => scripts}/create_db.py (92%) rename {tools => scripts}/update_db.py (92%) diff --git a/.travis.yml b/.travis.yml index 5aadcdc..7ec51f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,7 +15,7 @@ install: - pip install Flask==$FLASK before_script: - - python tools/create_db.py --force -m all + - python scripts/create_db.py --force -m all - mkdir filecache - chmod -R 0777 filecache - python scrape.py -g 30649 diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index cd320af..653e4d9 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -6,7 +6,7 @@ you are deploying) To deploy a server/database for a new municipality, follow these steps: 1. Make sure the GeoJSON map file with the name of the municipality has been added to the [map repository](http://github.com/niryariv/israel_gushim) - 2. Run `fab create_server:holon,"חולון"`. This will add the new gush ids to the tools/gushim.py file, create & configure the new Heroku app / MongoDB, and finally run the scraper to get all municipality's plans. + 2. Run `fab create_server:holon,"חולון"`. This will add the new gush ids to the lib/gushim.py file, create & configure the new Heroku app / MongoDB, and finally run the scraper to get all municipality's plans. 3. When the task finishes running, a browser window (or tab) will be open with the new app's scheduler dashboard. Add a new scheduled task with the command: `python scrape.py -g all ; python worker.py`. Do not change dyno settings. 
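For context, the scheduled command pairs the scraper with a one-shot queue drain: `scrape.py -g all` enqueues the gushim as rq jobs, and `worker.py` (patch 01 above) consumes them and exits. In outline, the worker half is:

```python
# Outline of worker.py as shown in patch 01: drain the rq queues once and exit.
import os
import redis
from rq import Worker, Queue, Connection

redis_conn = redis.from_url(os.getenv('REDISTOGO_URL', 'redis://localhost:6379'))

with Connection(redis_conn):
    # burst=True makes the worker quit when the queues are empty,
    # so the scheduled dyno terminates instead of idling
    Worker(map(Queue, ['high', 'default', 'low'])).work(burst=True)
```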
@@ -52,9 +52,9 @@ heroku config:set POSTER_ID="holon_id" --app opentaba-server-holon ignore_errors is set to false by default because if this task fails it most likely means the app does not exist to begin with. -+ `fab update_gushim_server:muni_name` Update the [tools/gushim.py](tools/gushim.py) file with the ++ `fab update_gushim_server:muni_name` Update the [lib/gushim.py](lib/gushim.py) file with the gushim of a new municipality or the updated ones of an existing municipality. - This task downloads the gush map file from [israel_gushim](http://github.com/niryariv/israel_gushim), parses its data, and if there are new gushim it updates the [tools/gushim.py](tools/gushim.py) file and the + This task downloads the gush map file from [israel_gushim](http://github.com/niryariv/israel_gushim), parses its data, and if there are new gushim it updates the [lib/gushim.py](lib/gushim.py) file and the [Tests/functional_tests/test_return_json.py](Tests/functional_tests/test_return_json.py) file (with the new amount of gushim), commits and pushes on the master branch. Note that this task does not deploy anywhere, and the new gushim data will not exist on active servers until you deploy changes to them. @@ -65,10 +65,10 @@ heroku config:set POSTER_ID="holon_id" --app opentaba-server-holon + `fab deploy_server_all` Find servers by looking at your `heroku list` and filtering out the ones that don't match our server name pattern. Run deploy_server task on each of the discovered servers. -+ `fab create_db:muni_name` Run the [tools/create_db.py](tools/create_db.py) script on the given ++ `fab create_db:muni_name` Run the [scripts/create_db.py](scripts/create_db.py) script on the given municipality's heroku app. Will only create db for the given municipality's gushim. -+ `fab update_db:muni_name` Run the [tools/update_db.py](tools/update_db.py) script on the given ++ `fab update_db:muni_name` Run the [scripts/update_db.py](scripts/update_db.py) script on the given municipality's heroku app. Will only update db for the given municipality's gushim. 
 + `fab scrape:muni_name,` Run the [scrape.py](scrape.py) script on the
diff --git a/Tests/unit_test/test_scrape.py b/Tests/unit_test/test_scrape.py
index 6c77d06..919b434 100644
--- a/Tests/unit_test/test_scrape.py
+++ b/Tests/unit_test/test_scrape.py
@@ -3,7 +3,7 @@
 from app import app
 from nose.tools import eq_, assert_true
 from nose import with_setup
-from tools.scrapelib import scrape_gush
+from lib.scrapelib import scrape_gush
 import os

 testapp = app.test_client()
diff --git a/app.py b/app.py
index 445cc78..f1999f8 100644
--- a/app.py
+++ b/app.py
@@ -3,18 +3,88 @@

 import os
 import datetime
+import json
+from bson import json_util
+
+from werkzeug.contrib.atom import AtomFeed
+from werkzeug.urls import url_encode

 from flask import Flask
-from flask import abort, request
+from flask import abort, make_response, request

-from tools.conn import RUNNING_LOCAL
-from tools.cache import cached, _setup_cache
-import tools.helpers as helpers
+from lib.conn import *
+from lib.gushim import GUSHIM
+from lib.cache import cached, _setup_cache

 app = Flask(__name__)
 app.debug = RUNNING_LOCAL # if we're local, keep debug on

+#### Helpers ####
+
+def _get_plans(count=1000, query={}):
+    return list(db.plans.find(query, limit=count).sort(
+        [("year", pymongo.DESCENDING), ("month", pymongo.DESCENDING), ("day", pymongo.DESCENDING)]))
+
+
+def _get_gushim(query={}, fields=None):
+    return list(db.gushim.find(query, fields=fields))
+
+
+def _create_response_json(data):
+    """
+    Convert dictionary to JSON. json_util.default adds automatic mongoDB result support
+    """
+    r = make_response(json.dumps(data, ensure_ascii=False, default=json_util.default))
+    r.headers['Access-Control-Allow-Origin'] = "*"
+    r.headers['Content-Type'] = "application/json; charset=utf-8"
+    return r
+
+
+def _create_response_atom_feed(request, plans, feed_title=''):
+    """
+    Create an atom feed of plans fetched from the DB based on an optional query
+    """
+    feed = AtomFeed(feed_title, feed_url=request.url, url=request.url_root)
+
+    for p in plans:
+        url = p['details_link']
+
+        # special emphasizing for some statuses
+        if p['status'] in [u'פרסום ההפקדה', u'פרסום בעיתונות להפקדה']:
+            status = u'»»%s««' % p['status']
+        else:
+            status = p['status']
+
+        content = p['essence'] + ' [' + status + ', ' + '%02d/%02d/%04d' % (p['day'], p['month'], p['year']) + \
+            ', ' + p['number'] + ']'
+        title = p['location_string']
+        # 'not title' is not supposed to happen anymore because every plan currently has a location
+        if not title:
+            title = p['number']
+
+        if p['mavat_code'] == '':
+            links = [{'href' : 'http://www.mavat.moin.gov.il/MavatPS/Forms/SV3.aspx?tid=4&tnumb=' + p['number'], 'rel': 'related', 'title': u'מבא"ת'}]
+        else:
+            links = [{'href': '%splan/%s/mavat' % (request.url_root, p['plan_id']), 'rel': 'related', 'title': u'מבא"ת'}]
+
+        feed.add(
+            title=title,
+            content=content,
+            content_type='html',
+            author="OpenTABA.info",
+            # id=url + '&status=' + p['status'],
+            # ^^ it seems like the &tblView= value keeps changing in the URL, which causes the ID to change and dlvr.it to republish items.
+ id="%s-%s" % (title, p['status']), + # this is a unique ID (not real URL) so adding status to ensure uniqueness in TBA stages + url=url, + links=links, + updated=datetime.date(p['year'], p['month'], p['day']) + ) + + return feed + + #### Cache Helper #### @app.before_first_request @@ -35,14 +105,18 @@ def get_gushim(): get gush_id metadata """ detailed = request.args.get('detailed', '') == 'true' - gushim = helpers._get_gushim(fields={'gush_id': True, 'last_checked_at': True, '_id': False}) + gushim = _get_gushim(fields={'gush_id': True, 'last_checked_at': True, '_id': False}) if detailed: # Flatten list of gushim into a dict g_flat = dict((g['gush_id'], {"gush_id": g['gush_id'], "last_checked_at": g['last_checked_at'], "plan_stats": {}}) for g in gushim) # Get plan statistics from DB - stats = helpers._get_plan_statistics() + stats = db.plans.aggregate([ + {"$unwind" : "$gushim" }, + {"$project": {"gush_id": "$gushim", "status": "$status", "_id": 0}}, + {"$group": {"_id": {"gush_id": "$gush_id", "status": "$status"}, "count": {"$sum": 1}}} + ]) # Merge stats into gushim dict for g in stats['result']: @@ -58,7 +132,7 @@ def get_gushim(): # De-flatten our dict gushim = g_flat.values() - return helpers._create_response_json(gushim) + return _create_response_json(gushim) @app.route('/gush/.json') @@ -67,10 +141,10 @@ def get_gush(gush_id): """ get gush_id metadata """ - gush = helpers._get_gushim(query={"gush_id": gush_id}) + gush = _get_gushim(query={"gush_id": gush_id}) if gush is None or len(gush) == 0: abort(404) - return helpers._create_response_json(gush[0]) + return _create_response_json(gush[0]) @app.route('/gush//plans.json') @@ -86,7 +160,7 @@ def get_plans(gushim): else: gushim_query = {'gushim': gushim[0]} - return helpers._create_response_json(helpers._get_plans(query=gushim_query)) + return _create_response_json(_get_plans(query=gushim_query)) @app.route('/recent.json') @@ -95,7 +169,7 @@ def get_recent_plans(): """ Get the 10 most recent plans to show on the site's home page """ - return helpers._create_response_json(helpers._get_plans(count=10)) + return _create_response_json(_get_plans(count=10)) @app.route('/plans.atom') @@ -106,7 +180,7 @@ def atom_feed(): else: title = u'תב"ע פתוחה' - return helpers._create_response_atom_feed(request, helpers._get_plans(count=20), feed_title=title).get_response() + return _create_response_atom_feed(request, _get_plans(count=20), feed_title=title).get_response() @app.route('/gush//plans.atom') @@ -122,7 +196,7 @@ def atom_feed_gush(gushim): else: gushim_query = {'gushim': gushim[0]} - return helpers._create_response_atom_feed(request, helpers._get_plans(query=gushim_query), feed_title=u'תב״ע פתוחה - גוש %s' % ', '.join(gushim)).get_response() + return _create_response_atom_feed(request, _get_plans(query=gushim_query), feed_title=u'תב״ע פתוחה - גוש %s' % ', '.join(gushim)).get_response() @app.route('/plans/search/') @@ -142,7 +216,7 @@ def redirect_to_mavat(plan_id): mavat website using an auto-sending form """ try: - plans = helpers._get_plans(count=1, query={'plan_id': int(plan_id)}) + plans = _get_plans(count=1, query={'plan_id': int(plan_id)}) except ValueError: # plan_id is not an int abort(400) except: # DB error @@ -172,7 +246,7 @@ def wakeup(): wake up Heroku dyno from idle. 
     perhaps can if >1 dynos used as endpoint for a "wakeup" request when the client inits
     """
-    return helpers._create_response_json({'morning': 'good'})
+    return _create_response_json({'morning': 'good'})


 #### MAIN ####
diff --git a/tools/__init__.py b/lib/__init__.py
similarity index 100%
rename from tools/__init__.py
rename to lib/__init__.py
diff --git a/tools/cache.py b/lib/cache.py
similarity index 100%
rename from tools/cache.py
rename to lib/cache.py
diff --git a/tools/conn.py b/lib/conn.py
similarity index 100%
rename from tools/conn.py
rename to lib/conn.py
diff --git a/tools/gushim.py b/lib/gushim.py
similarity index 100%
rename from tools/gushim.py
rename to lib/gushim.py
diff --git a/tools/helpers.py b/lib/helpers.py
similarity index 100%
rename from tools/helpers.py
rename to lib/helpers.py
diff --git a/tools/mavat_scrape.py b/lib/mavat_scrape.py
similarity index 100%
rename from tools/mavat_scrape.py
rename to lib/mavat_scrape.py
diff --git a/tools/mmi_scrape.py b/lib/mmi_scrape.py
similarity index 100%
rename from tools/mmi_scrape.py
rename to lib/mmi_scrape.py
diff --git a/tools/scrapelib.py b/lib/scrapelib.py
similarity index 100%
rename from tools/scrapelib.py
rename to lib/scrapelib.py
diff --git a/tools/sociallib.py b/lib/sociallib.py
similarity index 100%
rename from tools/sociallib.py
rename to lib/sociallib.py
diff --git a/readme.md b/readme.md
index 2fabcbd..175b9c0 100644
--- a/readme.md
+++ b/readme.md
@@ -16,7 +16,7 @@ The code is Flask based, working with MongoDB as database, Uses redis to handle
 Notice that if you are running this on a local dev machine you need to have mongodb running and listening in port 27017

 #### Create initial DB
-    python create_db --force -m [all | <muni_name>]
+    python scripts/create_db.py --force -m [all | <muni_name>]

 #### Scrape data into DB
diff --git a/scrape.py b/scrape.py
index 2b5d4fd..06528f0 100644
--- a/scrape.py
+++ b/scrape.py
@@ -7,8 +7,8 @@ from optparse import OptionParser, SUPPRESS_HELP
 from rq import Queue

 from app import app
-from tools.conn import *
-from tools.scrapelib import scrape_gush
+from lib.conn import *
+from lib.scrapelib import scrape_gush
 from worker import redis_conn
diff --git a/tools/create_db.py b/scripts/create_db.py
similarity index 92%
rename from tools/create_db.py
rename to scripts/create_db.py
index b5a8b34..73c1118 100644
--- a/tools/create_db.py
+++ b/scripts/create_db.py
@@ -1,7 +1,12 @@
 #!/usr/bin/python

-from conn import *
-from gushim import GUSHIM
+# allow ourselves to import from the parent and current directory
+import sys
+sys.path.insert(0, '../')
+sys.path.insert(0, '.')
+
+from lib.conn import *
+from lib.gushim import GUSHIM
 from optparse import OptionParser

 parser = OptionParser()
diff --git a/scripts/server_fabfile.py b/scripts/server_fabfile.py
index dcd24d2..0cfbc19 100644
--- a/scripts/server_fabfile.py
+++ b/scripts/server_fabfile.py
@@ -89,7 +89,7 @@ def delete_server(muni_name, ignore_errors=False):

 @task
 def update_gushim_server(muni_name):
-    """Add the gush ids from an existing online gush map to the tools/gushim.py file"""
+    """Add the gush ids from an existing online gush map to the lib/gushim.py file"""

     # download the online gush map
     gush_map = _download_gush_map(muni_name)
@@ -102,8 +102,8 @@ def update_gushim_server(muni_name):
     # make sure we're using the master branch
     local('git checkout master')

-    # open and load the existing gushim dictionary from tools/gushim.py
-    with open(os.path.join('tools', 'gushim.py')) as gushim_data:
+    # open and load the existing gushim dictionary from lib/gushim.py
+    with open(os.path.join('lib', 'gushim.py')) as gushim_data:
         existing_gushim = loads(gushim_data.read().replace('GUSHIM = ', ''))

     # remove all existing gushim from our new-gushim list, or create a new dictionary entry
@@ -121,8 +121,8 @@ def update_gushim_server(muni_name):
     else:
         existing_gushim[muni_name]['list'] += gush_ids

-    # write the dictionary back to tools/gushim.py
-    out = open(os.path.join('tools', 'gushim.py'), 'w')
+    # write the dictionary back to lib/gushim.py
+    out = open(os.path.join('lib', 'gushim.py'), 'w')
     out.write('GUSHIM = ' + dumps(existing_gushim, sort_keys=True, indent=4, separators=(',', ': ')))
     out.flush()
     os.fsync(out.fileno())
@@ -147,13 +147,13 @@ def update_gushim_server(muni_name):
     out.close()

     # commit and push to origin
-    local('git add %s' % os.path.join('tools', 'gushim.py'))
+    local('git add %s' % os.path.join('lib', 'gushim.py'))
     local('git add %s' % os.path.join('Tests', 'functional_tests', 'test_return_json.py'))
     local('git commit -m "added gushim and updated tests for %s"' % muni_name)
     local('git push origin master')

     print '*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*'
-    print 'The new/updated gushim data was added to tools/gushim.py and the test file '
+    print 'The new/updated gushim data was added to lib/gushim.py and the test file '
     print 'Tests/functional_tests/test_return_json.py was updated.'
     print 'Both files were successfully committed and pushed to origin.'
     print '*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*X*'
@@ -182,7 +182,7 @@ def create_db(muni_name):

     _heroku_connect()

-    local('heroku run "python tools/create_db.py --force -m %s" --app %s' % (muni_name, _get_server_full_name(muni_name)))
+    local('heroku run "python scripts/create_db.py --force -m %s" --app %s' % (muni_name, _get_server_full_name(muni_name)))


 @task
@@ -191,7 +191,7 @@ def update_db(muni_name):

     _heroku_connect()

-    local('heroku run "python tools/update_db.py --force -m %s" --app %s' % (muni_name, _get_server_full_name(muni_name)))
+    local('heroku run "python scripts/update_db.py --force -m %s" --app %s' % (muni_name, _get_server_full_name(muni_name)))


 @task
diff --git a/tools/update_db.py b/scripts/update_db.py
similarity index 92%
rename from tools/update_db.py
rename to scripts/update_db.py
index d7f6040..c0a8ae2 100644
--- a/tools/update_db.py
+++ b/scripts/update_db.py
@@ -1,5 +1,10 @@
 #!/usr/bin/env python2

+# allow ourselves to import from the parent and current directory
+import sys
+sys.path.insert(0, '../')
+sys.path.insert(0, '.')
+
 from conn import *
 from gushim import GUSHIM
 from optparse import OptionParser

From b913aa2ec1bbfed9737e11fe944466b37ba608bb Mon Sep 17 00:00:00 2001
From: florpor
Date: Mon, 29 Dec 2014 20:05:04 +0200
Subject: [PATCH 17/21] helpers crawled back to app.py when i merged

---
 app.py | 93 ++++++++--------------------------------------------------
 1 file changed, 12 insertions(+), 81 deletions(-)

diff --git a/app.py b/app.py
index f1999f8..73fa6fe 100644
--- a/app.py
+++ b/app.py
@@ -13,78 +13,13 @@

 from flask import abort, make_response, request

 from lib.conn import *
-from lib.gushim import GUSHIM
 from lib.cache import cached, _setup_cache
+import lib.helpers as helpers

 app = Flask(__name__)
 app.debug = RUNNING_LOCAL # if we're local, keep debug on

-#### Helpers ####
-
-def _get_plans(count=1000, query={}):
-    return list(db.plans.find(query, limit=count).sort(
-        [("year", pymongo.DESCENDING), ("month", pymongo.DESCENDING), ("day", pymongo.DESCENDING)]))
-
-
-def _get_gushim(query={}, fields=None):
-    return list(db.gushim.find(query, fields=fields))
-
-
-def _create_response_json(data):
-    """
-    Convert dictionary to JSON. json_util.default adds automatic mongoDB result support
-    """
-    r = make_response(json.dumps(data, ensure_ascii=False, default=json_util.default))
-    r.headers['Access-Control-Allow-Origin'] = "*"
-    r.headers['Content-Type'] = "application/json; charset=utf-8"
-    return r
-
-
-def _create_response_atom_feed(request, plans, feed_title=''):
-    """
-    Create an atom feed of plans fetched from the DB based on an optional query
-    """
-    feed = AtomFeed(feed_title, feed_url=request.url, url=request.url_root)
-
-    for p in plans:
-        url = p['details_link']
-
-        # special emphasizing for some statuses
-        if p['status'] in [u'פרסום ההפקדה', u'פרסום בעיתונות להפקדה']:
-            status = u'»»%s««' % p['status']
-        else:
-            status = p['status']
-
-        content = p['essence'] + ' [' + status + ', ' + '%02d/%02d/%04d' % (p['day'], p['month'], p['year']) + \
-            ', ' + p['number'] + ']'
-        title = p['location_string']
-        # 'not title' is not supposed to happen anymore because every plan currently has a location
-        if not title:
-            title = p['number']
-
-        if p['mavat_code'] == '':
-            links = [{'href' : 'http://www.mavat.moin.gov.il/MavatPS/Forms/SV3.aspx?tid=4&tnumb=' + p['number'], 'rel': 'related', 'title': u'מבא"ת'}]
-        else:
-            links = [{'href': '%splan/%s/mavat' % (request.url_root, p['plan_id']), 'rel': 'related', 'title': u'מבא"ת'}]
-
-        feed.add(
-            title=title,
-            content=content,
-            content_type='html',
-            author="OpenTABA.info",
-            # id=url + '&status=' + p['status'],
-            # ^^ it seems like the &tblView= value keeps changing in the URL, which causes the ID to change and dlvr.it to republish items.
-            id="%s-%s" % (title, p['status']),
-            # this is a unique ID (not real URL) so adding status to ensure uniqueness in TBA stages
-            url=url,
-            links=links,
-            updated=datetime.date(p['year'], p['month'], p['day'])
-        )
-
-    return feed
-
-
 #### Cache Helper ####

 @app.before_first_request
@@ -105,18 +40,14 @@ def get_gushim():
     get gush_id metadata
     """
     detailed = request.args.get('detailed', '') == 'true'
-    gushim = _get_gushim(fields={'gush_id': True, 'last_checked_at': True, '_id': False})
+    gushim = helpers._get_gushim(fields={'gush_id': True, 'last_checked_at': True, '_id': False})

     if detailed:
         # Flatten list of gushim into a dict
         g_flat = dict((g['gush_id'], {"gush_id": g['gush_id'], "last_checked_at": g['last_checked_at'], "plan_stats": {}}) for g in gushim)

         # Get plan statistics from DB
-        stats = db.plans.aggregate([
-            {"$unwind" : "$gushim" },
-            {"$project": {"gush_id": "$gushim", "status": "$status", "_id": 0}},
-            {"$group": {"_id": {"gush_id": "$gush_id", "status": "$status"}, "count": {"$sum": 1}}}
-        ])
+        stats = helpers._get_plan_statistics()

         # Merge stats into gushim dict
         for g in stats['result']:
@@ -132,7 +63,7 @@ def get_gushim():
     # De-flatten our dict
     gushim = g_flat.values()

-    return _create_response_json(gushim)
+    return helpers._create_response_json(gushim)


 @app.route('/gush/<gush_id>.json')
@@ -141,10 +72,10 @@ def get_gush(gush_id):
     """
     get gush_id metadata
     """
-    gush = _get_gushim(query={"gush_id": gush_id})
+    gush = helpers._get_gushim(query={"gush_id": gush_id})
     if gush is None or len(gush) == 0:
         abort(404)
-    return _create_response_json(gush[0])
+    return helpers._create_response_json(gush[0])


 @app.route('/gush/<gushim>/plans.json')
@@ -160,7 +91,7 @@ def get_plans(gushim):
     else:
         gushim_query = {'gushim': gushim[0]}

-    return _create_response_json(_get_plans(query=gushim_query))
+    return helpers._create_response_json(helpers._get_plans(query=gushim_query))


 @app.route('/recent.json')
@@ -169,7 +100,7 @@ def get_recent_plans():
     """
     Get the 10 most recent plans to show on the site's home page
     """
-    return _create_response_json(_get_plans(count=10))
+    return helpers._create_response_json(helpers._get_plans(count=10))


 @app.route('/plans.atom')
@@ -180,7 +111,7 @@ def atom_feed():
     else:
         title = u'תב"ע פתוחה'

-    return _create_response_atom_feed(request, _get_plans(count=20), feed_title=title).get_response()
+    return helpers._create_response_atom_feed(request, helpers._get_plans(count=20), feed_title=title).get_response()


 @app.route('/gush/<gushim>/plans.atom')
@@ -196,7 +127,7 @@ def atom_feed_gush(gushim):
     else:
         gushim_query = {'gushim': gushim[0]}

-    return _create_response_atom_feed(request, _get_plans(query=gushim_query), feed_title=u'תב״ע פתוחה - גוש %s' % ', '.join(gushim)).get_response()
+    return helpers._create_response_atom_feed(request, helpers._get_plans(query=gushim_query), feed_title=u'תב״ע פתוחה - גוש %s' % ', '.join(gushim)).get_response()


 @app.route('/plans/search/<plan_name>')
@@ -205,7 +136,7 @@ def find_plan(plan_name):
     """
     Find plans that contain the search query and return a json array of their plan and gush ids
     """
-    return _create_response_json(_get_plans(count=3, query={'number': {'$regex': '.*%s.*' % plan_name}}))
+    return helpers._create_response_json(helpers._get_plans(count=3, query={'number': {'$regex': '.*%s.*' % plan_name}}))


 @app.route('/plan/<plan_id>/mavat')
@@ -246,7 +177,7 @@ def wakeup():
     wake up Heroku dyno from idle.
     perhaps can if >1 dynos used as endpoint for a "wakeup" request when the client inits
     """
-    return _create_response_json({'morning': 'good'})
+    return helpers._create_response_json({'morning': 'good'})


 #### MAIN ####

From 87388de6bc62d434d6d94d34b3d29505295c883b Mon Sep 17 00:00:00 2001
From: florpor
Date: Thu, 1 Jan 2015 12:10:48 +0200
Subject: [PATCH 18/21] added sync_poster script

---
 DEPLOYMENT.md             |  5 ++-
 fabfile.py                |  2 +-
 scripts/server_fabfile.py |  8 ++++
 scripts/sync_poster.py    | 83 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 96 insertions(+), 2 deletions(-)
 create mode 100644 scripts/sync_poster.py

diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md
index 653e4d9..02b6ee7 100644
--- a/DEPLOYMENT.md
+++ b/DEPLOYMENT.md
@@ -81,8 +81,11 @@ heroku config:set POSTER_ID="holon_id" --app opentaba-server-holon
   by our naming pattern. Run the renew_db task on each one discovered.
 + `fab refresh_db:muni_name` Run the update_db and then the scrape tasks on the given
   municipality's heroku app.
-+ `fab refresh_db_all` Find servers by looing at your `heroku list` and filtering
++ `fab refresh_db_all` Find servers by looking at your `heroku list` and filtering
   by our naming pattern. Run the refresh_db task on each one discovered.
++ `fab sync_poster:muni_name,min_date` Run the [scripts/sync_poster.py](scripts/sync_poster.py) script on the given
+  municipality's heroku app. min_date is the minimum date of plans to post,
+  and should be of the format: 1/1/2015.
 ###Client
 + `fab create_client:muni_name,"display_name"` For client creation, all we need
diff --git a/fabfile.py b/fabfile.py
index d7f73ad..8158a47 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -9,7 +9,7 @@

 from scripts.client_fabfile import create_client
 from scripts.server_fabfile import create_server, delete_server, update_gushim_server, deploy_server, deploy_server_all, create_db
-from scripts.server_fabfile import update_db, scrape, renew_db, renew_db_all, refresh_db, refresh_db_all
+from scripts.server_fabfile import update_db, scrape, renew_db, renew_db_all, refresh_db, refresh_db_all, sync_poster


 @task
diff --git a/scripts/server_fabfile.py b/scripts/server_fabfile.py
index 0cfbc19..6cdd9fb 100644
--- a/scripts/server_fabfile.py
+++ b/scripts/server_fabfile.py
@@ -237,3 +237,11 @@ def refresh_db_all():

     for server in _get_servers():
         refresh_db(_get_muni_name(server))
+
+
+@task
+def sync_poster(muni_name, min_date):
+    """Run the sync_poster script file on a certain heroku app"""
+
+    _heroku_connect()
+
+    local('heroku run "python scripts/sync_poster.py -m %s -q" --app %s' % (min_date, _get_server_full_name(muni_name)))
diff --git a/scripts/sync_poster.py b/scripts/sync_poster.py
new file mode 100644
index 0000000..7335e9e
--- /dev/null
+++ b/scripts/sync_poster.py
@@ -0,0 +1,83 @@
+#!/usr/bin/python
+
+# allow ourselves to import from the parent and current directory
+import sys
+sys.path.insert(0, '../')
+sys.path.insert(0, '.')
+
+import os
+import datetime
+from optparse import OptionParser
+from time import sleep
+import requests
+
+from lib.conn import *
+import lib.helpers as helpers
+from lib.sociallib import post
+
+# can't communicate with poster service without these
+if not all(param in os.environ.keys() for param in ['POSTER_SERVICE_URL', 'POSTER_ID']):
+    print 'Environment variables POSTER_SERVICE_URL and POSTER_ID must both be set!'
+    exit(1)
+
+parser = OptionParser()
+parser.add_option('-m', dest='min_date', help='minimum date for plans to be sent to poster service. if not supplied, ALL plans will be sent. format: 1/1/2015')
+parser.add_option('-q', dest='quiet', default=False, action='store_true', help='quiet, don\'t prompt for user approval')
+parser.add_option('-d', dest='dont_wait', default=False, action='store_true', help='don\'t wait for poster service to post everything')
+
+(options, args) = parser.parse_args()
+
+if options.min_date:
+    # make sure the min_date parses fine
+    try:
+        min_date = datetime.datetime.strptime(options.min_date, '%d/%m/%Y')
+    except:
+        print 'Invalid minimum date. Format is 1/1/2015'
+        exit(1)
+
+    # build min_date query
+    plans_query = {'$or': [ {'year': {'$gt': min_date.year}}, {'year': min_date.year, 'month': {'$gt': min_date.month}}, {'year': min_date.year, 'month': min_date.month, 'day': {'$gte': min_date.day}} ]}
+else:
+    # no query
+    plans_query = {}
+
+# get valid plans
+plans = helpers._get_plans(query=plans_query)
+
+# if not quiet, make sure the user is ok with this
+if not options.quiet:
+    while 1:
+        if not options.min_date:
+            sys.stdout.write('No minimum date was supplied.\nAre you sure you want ALL %s plans to be synced? [y/N] ' % len(plans))
+        else:
+            sys.stdout.write('Are you sure you want %s plans to be synced? [y/N] ' % len(plans))
+
+        choice = raw_input().lower()
+        if choice == 'n' or choice == 'no':
+            exit()
+        elif choice == 'y' or choice == 'yes':
+            break
+
+print 'Posting plans... (may take up to a few minutes, depending on how many are sent)'
+
+# reverse the list so that we send the service the earlier plans first (service's queue is fifo)
+for plan in reversed(plans):
+    post(plan)
+
+if not options.dont_wait:
+    status = 10
+
+    while status > 1:
+        print 'Poking poster service for status...'
+
+        r = requests.get('%s/status' % os.environ['POSTER_SERVICE_URL'].rstrip('/'))
+        for s in r.text.split():
+            if s.isdigit():
+                status = int(s)
+
+        print 'Approxiamtely %s posts remaining...' % status
+
+        # wait for 30 seconds then poke again
+        sleep(30)
+
+    print 'Poster done!'

From 4140562aa05829febb89c525f934222b2611e5f7 Mon Sep 17 00:00:00 2001
From: florpor
Date: Thu, 1 Jan 2015 12:11:11 +0200
Subject: [PATCH 19/21] forgot to fix update_db script's imports

---
 scripts/update_db.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/update_db.py b/scripts/update_db.py
index c0a8ae2..62e5300 100644
--- a/scripts/update_db.py
+++ b/scripts/update_db.py
@@ -5,8 +5,8 @@
 sys.path.insert(0, '../')
 sys.path.insert(0, '.')

-from conn import *
-from gushim import GUSHIM
+from lib.conn import *
+from lib.gushim import GUSHIM
 from optparse import OptionParser

From 17cd2330b1b2152cfc3dcd487e66f80ca86f7c8e Mon Sep 17 00:00:00 2001
From: florpor
Date: Thu, 1 Jan 2015 17:10:59 +0200
Subject: [PATCH 20/21] POSTER_SERVICE_URL shouldn't have the /post part, then we can properly query /status

---
 lib/sociallib.py       | 2 +-
 scripts/sync_poster.py | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/lib/sociallib.py b/lib/sociallib.py
index ecebb82..d2d12e8 100644
--- a/lib/sociallib.py
+++ b/lib/sociallib.py
@@ -17,4 +17,4 @@ def post(plan):
     post_data = {'poster_id': os.environ['POSTER_ID'], 'title': formatted_plan['title'], 'content': formatted_plan['content'], 'url': formatted_plan['url']}

     # send data to social poster service. we just get an ok and continue, it's up to the service to take care of errors and such
-    requests.post(os.environ['POSTER_SERVICE_URL'], data=post_data)
+    requests.post('%s/post' % os.environ['POSTER_SERVICE_URL'].rstrip('/'), data=post_data)
diff --git a/scripts/sync_poster.py b/scripts/sync_poster.py
index 7335e9e..957c0c4 100644
--- a/scripts/sync_poster.py
+++ b/scripts/sync_poster.py
@@ -68,16 +68,17 @@
     status = 10

     while status > 1:
+        # wait for 15 seconds then poke
+        sleep(15)
+
         print 'Poking poster service for status...'

+        # get the /status page and parse the number in the output
         r = requests.get('%s/status' % os.environ['POSTER_SERVICE_URL'].rstrip('/'))
         for s in r.text.split():
             if s.isdigit():
                 status = int(s)

-        print 'Approxiamtely %s posts remaining...' % status
-
-        # wait for 30 seconds then poke again
-        sleep(30)
+        print 'Approximately %s posts remaining...' % status

     print 'Poster done!'
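Patches 18 and 20 together pin down the poster service's HTTP contract without ever showing the service itself: lib/sociallib.py POSTs the form fields poster_id, title, content and url to POSTER_SERVICE_URL/post, while scripts/sync_poster.py polls GET POSTER_SERVICE_URL/status and takes the first digit-only token in the response body as the remaining queue length, looping until it drops to 1 or below. A throwaway local stand-in along the following lines — a sketch inferred from those client calls, not part of this series, with an arbitrary file name and port — is enough to exercise both endpoints when trying sync_poster.py without the real service:

    # stub_poster.py - hypothetical stand-in for the poster service, inferred
    # from the client calls in lib/sociallib.py and scripts/sync_poster.py.
    # Run it, then point the scripts at it: POSTER_SERVICE_URL=http://localhost:5005
    from flask import Flask, request

    app = Flask(__name__)
    queue = []

    @app.route('/post', methods=['POST'])
    def accept_post():
        # sociallib.post() sends poster_id, title, content and url as form data
        queue.append(request.form['title'])
        return 'ok'

    @app.route('/status')
    def status():
        # sync_poster.py scans the response text for the first digit-only token,
        # so the count just has to appear as its own word
        return '%d posts remaining' % len(queue)

    if __name__ == '__main__':
        app.run(port=5005)

Since this stub never drains its queue, run sync_poster.py with -d (dont_wait) against it, or clear the list by hand; with an empty queue the /status loop exits after a single poll.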
From 35346529d1147f2b5b272d335f88091b28ec4a75 Mon Sep 17 00:00:00 2001
From: florpor
Date: Thu, 1 Jan 2015 17:18:56 +0200
Subject: [PATCH 21/21] fix tests

---
 Tests/functional_tests/test_return_json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tests/functional_tests/test_return_json.py b/Tests/functional_tests/test_return_json.py
index 8998bfe..9321f0f 100644
--- a/Tests/functional_tests/test_return_json.py
+++ b/Tests/functional_tests/test_return_json.py
@@ -97,7 +97,7 @@ def test_api_get_plan():
     eq_(response.mimetype, 'application/json')

     # I don't know the correct number, since it changes with each update, but it should be more than this
-    assert_true(len(j) >= 19)
+    assert_true(len(j) >= 17)


 def test_api_wakeup():
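A closing note on the sync_poster.py introduced in patch 18: because plans store their date as three separate integer fields (year, month, day) rather than a single datetime, a "plans on or after a minimum date" filter cannot be one range comparison, which is why the script builds a three-branch $or query. Pulled out as a standalone sketch — the helper name is illustrative; the script builds the dict inline — the logic is:

    # Sketch of the minimum-date filter used by scripts/sync_poster.py
    # (the helper name is ours, not the script's).
    import datetime

    def min_date_query(min_date):
        return {'$or': [
            # any plan from a later year,
            {'year': {'$gt': min_date.year}},
            # or the same year with a later month,
            {'year': min_date.year, 'month': {'$gt': min_date.month}},
            # or the same year and month, on or after the day
            {'year': min_date.year, 'month': min_date.month, 'day': {'$gte': min_date.day}},
        ]}

    # the same day/month/year format the script's -m option accepts
    print min_date_query(datetime.datetime.strptime('1/1/2015', '%d/%m/%Y'))

Note the asymmetry: the year and month branches use strict $gt while only the day branch uses $gte, so exactly the plans dated on or after the given day match, with no overlap between the three branches.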