-
Notifications
You must be signed in to change notification settings - Fork 5
/
collector.py
44 lines (35 loc) · 971 Bytes
/
collector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import urlparse
import re
# Matches a Twitter profile URL and captures the screen name as ``account``.
# Accepts http or https, an optional ``www.`` prefix, the legacy ``#!/``
# hashbang form and an optional trailing slash.  Scheme and host are matched
# case-insensitively.  A screen name is 1-15 characters from [A-Za-z0-9_], so
# deeper paths such as ``/user/status/1`` do not match.
twitter = re.compile(
    r'^https?://(?:www\.)?twitter\.com/(#!/)?'
    r'(?P<account>[a-zA-Z0-9_]{1,15})/?$',
    re.IGNORECASE)


def collect(urls):
    """Collect the social-network accounts referenced by *urls*.

    Only Twitter profile links are extracted at the moment; URLs pointing at
    any other host are ignored.

    Args:
        urls: Iterable of URL strings.

    Returns:
        A dict of the form ``{'twitter': {account: url}}`` that maps every
        Twitter screen name found to the URL it was taken from.  When several
        URLs name the same account, the last one wins.
    """
    collection = {'twitter': {}}
    for url in urls:
        try:
            hostname = urlparse.urlparse(url).hostname
        except ValueError:
            # Malformed URL (e.g. an unterminated IPv6 literal): skip it
            # instead of aborting the whole collection.
            continue
        if hostname is None:
            # Relative link or a string without a network location.
            continue
        if hostname in ('twitter.com', 'www.twitter.com'):
            m = twitter.match(url)
            if m is None:
                continue
            account = m.group('account')
            # "share" is not an account, although http://twitter.com/#!/share
            # says that this account is suspended.
            if account != 'share':
                collection['twitter'][account] = url
        # TODO: hosts that are known but not collected yet:
        # www.facebook.com, www.linkedin.com, plus.google.com,
        # www.slideshare.net, www.youtube.com, www.flickr.com, *.xing.com
    return collection