-
Notifications
You must be signed in to change notification settings - Fork 0
/
getwp
executable file
·49 lines (39 loc) · 1.21 KB
/
getwp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#! /usr/bin/env python
import argparse
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
import feedparser
import json
import sys
from pprint import pprint
def main(args=None):
    """Parse command-line arguments and print a feed's posts as JSON.

    Args:
        args: Sequence of argument strings, without the program name.
            When None, argparse falls back to ``sys.argv[1:]``.

    The single positional ``target`` argument is passed to ``get_posts``
    and the list it returns is written to stdout as one JSON document.
    """
    parser = argparse.ArgumentParser(
        # Spelling fixed: this text previously read "worpress".
        description="Retrieve WordPress posts.",
    )
    parser.add_argument(
        'target',
        help='Target user to pull public data from.',
    )
    options = parser.parse_args(args)
    print(json.dumps(get_posts(options.target)))
def get_posts(url):
    """Fetch ``<url>/feed/`` and return the text of each post it contains.

    Args:
        url: Base address of the site; ``/feed/`` is appended to it.
            Trailing slashes are stripped first.

    Returns:
        A list of strings, one per content item of every feed entry that
        has a ``content`` field. Each string is the concatenation of the
        item's text nodes with newline characters removed. Entries that
        have no ``content`` field are skipped.
    """
    # Strip trailing slashes so "http://host/" does not turn into
    # "http://host//feed/".
    full_url = '%s/feed/' % url.rstrip('/')
    wp = feedparser.parse(full_url)
    entries = []
    for entry in wp.entries:
        if 'content' not in entry:
            continue
        content = entry['content']
        # Normalise a lone content item to a one-element list so the loop
        # below handles both shapes.
        if not isinstance(content, list):
            content = [content]
        for post in content:
            soup = BeautifulSoup(
                post['value'],
                # Presumably decodes HTML entities such as &amp; while
                # parsing -- confirm against the BeautifulSoup 3 docs.
                convertEntities=BeautifulSoup.HTML_ENTITIES,
            )
            # Keep only the text nodes (dropping the markup), then remove
            # newlines so each post is a single line.
            text = ''.join(soup.findAll(text=True)).replace('\n', '')
            entries.append(text)
    return entries
# Script entry point: when the file is executed directly, forward the
# command-line arguments (without the program name) to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)