forked from OdinLin/Pyweibo
-
Notifications
You must be signed in to change notification settings - Fork 2
/
pyweibo.py
209 lines (156 loc) · 5.66 KB
/
pyweibo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from argparse import ArgumentParser
import datetime, os, sys
from ConfigParser import SafeConfigParser
import lib.Pyweibo as Pyweibo
# sys.modules.keys()
# Configuration: load 'settings.py' from the current working directory.
# The file is parsed by SafeConfigParser, i.e. as INI-style sections/options
# rather than as Python code. ConfigParser.read() silently skips files it
# cannot open, so a missing settings file only surfaces later, when an
# option is requested with config.get().
config = SafeConfigParser()
config.read(os.path.join(os.getcwd(), 'settings.py'))
def _default_graph_name(posturl):
    """Build the default graph file name 'post_<a>_<b>' from a post URL.

    <a> and <b> are the 4th and 5th '/'-separated parts of the URL; for
    'http://host/a/b' these are the two path segments after the host.

    Raises:
        ValueError: if the URL has fewer than five '/'-separated parts.
    """
    urlparts = posturl.split('/')
    if len(urlparts) < 5:
        raise ValueError(
            "Cannot derive a file name from URL %r" % (posturl,))
    return 'post_%s_%s' % (urlparts[3], urlparts[4])


def main():
    """Parse the command line and dispatch to the crawler or API action.

    Positional argument ``method`` selects ``crawl`` or ``api``; ``-a`` is
    the (required) action and ``-u`` an optional URL.

    * ``crawl``: names the graph file through ``nameFile`` and then handles
      the ``map``, ``tag``, ``graph`` and ``feel`` actions. A URL is needed
      whenever no output file name is available, because the name is then
      derived from the URL.
    * ``api``: handles ``token``, ``resume`` and ``coms``/``RT`` (the last
      two need ``-u``).

    Invalid invocations end through ``parser.error`` (usage message on
    stderr, exit status 2). Returns ``None``.
    """
    desc = "PyWeibo is a crawler and visualization tool for Sina Weibo"
    usage = "\npyweibo <crawl / api> [-a 'action'] [-u 'URL'] [options]\n"

    parser = ArgumentParser(usage=usage, description=desc)
    # The ``version=`` constructor argument is deprecated in Python 2.7 and
    # gone in Python 3; an explicit 'version' action gives the same -v /
    # --version option.
    parser.add_argument('-v', '--version', action='version', version=" 0.1")

    # Choose crawler or API
    parser.add_argument("method", help="crawl or api")

    # Choose an action
    parser.add_argument('-a', "--action",
                        help='Select a way to process the data',
                        dest='action',
                        action='store',
                        metavar='<coms/RT/map/graph/tag/feel>',
                        required=True)

    parser.add_argument('-u', "--url",
                        help='Select an URL to process',
                        dest='url',
                        action='store',
                        metavar='<url>')

    # The storage, graph and crawler options below are disabled: they live in
    # a bare string literal and are never registered with the parser. The
    # crawl branch therefore reads them with getattr() and falls back to
    # built-in defaults.
    '''
    parser.add_argument("-d", "--datatype",
                        action="store",
                        dest="db",
                        default=False,
                        help="select between json, MongoDB or Redis to store raw data")
    # Graph options
    parser.add_argument("-o", "--outputfile",
                        action="store", # optional because action defaults to "store"
                        dest="outputfile",
                        default="./out/graph",
                        help="Name of the file to write graph info (time tag will be added). If empty, a name is generated by default from URL",
                        metavar='<FILENAME>')
    parser.add_argument("-g", "--graphtype",
                        action="store", # optional because action defaults to "store"
                        dest="graphtype",
                        default="dot",
                        help="chose Graph file type: .dot or .gdf (for Gephi). Default: .dot",
                        metavar='<graphtype>')
    # Crawler options
    parser.add_argument("-l", "--level",
                        dest="level",
                        default="2",
                        help="crawler depth",
                        type=int,
                        metavar='<graphtype>')
    parser.add_argument("-m", "--max",
                        dest="max",
                        default="10000",
                        help="Maximum number of posts to crawl",
                        type=int,
                        metavar='<graphtype>')
    '''

    args = parser.parse_args()

    # TODO: validate the Sina Weibo URL.

    # TODO (storage): choose between a local JSON file, Mongo and Redis once
    # the -d/--datatype option is enabled again.

    # CRAWLER ACTIONS
    if args.method == "crawl":
        # Graph file extension: 'gdf' on request, 'dot' otherwise.
        if getattr(args, 'graphtype', None) == "gdf":
            graphext = "gdf"
        else:
            graphext = "dot"

        # Graph file name: explicit output file, else derived from the URL.
        filename = getattr(args, 'outputfile', None)
        if not filename:
            if not args.url:
                parser.error('You should input an URL to map using -u <URL>')
            try:
                filename = _default_graph_name(args.url)
            except ValueError as err:
                parser.error(str(err))
        nameFile(filename, graphext)

        # Crawl limits, used when the corresponding option is absent or 0.
        level = getattr(args, 'level', None) or 2
        maxposts = getattr(args, 'max', None) or 100

        pyweibo = Pyweibo.Pyweibo()  # init

        if args.action == "map":
            print("You asked for a repost map of %s" % (args.url,))
            pyweibo.generateRepostMap(args.url, level, maxposts)
        elif args.action == "tag":
            print("You asked for a report comments of %s" % (args.url,))
        elif args.action == "graph":
            # This branch used to test "map" a second time and could never
            # run; "graph" is the action advertised in the -a metavar.
            print("You asked for a social graph of profile %s" % (args.url,))
        elif args.action == "feel":
            print("You asked for a sentiment analysis")
        else:
            print("Unknown actions. Please check -a value.")

    # API ACTIONS
    elif args.method == "api":
        pyweibo = Pyweibo.Pyweibo()  # init

        if args.action == "token":
            print("Token will be created.")
            pyweibo.getToken()
        elif args.action == "resume":
            pyweibo.resumePostData()
        elif args.action in ("coms", "RT"):
            if not args.url:
                parser.error('You should input an URL to map using -u <URL>')
            # NOTE(review): the post id is hard-coded, so the URL given with
            # -u is only checked for presence. Deriving the id from the URL
            # (pyweibo.getPostIdFromUrl) is still to be wired in - confirm
            # that helper before enabling it.
            postId = "3534311687371818"
            data_format = "json"
            filename = nameFile('post_%s' % (postId,), data_format)
            pyweibo.getPostData(args.action, postId, filename, data_format)
        else:
            print("Unknown actions. Please check -a value.")

    else:
        print('Unknow method, please use "api" or "crawl".')
        # Dump the parsed arguments to help diagnose the bad invocation.
        print(args)
def nameFile(filename,extension):
    """Return a timestamped output path for *filename*.

    The result is ``<path><os.sep><filename>_<yymmdd_HHMMSS>``, where
    ``<path>`` is the ``path`` option of the ``files`` section in the
    module-level ``config`` and the timestamp is the current local time.
    The path is also printed before being returned.

    ``extension`` is accepted but not used: nothing is appended to the name.
    """
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M%S")
    base_dir = config.get('files', 'path')
    name = os.sep.join([base_dir, filename + "_" + stamp])
    print(name)
    return name
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()