# pyweibo.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from argparse import ArgumentParser
import datetime, os, sys
from ConfigParser import SafeConfigParser
import lib.Pyweibo as Pyweibo

# sys.modules.keys()

# Config stuff: load settings from the ``settings.py`` file located in the
# current working directory. ``read()`` skips files it cannot open, so a
# missing file only surfaces later, when ``config.get(...)`` is called.
config = SafeConfigParser()
# Join the directory and file name as separate components; the previous
# single-argument join was a no-op around manual string concatenation.
SETTINGS_PATH = os.path.join(os.getcwd(), 'settings.py')
config.read(SETTINGS_PATH)


def _build_parser():
    """Create the argument parser for the ``pyweibo`` command line."""
    desc = "PyWeibo is a crawler and visualization tool for Sina Weibo"
    usage = """
        pyweibo <crawl / api> [-a 'action'] [-u 'URL'] [options] 
        """
    parser = ArgumentParser(usage=usage, description=desc)

    # The ``version=`` constructor keyword is deprecated in Python 2.7 and
    # gone in Python 3; the explicit "version" action is the supported form.
    parser.add_argument('-v', '--version', action='version', version=" 0.1")

    # Choose crawler or API
    parser.add_argument("method", help="crawl or api")

    # Choose an action
    parser.add_argument('-a', "--action",
                        help='Select a way to process the data',
                        dest='action',
                        action='store',
                        metavar='<coms/RT/map/graph/tag/feel>',
                        required=True)

    parser.add_argument('-u', "--url",
                        help='Select an URL to process',
                        dest='url',
                        action='store',
                        metavar='<url>')

    # TODO: storage backend selection (json / MongoDB / Redis) is not wired up.

    # Graph options (read by the "crawl" method)
    parser.add_argument("-o", "--outputfile",
                        dest="outputfile",
                        default=None,
                        help="Name of the file to write graph info (time tag will be added). If empty, a name is generated by default from URL",
                        metavar='<FILENAME>')

    parser.add_argument("-g", "--graphtype",
                        dest="graphtype",
                        default="dot",
                        help="chose Graph file type: .dot or .gdf (for Gephi). Default: .dot",
                        metavar='<graphtype>')

    # Crawler options; ``None`` lets the crawl step apply its own defaults.
    parser.add_argument("-l", "--level",
                        dest="level",
                        default=None,
                        help="crawler depth",
                        type=int,
                        metavar='<level>')

    parser.add_argument("-m", "--max",
                        dest="max",
                        default=None,
                        help="Maximum number of posts to crawl",
                        type=int,
                        metavar='<max>')
    return parser


def _graph_basename(url):
    """Return ``post_<part3>_<part4>`` built from the '/'-separated URL parts.

    Falls back to ``'graph'`` when no URL is given or it has fewer than five
    parts, instead of raising ``IndexError``.
    """
    parts = url.split('/') if url else []
    if len(parts) > 4:
        return 'post_%s_%s' % (parts[3], parts[4])
    return 'graph'


def _run_crawl(args, parser):
    """Handle ``crawl``: name the graph file, then dispatch on the action."""
    if args.action == "map" and not args.url:
        # Same requirement and message as the API actions that need a URL.
        parser.error('You should input an URL to map using -u <URL>')

    # Naming graph file: anything other than "gdf" means the default "dot".
    graphext = "gdf" if args.graphtype == "gdf" else "dot"
    filename = args.outputfile or _graph_basename(args.url)
    # nameFile prints the timestamped path; the crawler call below does not
    # take a file name, so the returned value is not used here.
    nameFile(filename, graphext)

    # Build request: a missing (or zero) value falls back to the defaults.
    level = args.level or 2
    maxposts = args.max or 100

    pyweibo = Pyweibo.Pyweibo()  # init

    if args.action == "map":
        print("You asked for a repost map of %s" % (args.url))
        pyweibo.generateRepostMap(args.url, level, maxposts)
    elif args.action == "tag":
        print("You asked for a report comments of %s" % (args.url))
    elif args.action == "graph":
        # This branch used to test "map" a second time and was unreachable.
        print("You asked for a social graph of profile %s" % (args.url))
    elif args.action == "feel":
        print("You asked for a sentiment analysis")
    else:
        print("Unknown actions. Please check -a value.")


def _run_api(args, parser):
    """Handle ``api``: token creation, resume, or comment/repost download."""
    pyweibo = Pyweibo.Pyweibo()  # init

    if args.action == "token":
        print("Token will be created.")
        pyweibo.getToken()
    elif args.action == "resume":
        pyweibo.resumePostData()
    elif args.action in ("coms", "RT"):
        if not args.url:
            parser.error('You should input an URL to map using -u <URL>')

        # NOTE(review): the post id is hard-coded and the URL is only checked
        # for presence; deriving the id from the URL is still to be done.
        postId = "3534311687371818"

        # get comments data
        fmt = "json"
        filename = nameFile('post_%s' % (postId), fmt)
        pyweibo.getPostData(args.action, postId, filename, fmt)
    else:
        print("Unknown actions. Please check -a value.")


def main():
    """Parse the command line and run the requested crawl or API action.

    The positional ``method`` selects the branch ("crawl" or "api"); any
    other value only prints a hint. The parsed arguments are printed at the
    end of every run that does not exit early through ``parser.error``.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # validate Sina Weibo URL (to do)

    if args.method == "crawl":
        _run_crawl(args, parser)
    elif args.method == "api":
        _run_api(args, parser)
    else:
        print('Unknow method, please use "api" or "crawl".')

    print(args)


def nameFile(filename,extension):
    """Build, print and return a timestamped path for an output file.

    The path is ``<path option of the [files] config section>`` + ``os.sep``
    + ``<filename>_<yymmdd_HHMMSS>``, using the current local time.

    NOTE(review): ``extension`` is accepted but never used, so the returned
    name carries no file extension -- confirm whether callers add it.
    """
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M%S")
    base_dir = config.get('files', 'path')
    name = os.sep.join([base_dir, filename + "_" + stamp])
    print(name)
    return name

# Run the command-line interface only when executed as a script,
# not when this module is imported.
if "__main__" == __name__:
    main()