-
Notifications
You must be signed in to change notification settings - Fork 1
/
tweetsData.R
48 lines (39 loc) · 1.64 KB
/
tweetsData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#| Written in Windows 7, 64 bit
#| R version 2.15.2
#| By Rolf Fredheim ([email protected])
#|_____________________________________________________________
#| Required libraries:
#| library(cldr) #language detection
#| library(dismo) #To access geo location API
#| library(lubridate) #date formatting
#| library(plyr) #data reshaping
#| library(rjson) #for Twitter
#| library(RJSONIO) #for Twitter
#| library(stringr) #reshaping data
#| library(twitteR) #accessing twitter API
#| library(XML) #html parsing
#| library(RCurl) #fetching data from internet
#|_____________________________________________________________
#Setup script: point R at the project root, load the helper functions and
#packages, register the Twitter credentials, then build the tweet data.
#
#Set working directory to root folder.
#"yourRootHere" is a placeholder: replace it with the path of the project
#root, because the relative paths used below are resolved against it.
setwd("yourRootHere")
#Load the required functions
source("rFunctions/twitterFunctions.R")
#Load the required packages. Will install any unavailable packages.
#NOTE(review): loader() is not defined in this script; presumably it comes
#from the file sourced above - confirm.
loader("yandexData")
#To handle Cyrillic characters.
#NOTE(review): "russian" looks like a Windows-style locale name (the header
#says the script was written on Windows 7); other platforms will probably
#need a different name - confirm before running elsewhere.
Sys.setlocale("LC_CTYPE","russian")
#REQUIRES TWITTER API
#Load stored authentication data.
#If you have not set up Twitter API access, follow the walkthrough here:
#WALKTHROUGH
load("c:/Users/Rolf/documents/cred.Rdata") #EDIT: path to your own credentials file
#NOTE(review): Cred is presumably the object restored by the load() call
#above - confirm it is saved under that name.
registerTwitterOAuth(Cred)
#Save a CA certificate bundle as cacert.pem in the working directory.
#This runs on every execution of the script and fetches over plain http.
download.file(url="http://curl.haxx.se/ca/cacert.pem", destfile="cacert.pem")
#Data processing, shaping OR load the data.
#NOTE(review): the line above describes two alternatives, but as written
#both statements run: importYandex() is called and then load() restores
#whatever objects were saved in extractedTweets.Rdata. If that file contains
#an object named df, it replaces the freshly imported one before
#yandexFormat() is called - verify which path is intended and comment out
#the other.
df <- importYandex("tweets/")
load("tweetData/extractedTweets.Rdata")
tt2 <- yandexFormat(df)
#Assumes Yandex html files.
#Takes a directory and converts all html pages in that directory into tweets
#in a data frame.
#DIR -> data.frame
#Requires an activated Twitter API and cacert.pem to be loaded.