From 54f8938e3095633d8cf0d40855118774c49ddd8b Mon Sep 17 00:00:00 2001
From: Tim Chan <timychann@gmail.com>
Date: Sat, 7 Jul 2018 13:46:03 +1000
Subject: [PATCH 1/3] Add in basic twitter archive to Corpus functionality

---
 .gitignore                |  3 +++
 local_settings_example.py |  4 ++++
 twittereater.py           | 17 +++++++++++++++++
 3 files changed, 24 insertions(+)
 create mode 100644 twittereater.py

diff --git a/.gitignore b/.gitignore
index 390be07..fa0d0b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,6 @@
 .git
 /.idea
 __pycache__
+*.csv
+.env/
+local_settings.py
\ No newline at end of file
diff --git a/local_settings_example.py b/local_settings_example.py
index 1760b3d..fdaabce 100644
--- a/local_settings_example.py
+++ b/local_settings_example.py
@@ -34,3 +34,7 @@
 
 DEBUG = True  # Set this to False to start Tweeting live
 TWEET_ACCOUNT = ""  # The name of the account you're tweeting to.
+
+#Configuration for Twitter parser. TEST_SOURCE will be re-used as the corpus location.
+TWITTER_ARCHIVE_NAME = "tweets.csv" #Name of your twitter archive
+IGNORE_RETWEETS = True #If you want to remove retweets
\ No newline at end of file
diff --git a/twittereater.py b/twittereater.py
new file mode 100644
index 0000000..1ff8adb
--- /dev/null
+++ b/twittereater.py
@@ -0,0 +1,17 @@
+import csv
+from local_settings import TWITTER_ARCHIVE_NAME, TEST_SOURCE, IGNORE_RETWEETS
+
+f = open(TWITTER_ARCHIVE_NAME, 'r', encoding='utf-8')
+tweets = []
+reader = csv.reader(f,quotechar='"')
+next(reader) #get rid of the twitter header
+
+
+# row[8] (9th column) is the retweet timestamp; assumed empty for original tweets.
+with open(TEST_SOURCE, 'w') as tweetarchive:  # closed (and flushed) on exit
+    for row in reader:
+        # Drop a row only when retweets are being ignored and it is a retweet.
+        if not (IGNORE_RETWEETS and row[8]):
+            tweetarchive.write("'%s'," % (row[5]))
+f.close()  # release the archive CSV handle opened above
+

From f1861f388118ac29cf399b64e2e6cbe89279ca44 Mon Sep 17 00:00:00 2001
From: Tim Chan <timychann@gmail.com>
Date: Tue, 31 Jul 2018 20:02:44 +1000
Subject: [PATCH 2/3] Update twittereater for python 2.7 compatibility

Updated twittereater.py for python 2.7 compatibility
Updated readme.md to include twitter archive instructions
Updated contributors.md
---
 CONTRIBUTORS.md | 3 ++-
 README.md       | 9 +++++++++
 twittereater.py | 3 ++-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 4ab5703..30fa420 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -10,4 +10,5 @@
 * [varjmes](https://github.com/varjmes)
 * [meggle](https://github.com/meggle)
 * [superstrong](https://github.com/superstrong)
-* [andrlik](https://github.com/andrlik)
\ No newline at end of file
+* [andrlik](https://github.com/andrlik)
+* [TimLChan](https://github.com/TimLChan)
diff --git a/README.md b/README.md
index c6e4b22..ffe601b 100644
--- a/README.md
+++ b/README.md
@@ -60,6 +60,15 @@ To scrape content from the web, set `SCRAPE_URL` to `True`. This bot makes use o
 
 __Note:__ Web scraping is experimental and may give you unexpected results. Make sure to test the bot in debugging mode before publishing.
 
+#### Twitter archive
+To use tweets from a Twitter account you have access to, you can download your Twitter Archive by following the steps from [Twitter's Help Center](https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive).
+
+1. Request your Twitter archive
+2. Extract the CSV file and ensure it is named the same as the `TWITTER_ARCHIVE_NAME` in `local_settings.py`
+3. In `local_settings.py`, retweets are ignored by default. If you want to include retweets in your corpus, change `IGNORE_RETWEETS` to `False`.
+4. Once that is all set, run `twittereater.py` and it will automatically create a corpus file based on the `TEST_SOURCE` variable in `local_settings.py`
+
+
 ## Debugging
 
 If you want to test the script or to debug the tweet generation, you can skip the random number generation and not publish the resulting tweets to Twitter.
diff --git a/twittereater.py b/twittereater.py
index 1ff8adb..6f69008 100644
--- a/twittereater.py
+++ b/twittereater.py
@@ -1,7 +1,8 @@
+# -*- coding: utf-8 -*-
 import csv
 from local_settings import TWITTER_ARCHIVE_NAME, TEST_SOURCE, IGNORE_RETWEETS
 
-f = open(TWITTER_ARCHIVE_NAME, 'r', encoding='utf-8')
+f = open(TWITTER_ARCHIVE_NAME, 'r')
 tweets = []
 reader = csv.reader(f,quotechar='"')
 next(reader) #get rid of the twitter header

From fea6f3c4544454fd952f3db57fdc607570fd35bc Mon Sep 17 00:00:00 2001
From: Tim Chan <timychann@gmail.com>
Date: Tue, 31 Jul 2018 20:07:46 +1000
Subject: [PATCH 3/3] Update readme.md

Add in an optional step to use the Twitter archive corpus as tweet source
---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ffe601b..6d5416c 100644
--- a/README.md
+++ b/README.md
@@ -66,7 +66,10 @@ To use tweets from a Twitter account you have access to, you can download your T
 1. Request your Twitter archive
 2. Extract the CSV file and ensure it is named the same as the `TWITTER_ARCHIVE_NAME` in `local_settings.py`
 3. In `local_settings.py`, retweets are ignored by default. If you want to include retweets in your corpus, change `IGNORE_RETWEETS` to `False`.
-4. Once that is all set, run `twittereater.py` and it will automatically create a corpus file based on the `TEST_SOURCE` variable in `local_settings.py`
+4. Set `TEST_SOURCE` to the file name that the parsed Twitter archive should be written to
+5. Once that is all set, run `twittereater.py` and it will automatically create a corpus file based on the `TEST_SOURCE` variable in `local_settings.py`
+
+If you want to use the Twitter corpus to generate tweets, set `STATIC_TEST = True`
 
 
 ## Debugging