diff --git a/twitter_scraper.py b/twitter_scraper.py index 7e71b96..b549f7f 100644 --- a/twitter_scraper.py +++ b/twitter_scraper.py @@ -32,7 +32,13 @@ def gen_tweets(pages): dot = "." tweets = [] for tweet in html.find('.stream-item'): - text = tweet.find('.tweet-text')[0].full_text + tweet_text_elements = tweet.find('.tweet-text') + if not tweet_text_elements: + continue + text_container = tweet_text_elements[0] + for hidden_child in text_container.lxml.find_class('u-hidden'): + hidden_child.drop_tree() + text = text_container.full_text.strip() tweetId = tweet.find( '.js-permalink')[0].attrs['data-conversation-id'] time = datetime.fromtimestamp(