forked from trevorhobenshield/twitter-api-client
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add new GraphQL endpoint `/SearchTimeline`. `api.twitter.com/2/search/adaptive.json` deprecated
- Loading branch information
1 parent
55425b7
commit afba99f
Showing
6 changed files
with
275 additions
and
158 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import re | ||
import pandas as pd | ||
from twitter.util import find_key | ||
|
||
|
||
def get_tweets(data: list | dict, cols: list[str] | None = None) -> pd.DataFrame:
    """
    Convert raw GraphQL response to DataFrame

    Entries whose ``entryId`` starts with ``promoted`` are skipped. Rows
    without a ``user_id_str`` are dropped, ``created_at`` is parsed into
    timezone-aware timestamps and the result is sorted newest first.

    @param data: tweets
    @param cols: option to only include certain columns
    @return: DataFrame of tweets; an empty DataFrame with the requested
        columns when the response contains no usable tweets
    """
    cols = cols or [
        'id_str',
        'user_id_str',
        'created_at',
        'full_text',
        'favorite_count',
        'quote_count',
        'reply_count',
        'retweet_count',
        'lang',
    ]
    # NOTE(review): find_key presumably returns a list with every value stored
    # under the key anywhere in `data` -- confirm against twitter.util.
    entries = [y for x in find_key(data, 'entries') for y in x]
    # filter out promoted tweets; `or ''` keeps entries that carry no entryId
    # instead of raising AttributeError on None
    tweets = [x for x in entries if not (x.get('entryId') or '').startswith('promoted')]

    tweet_results = find_key(tweets, 'tweet_results')
    if not tweet_results:
        return pd.DataFrame(columns=cols)

    normalized = pd.json_normalize(tweet_results, max_level=1)
    if 'result.legacy' not in normalized.columns:
        # none of the results carried a `legacy` payload
        return pd.DataFrame(columns=cols)

    # expand each `legacy` dict into its own set of columns
    legacy = normalized['result.legacy'].apply(pd.Series)
    if 'user_id_str' not in legacy.columns:
        return pd.DataFrame(columns=cols)

    df = (
        legacy
        # results without a legacy payload expand to all-NaN rows; drop them
        .dropna(subset=['user_id_str'])
        .assign(created_at=lambda x: pd.to_datetime(x['created_at'], format="%a %b %d %H:%M:%S %z %Y"))
        .sort_values('created_at', ascending=False)
        .reset_index(drop=True)
    )
    numeric = [
        'user_id_str',
        'id_str',
        'favorite_count',
        'quote_count',
        'reply_count',
        'retweet_count',
    ]
    df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce')
    return df[cols]
|
||
|
||
def get_tweets_urls(data: dict | list, expr: str, cols: list[str] | None = None) -> pd.DataFrame:
    """
    Convert raw GraphQL response to DataFrame
    Search for tweets containing specific urls by regex

    A tweet is kept when at least one of its ``expanded_url`` values matches
    ``expr``. Matching tweets are sorted newest first using the parsed
    timestamp, which is also exposed as the ``date`` column; ``created_at``
    keeps the raw string from the response.

    @param data: tweets
    @param expr: regex to match urls
    @param cols: option to only include certain columns
    @return: DataFrame of tweets matching the expression; an empty DataFrame
        with the requested columns when nothing matches or the matched tweets
        cannot be tabulated
    """
    cols = cols or [
        'id_str',
        'user_id_str',
        'created_at',
        'urls',
        'full_text',
        'favorite_count',
        'quote_count',
        'reply_count',
        'retweet_count',
        'lang',
    ]
    # compile once rather than resolving the pattern again for every url
    pattern = re.compile(expr)
    results = []
    for res in find_key(data, 'tweet_results'):
        legacy = res.get('result', {}).get('legacy', {})
        urls = find_key(res, 'expanded_url')
        if any(pattern.search(x) for x in urls):
            results.append({'urls': urls} | legacy)

    if not results:
        # nothing matched: return an empty frame instead of failing on the
        # missing 'created_at' column further down
        return pd.DataFrame(columns=cols)

    try:
        df = (
            pd.DataFrame(results)
            .assign(date=lambda x: pd.to_datetime(x['created_at'], format="%a %b %d %H:%M:%S %z %Y"))
            # sort on the parsed timestamps: the raw 'created_at' strings begin
            # with the weekday name, so sorting them is not chronological
            .sort_values('date', ascending=False)
            .reset_index(drop=True)
        )
        numeric = [
            'user_id_str',
            'id_str',
            'favorite_count',
            'quote_count',
            'reply_count',
            'retweet_count',
        ]
        df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce')
        return df[cols]
    except (KeyError, ValueError, TypeError) as e:
        # best effort: report malformed or partial payloads, but still honour
        # the declared DataFrame return type
        print(e)
        return pd.DataFrame(columns=cols)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# Package metadata.
__title__ = "twitter-api-client"
__description__ = "Implementation of Twitter's v1, v2, and GraphQL APIs."
# Single assignment: the superseded "0.10.6" value was immediately overwritten.
__version__ = "0.10.7"
__author__ = "Trevor Hobenshield"
__license__ = "MIT"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.