Skip to content

Commit

Permalink
add new GraphQL endpoint /SearchTimeline. `api.twitter.com/2/search…
Browse files Browse the repository at this point in the history
…/adaptive.json` deprecated
  • Loading branch information
trevorhobenshield committed Jul 2, 2023
1 parent 55425b7 commit afba99f
Show file tree
Hide file tree
Showing 6 changed files with 275 additions and 158 deletions.
96 changes: 96 additions & 0 deletions examples/postprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import re
import pandas as pd
from twitter.util import find_key


def get_tweets(data: list | dict, cols: list[str] = None) -> pd.DataFrame:
    """
    Convert raw GraphQL response to DataFrame

    Promoted entries are dropped, rows without a `user_id_str` are discarded,
    `created_at` is parsed to a timezone-aware datetime and the result is
    sorted newest first.

    @param data: tweets
    @param cols: option to only include certain columns
    @return: DataFrame of tweets; empty (with the requested columns) when the
        response contains no tweet results
    """
    cols = cols or [
        'id_str',
        'user_id_str',
        'created_at',
        'full_text',
        'favorite_count',
        'quote_count',
        'reply_count',
        'retweet_count',
        'lang',
    ]
    entries = [y for x in find_key(data, 'entries') for y in x]
    # filter out promoted tweets; an entry with no `entryId` is treated as
    # not promoted instead of failing on `None.startswith`
    tweets = [x for x in entries if not (x.get('entryId') or '').startswith('promoted')]
    normalized = pd.json_normalize(find_key(tweets, 'tweet_results'), max_level=1)
    if 'result.legacy' not in normalized.columns:
        # nothing to tabulate: return an empty frame rather than raising KeyError
        return pd.DataFrame(columns=cols)
    df = (
        normalized['result.legacy']
        .apply(pd.Series)  # expand each legacy dict into one column per key
        .dropna(subset=['user_id_str'])
        .assign(created_at=lambda x: pd.to_datetime(x['created_at'], format="%a %b %d %H:%M:%S %z %Y"))
        .sort_values('created_at', ascending=False)
        .reset_index(drop=True)
    )
    numeric = [
        'user_id_str',
        'id_str',
        'favorite_count',
        'quote_count',
        'reply_count',
        'retweet_count',
    ]
    # ids and counters may arrive as strings; unparsable values become NaN
    df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce')
    return df[cols]


def get_tweets_urls(data: dict | list, expr: str, cols: list[str] = None) -> pd.DataFrame:
    """
    Convert raw GraphQL response to DataFrame
    Search for tweets containing specific urls by regex

    A tweet is kept when at least one of its `expanded_url` values matches
    `expr`. The parsed timestamp is stored in a `date` column (the original
    `created_at` string is left untouched) and rows are sorted newest first.

    @param data: tweets
    @param expr: regex to match urls
    @param cols: option to only include certain columns
    @return: DataFrame of tweets matching the expression; empty (with the
        requested columns) when nothing matches or the matched tweets cannot
        be tabulated
    """
    cols = cols or [
        'id_str',
        'user_id_str',
        'created_at',
        'urls',
        'full_text',
        'favorite_count',
        'quote_count',
        'reply_count',
        'retweet_count',
        'lang',
    ]
    results = []
    for res in find_key(data, 'tweet_results'):
        legacy = res.get('result', {}).get('legacy', {})
        urls = find_key(res, 'expanded_url')
        if any(re.search(expr, x) for x in urls):
            results.append({'urls': urls} | legacy)
    if not results:
        # no matches: an empty frame honours the declared return type, where
        # the pipeline below would fail on the missing `created_at` column
        return pd.DataFrame(columns=cols)
    try:
        df = (
            pd.DataFrame(results)
            .assign(date=lambda x: pd.to_datetime(x['created_at'], format="%a %b %d %H:%M:%S %z %Y"))
            # sort on the parsed datetime; sorting the raw `created_at` string
            # orders rows alphabetically by weekday name, not by time
            .sort_values('date', ascending=False)
            .reset_index(drop=True)
        )
        numeric = [
            'user_id_str',
            'id_str',
            'favorite_count',
            'quote_count',
            'reply_count',
            'retweet_count',
        ]
        # ids and counters may arrive as strings; unparsable values become NaN
        df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce')
        return df[cols]
    except (KeyError, TypeError, ValueError) as e:
        # best effort on malformed tweet data (missing column, unparsable
        # date): report the problem and hand back an empty frame
        print(e)
        return pd.DataFrame(columns=cols)
50 changes: 28 additions & 22 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -296,28 +296,34 @@ follower_subset, last_cursor = scraper.followers([user_id], limit=limit, cursor=
from twitter.search import Search

email, username, password = ..., ..., ...
# default output directory is `data/raw` if save=True
search = Search(email, username, password)

latest_results = search.run(
'brasil portugal -argentina',
'paperswithcode -tensorflow -tf',
'ios android',
limit=100,
latest=True, # get latest tweets only
retries=3,
)

general_results = search.run(
'(#dogs OR #cats) min_retweets:500',
'min_faves:10000 @elonmusk until:2023-02-16 since:2023-02-01',
'brasil portugal -argentina',
'paperswithcode -tensorflow -tf',
'skateboarding baseball guitar',
'cheese bread butter',
'ios android',
limit=100,
retries=7,
# default output directory is `data/search_results` if save=True
search = Search(email, username, password, save=True, debug=1)

res = search.run(
limit=37,
retries=5,
queries=[
{
'category': 'Top',
'query': 'paperswithcode -tensorflow -tf'
},
{
'category': 'Latest',
'query': 'test'
},
{
'category': 'People',
'query': 'brasil portugal -argentina'
},
{
'category': 'Photos',
'query': 'greece'
},
{
'category': 'Videos',
'query': 'italy'
},
],
)
```

Expand Down
50 changes: 28 additions & 22 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,28 +315,34 @@
from twitter.search import Search
email, username, password = ..., ..., ...
# default output directory is `data/raw` if save=True
search = Search(email, username, password)
latest_results = search.run(
'brasil portugal -argentina',
'paperswithcode -tensorflow -tf',
'ios android',
limit=100,
latest=True, # get latest tweets only
retries=3,
)
general_results = search.run(
'(#dogs OR #cats) min_retweets:500',
'min_faves:10000 @elonmusk until:2023-02-16 since:2023-02-01',
'brasil portugal -argentina',
'paperswithcode -tensorflow -tf',
'skateboarding baseball guitar',
'cheese bread butter',
'ios android',
limit=100,
retries=7,
# default output directory is `data/search_results` if save=True
search = Search(email, username, password, save=True, debug=1)
res = search.run(
limit=37,
retries=5,
queries=[
{
'category': 'Top',
'query': 'paperswithcode -tensorflow -tf'
},
{
'category': 'Latest',
'query': 'test'
},
{
'category': 'People',
'query': 'brasil portugal -argentina'
},
{
'category': 'Photos',
'query': 'greece'
},
{
'category': 'Videos',
'query': 'italy'
},
],
)
```
Expand Down
2 changes: 1 addition & 1 deletion twitter/__version__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__title__ = "twitter-api-client"
__description__ = "Implementation of Twitter's v1, v2, and GraphQL APIs."
__version__ = "0.10.6"
__version__ = "0.10.7"
__author__ = "Trevor Hobenshield"
__license__ = "MIT"
81 changes: 46 additions & 35 deletions twitter/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@
}


@dataclass
class SearchCategory:
    # Namespace of search category names; read them as class attributes,
    # e.g. `SearchCategory.Latest`.
    # None of the attributes is annotated, so `@dataclass` registers no
    # fields and the generated `__init__` takes no arguments.
    # NOTE(review): presumably these are the values accepted as a query's
    # 'category' by Search.run — confirm against twitter/search.py
    Top = 'Top'
    Latest = 'Latest'
    People = 'People'
    Photos = 'Photos'
    Videos = 'Videos'


@dataclass
class SpaceCategory:
Top = 'Top'
Expand All @@ -85,6 +94,7 @@ class SpaceState:
@dataclass
class Operation:
# todo: dynamically update
SearchTimeline = {'rawQuery': str, 'product': str}, 'nK1dw4oV3k4w5TdtcAdSww', 'SearchTimeline'
AudioSpaceById = {'id': str}, 'fYAuJHiY3TmYdBmrRtIKhA', 'AudioSpaceById'
AudioSpaceSearch = {'filter': str, 'query': str}, 'NTq79TuSz6fHj8lQaferJw', 'AudioSpaceSearch',
UserByScreenName = {'screen_name': str}, 'sLVLhk0bGj3MVFEKTdax1w', 'UserByScreenName'
Expand Down Expand Up @@ -272,7 +282,6 @@ class Operation:
RitoFlaggedAccountsTimeline = 'lMzaBZHIbD6GuPqJJQubMg', 'RitoFlaggedAccountsTimeline'
RitoFlaggedTweetsTimeline = 'iCuXMibh6yj9AelyjKXDeA', 'RitoFlaggedTweetsTimeline'
RitoSuggestedActionsFacePile = 'GnQKeEdL1LyeK3dTQCS1yw', 'RitoSuggestedActionsFacePile'
SearchTimeline = 'gkjsKepM6gl_HmFWoWKfgg', 'SearchTimeline'
SetDefault = 'QEMLEzEMzoPNbeauKCCLbg', 'SetDefault'
SetSafetyModeSettings = 'qSJIPIpf4gA7Wn21bT3D4w', 'SetSafetyModeSettings'
SharingAudiospacesListeningDataWithFollowersUpdate = '5h0kNbk3ii97rmfY6CdgAA', 'SharingAudiospacesListeningDataWithFollowersUpdate'
Expand Down Expand Up @@ -351,40 +360,42 @@ class Operation:
'withMessages': True,
}
default_features = {
"blue_business_profile_image_shape_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
"freedom_of_speech_not_reach_fetch_enabled": False,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
"graphql_timeline_v2_bookmark_timeline": True,
"hidden_profile_likes_enabled": True,
"highlights_tweets_tab_ui_enabled": True,
"interactive_text_enabled": True,
"longform_notetweets_consumption_enabled": True,
"longform_notetweets_inline_media_enabled": False,
"longform_notetweets_rich_text_read_enabled": True,
"longform_notetweets_richtext_consumption_enabled": True,
"profile_foundations_tweet_stats_enabled": True,
"profile_foundations_tweet_stats_tweet_frequency": True,
"responsive_web_birdwatch_note_limit_enabled": True,
"responsive_web_edit_tweet_api_enabled": True,
"responsive_web_enhance_cards_enabled": False,
"responsive_web_graphql_exclude_directive_enabled": True,
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
"responsive_web_graphql_timeline_navigation_enabled": True,
"responsive_web_text_conversations_enabled": False,
"responsive_web_twitter_article_data_v2_enabled": True,
"responsive_web_twitter_blue_verified_badge_is_enabled": True,
"rweb_lists_timeline_redesign_enabled": True,
"spaces_2022_h2_clipping": True,
"spaces_2022_h2_spaces_communities": True,
"standardized_nudges_misinfo": True,
"subscriptions_verification_info_verified_since_enabled": True,
"tweet_awards_web_tipping_enabled": False,
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": False,
"tweetypie_unmention_optimization_enabled": True,
"verified_phone_label_enabled": False,
"vibe_api_enabled": True,
"view_counts_everywhere_api_enabled": True,
'blue_business_profile_image_shape_enabled': True,
'creator_subscriptions_tweet_preview_api_enabled': True,
'freedom_of_speech_not_reach_fetch_enabled': True,
'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
'graphql_timeline_v2_bookmark_timeline': True,
'hidden_profile_likes_enabled': True,
'highlights_tweets_tab_ui_enabled': True,
'interactive_text_enabled': True,
'longform_notetweets_consumption_enabled': True,
'longform_notetweets_inline_media_enabled': True,
'longform_notetweets_rich_text_read_enabled': True,
'longform_notetweets_richtext_consumption_enabled': True,
'profile_foundations_tweet_stats_enabled': True,
'profile_foundations_tweet_stats_tweet_frequency': True,
'responsive_web_birdwatch_note_limit_enabled': True,
'responsive_web_edit_tweet_api_enabled': True,
'responsive_web_enhance_cards_enabled': False,
'responsive_web_graphql_exclude_directive_enabled': True,
'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
'responsive_web_graphql_timeline_navigation_enabled': True,
'responsive_web_media_download_video_enabled': False,
'responsive_web_text_conversations_enabled': False,
'responsive_web_twitter_article_data_v2_enabled': True,
'responsive_web_twitter_article_tweet_consumption_enabled': False,
'responsive_web_twitter_blue_verified_badge_is_enabled': True,
'rweb_lists_timeline_redesign_enabled': True,
'spaces_2022_h2_clipping': True,
'spaces_2022_h2_spaces_communities': True,
'standardized_nudges_misinfo': True,
'subscriptions_verification_info_verified_since_enabled': True,
'tweet_awards_web_tipping_enabled': False,
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
'tweetypie_unmention_optimization_enabled': True,
'verified_phone_label_enabled': False,
'vibe_api_enabled': True,
'view_counts_everywhere_api_enabled': True
}


Expand Down
Loading

0 comments on commit afba99f

Please sign in to comment.