Add new GraphQL endpoint /SearchTimeline; `api.twitter.com/2/search/adaptive.json` is deprecated.
ZakariaMQ · Jul 2, 2023 · afba99f · afba99f
1 parent 55425b7
commit afba99f
Show file tree

Hide file tree

Showing 6 changed files with 275 additions and 158 deletions.
diff --git a/examples/postprocess.py b/examples/postprocess.py
@@ -0,0 +1,96 @@
+import re
+import pandas as pd
+from twitter.util import find_key
+
+
+def get_tweets(data: list | dict, cols: list[str] | None = None) -> pd.DataFrame:
+    """
+    Convert raw GraphQL response to DataFrame
+
+    Promoted entries are removed, rows without a `user_id_str` are dropped,
+    `created_at` is parsed to a datetime and rows are sorted newest first.
+
+    @param data: tweets
+    @param cols: option to only include certain columns
+    @return: DataFrame of tweets
+    """
+    entries = [y for x in find_key(data, 'entries') for y in x]
+    # filter out promoted tweets; an entry with a missing/None `entryId` is kept
+    # as a regular entry instead of raising AttributeError on `None.startswith`
+    tweets = [x for x in entries if not (x.get('entryId') or '').startswith('promoted')]
+    df = (
+        pd.json_normalize(find_key(tweets, 'tweet_results'), max_level=1)
+        # expand each `result.legacy` dict into one column per key
+        ['result.legacy'].apply(pd.Series)
+        .dropna(subset='user_id_str')
+        .assign(created_at=lambda x: pd.to_datetime(x['created_at'], format="%a %b %d %H:%M:%S %z %Y"))
+        .sort_values('created_at', ascending=False)
+        .reset_index(drop=True)
+    )
+    numeric = [
+        'user_id_str',
+        'id_str',
+        'favorite_count',
+        'quote_count',
+        'reply_count',
+        'retweet_count',
+    ]
+    # values that cannot be parsed as numbers become NaN (errors='coerce')
+    df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce')
+    # fall back to the default column selection when `cols` is None or empty
+    cols = cols or [
+        'id_str',
+        'user_id_str',
+        'created_at',
+        'full_text',
+        'favorite_count',
+        'quote_count',
+        'reply_count',
+        'retweet_count',
+        'lang',
+    ]
+    return df[cols]
+
+
+def get_tweets_urls(data: dict | list, expr: str, cols: list[str] | None = None) -> pd.DataFrame | None:
+    """
+    Convert raw GraphQL response to DataFrame
+
+    Search for tweets containing specific urls by regex
+
+    The parsed timestamp is stored in a separate `date` column (the raw
+    `created_at` string is left untouched) and rows are sorted newest first.
+
+    @param data: tweets
+    @param expr: regex to match urls
+    @param cols: option to only include certain columns
+    @return: DataFrame of tweets matching the expression, or None if building
+        the DataFrame failed (e.g. no tweet matched); the error is printed
+    """
+    tweet_results = find_key(data, 'tweet_results')
+    results = []
+    for res in tweet_results:
+        legacy = res.get('result', {}).get('legacy', {})
+        urls = find_key(res, 'expanded_url')
+        # keep the tweet if at least one of its expanded urls matches `expr`
+        if any(re.search(expr, x) for x in urls):
+            results.append({'urls': urls} | legacy)
+    try:
+        df = (
+            pd.DataFrame(results)
+            .assign(date=lambda x: pd.to_datetime(x['created_at'], format="%a %b %d %H:%M:%S %z %Y"))
+            # sort on the parsed datetime: sorting the raw `created_at` string
+            # orders rows alphabetically by weekday name, not chronologically
+            .sort_values('date', ascending=False)
+            .reset_index(drop=True)
+        )
+        numeric = [
+            'user_id_str',
+            'id_str',
+            'favorite_count',
+            'quote_count',
+            'reply_count',
+            'retweet_count',
+        ]
+        # values that cannot be parsed as numbers become NaN (errors='coerce')
+        df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce')
+        # fall back to the default column selection when `cols` is None or empty
+        cols = cols or [
+            'id_str',
+            'user_id_str',
+            'created_at',
+            'urls',
+            'full_text',
+            'favorite_count',
+            'quote_count',
+            'reply_count',
+            'retweet_count',
+            'lang',
+        ]
+        return df[cols]
+    except Exception as e:
+        # best-effort: report the problem and fall through, returning None
+        print(e)
diff --git a/readme.md b/readme.md
@@ -296,28 +296,34 @@ follower_subset, last_cursor = scraper.followers([user_id], limit=limit, cursor=
 from twitter.search import Search
 
 email, username, password = ..., ..., ...
-# default output directory is `data/raw` if save=True
-search = Search(email, username, password)
-
-latest_results = search.run(
-    'brasil portugal -argentina',
-    'paperswithcode -tensorflow -tf',
-    'ios android',
-    limit=100,
-    latest=True,  # get latest tweets only
-    retries=3,
-)
-
-general_results = search.run(
-    '(#dogs OR #cats) min_retweets:500',
-    'min_faves:10000 @elonmusk until:2023-02-16 since:2023-02-01',
-    'brasil portugal -argentina',
-    'paperswithcode -tensorflow -tf',
-    'skateboarding baseball guitar',
-    'cheese bread butter',
-    'ios android',
-    limit=100,
-    retries=7,
+# default output directory is `data/search_results` if save=True
+search = Search(email, username, password, save=True, debug=1)
+
+res = search.run(
+    limit=37,
+    retries=5,
+    queries=[
+        {
+            'category': 'Top',
+            'query': 'paperswithcode -tensorflow -tf'
+        },
+        {
+            'category': 'Latest',
+            'query': 'test'
+        },
+        {
+            'category': 'People',
+            'query': 'brasil portugal -argentina'
+        },
+        {
+            'category': 'Photos',
+            'query': 'greece'
+        },
+        {
+            'category': 'Videos',
+            'query': 'italy'
+        },
+    ],
 )
 ```
 

diff --git a/setup.py b/setup.py
@@ -315,28 +315,34 @@
     from twitter.search import Search
 
     email, username, password = ..., ..., ...
-    # default output directory is `data/raw` if save=True
-    search = Search(email, username, password)
-
-    latest_results = search.run(
-        'brasil portugal -argentina',
-        'paperswithcode -tensorflow -tf',
-        'ios android',
-        limit=100,
-        latest=True,  # get latest tweets only
-        retries=3,
-    )
-
-    general_results = search.run(
-        '(#dogs OR #cats) min_retweets:500',
-        'min_faves:10000 @elonmusk until:2023-02-16 since:2023-02-01',
-        'brasil portugal -argentina',
-        'paperswithcode -tensorflow -tf',
-        'skateboarding baseball guitar',
-        'cheese bread butter',
-        'ios android',
-        limit=100,
-        retries=7,
+    # default output directory is `data/search_results` if save=True
+    search = Search(email, username, password, save=True, debug=1)
+
+    res = search.run(
+        limit=37,
+        retries=5,
+        queries=[
+            {
+                'category': 'Top',
+                'query': 'paperswithcode -tensorflow -tf'
+            },
+            {
+                'category': 'Latest',
+                'query': 'test'
+            },
+            {
+                'category': 'People',
+                'query': 'brasil portugal -argentina'
+            },
+            {
+                'category': 'Photos',
+                'query': 'greece'
+            },
+            {
+                'category': 'Videos',
+                'query': 'italy'
+            },
+        ],
     )
     ```
 

diff --git a/twitter/__version__.py b/twitter/__version__.py
@@ -1,5 +1,5 @@
 __title__ = "twitter-api-client"
 __description__ = "Implementation of Twitter's v1, v2, and GraphQL APIs."
-__version__ = "0.10.6"
+__version__ = "0.10.7"
 __author__ = "Trevor Hobenshield"
 __license__ = "MIT"
diff --git a/twitter/constants.py b/twitter/constants.py
@@ -65,6 +65,15 @@
 }
 
 
+# Names of the search categories. Each attribute's value is the same string
+# as its name; these strings match the 'category' values used in the
+# `Search.run(queries=[...])` example in the readme.
+# NOTE(review): the attributes carry no type annotations, so if `dataclass`
+# here is `dataclasses.dataclass` they are plain class attributes rather
+# than dataclass fields - confirm this is intended.
+@dataclass
+class SearchCategory:
+    Top = 'Top'
+    Latest = 'Latest'
+    People = 'People'
+    Photos = 'Photos'
+    Videos = 'Videos'
+
+
 @dataclass
 class SpaceCategory:
     Top = 'Top'
@@ -85,6 +94,7 @@ class SpaceState:
 @dataclass
 class Operation:
     # todo: dynamically update
+    SearchTimeline = {'rawQuery': str, 'product': str}, 'nK1dw4oV3k4w5TdtcAdSww', 'SearchTimeline'
     AudioSpaceById = {'id': str}, 'fYAuJHiY3TmYdBmrRtIKhA', 'AudioSpaceById'
     AudioSpaceSearch = {'filter': str, 'query': str}, 'NTq79TuSz6fHj8lQaferJw', 'AudioSpaceSearch',
     UserByScreenName = {'screen_name': str}, 'sLVLhk0bGj3MVFEKTdax1w', 'UserByScreenName'
@@ -272,7 +282,6 @@ class Operation:
     RitoFlaggedAccountsTimeline = 'lMzaBZHIbD6GuPqJJQubMg', 'RitoFlaggedAccountsTimeline'
     RitoFlaggedTweetsTimeline = 'iCuXMibh6yj9AelyjKXDeA', 'RitoFlaggedTweetsTimeline'
     RitoSuggestedActionsFacePile = 'GnQKeEdL1LyeK3dTQCS1yw', 'RitoSuggestedActionsFacePile'
-    SearchTimeline = 'gkjsKepM6gl_HmFWoWKfgg', 'SearchTimeline'
     SetDefault = 'QEMLEzEMzoPNbeauKCCLbg', 'SetDefault'
     SetSafetyModeSettings = 'qSJIPIpf4gA7Wn21bT3D4w', 'SetSafetyModeSettings'
     SharingAudiospacesListeningDataWithFollowersUpdate = '5h0kNbk3ii97rmfY6CdgAA', 'SharingAudiospacesListeningDataWithFollowersUpdate'
@@ -351,40 +360,42 @@ class Operation:
         'withMessages': True,
     }
     default_features = {
-        "blue_business_profile_image_shape_enabled": True,
-        "creator_subscriptions_tweet_preview_api_enabled": True,
-        "freedom_of_speech_not_reach_fetch_enabled": False,
-        "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
-        "graphql_timeline_v2_bookmark_timeline": True,
-        "hidden_profile_likes_enabled": True,
-        "highlights_tweets_tab_ui_enabled": True,
-        "interactive_text_enabled": True,
-        "longform_notetweets_consumption_enabled": True,
-        "longform_notetweets_inline_media_enabled": False,
-        "longform_notetweets_rich_text_read_enabled": True,
-        "longform_notetweets_richtext_consumption_enabled": True,
-        "profile_foundations_tweet_stats_enabled": True,
-        "profile_foundations_tweet_stats_tweet_frequency": True,
-        "responsive_web_birdwatch_note_limit_enabled": True,
-        "responsive_web_edit_tweet_api_enabled": True,
-        "responsive_web_enhance_cards_enabled": False,
-        "responsive_web_graphql_exclude_directive_enabled": True,
-        "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
-        "responsive_web_graphql_timeline_navigation_enabled": True,
-        "responsive_web_text_conversations_enabled": False,
-        "responsive_web_twitter_article_data_v2_enabled": True,
-        "responsive_web_twitter_blue_verified_badge_is_enabled": True,
-        "rweb_lists_timeline_redesign_enabled": True,
-        "spaces_2022_h2_clipping": True,
-        "spaces_2022_h2_spaces_communities": True,
-        "standardized_nudges_misinfo": True,
-        "subscriptions_verification_info_verified_since_enabled": True,
-        "tweet_awards_web_tipping_enabled": False,
-        "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": False,
-        "tweetypie_unmention_optimization_enabled": True,
-        "verified_phone_label_enabled": False,
-        "vibe_api_enabled": True,
-        "view_counts_everywhere_api_enabled": True,
+        'blue_business_profile_image_shape_enabled': True,
+        'creator_subscriptions_tweet_preview_api_enabled': True,
+        'freedom_of_speech_not_reach_fetch_enabled': True,
+        'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
+        'graphql_timeline_v2_bookmark_timeline': True,
+        'hidden_profile_likes_enabled': True,
+        'highlights_tweets_tab_ui_enabled': True,
+        'interactive_text_enabled': True,
+        'longform_notetweets_consumption_enabled': True,
+        'longform_notetweets_inline_media_enabled': True,
+        'longform_notetweets_rich_text_read_enabled': True,
+        'longform_notetweets_richtext_consumption_enabled': True,
+        'profile_foundations_tweet_stats_enabled': True,
+        'profile_foundations_tweet_stats_tweet_frequency': True,
+        'responsive_web_birdwatch_note_limit_enabled': True,
+        'responsive_web_edit_tweet_api_enabled': True,
+        'responsive_web_enhance_cards_enabled': False,
+        'responsive_web_graphql_exclude_directive_enabled': True,
+        'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
+        'responsive_web_graphql_timeline_navigation_enabled': True,
+        'responsive_web_media_download_video_enabled': False,
+        'responsive_web_text_conversations_enabled': False,
+        'responsive_web_twitter_article_data_v2_enabled': True,
+        'responsive_web_twitter_article_tweet_consumption_enabled': False,
+        'responsive_web_twitter_blue_verified_badge_is_enabled': True,
+        'rweb_lists_timeline_redesign_enabled': True,
+        'spaces_2022_h2_clipping': True,
+        'spaces_2022_h2_spaces_communities': True,
+        'standardized_nudges_misinfo': True,
+        'subscriptions_verification_info_verified_since_enabled': True,
+        'tweet_awards_web_tipping_enabled': False,
+        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
+        'tweetypie_unmention_optimization_enabled': True,
+        'verified_phone_label_enabled': False,
+        'vibe_api_enabled': True,
+        'view_counts_everywhere_api_enabled': True
     }