added dm-search utils and cookie support

trevorhobenshield committed Jun 15, 2023
1 parent d0ad703 commit b6e6d66
Showing 8 changed files with 248 additions and 33 deletions.
43 changes: 35 additions & 8 deletions readme.md
@@ -30,8 +30,15 @@ pip install twitter-api-client
```python
from twitter.account import Account

## sign-in with credentials
email, username, password = ..., ..., ...
-account = Account(email, username, password, debug=2, save=True)
+account = Account(email, username, password)

## or, resume session using cookies
# account = Account(cookies={"ct0": ..., "auth_token": ...})

## or, resume session using cookies (JSON file)
# account = Account(cookies='twitter.cookies')
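## editor's sketch (assumption, not part of this commit): 'twitter.cookies' is
## a JSON file holding the two cookies the client checks ("ct0", "auth_token").
## One hypothetical way to save them from an authenticated session:
# from pathlib import Path
# import json
# Path('twitter.cookies').write_text(json.dumps(
#     {k: v for k, v in account.session.cookies.items() if k in {'ct0', 'auth_token'}}))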

account.tweet('test 123')
account.untweet(123456)
@@ -112,6 +119,15 @@ latest_timeline = account.home_latest_timeline(limit=500)
# get bookmarks
bookmarks = account.bookmarks()

# get all dms
dms = account.dm_history(['12345-67890'])

# search dms
dms = account.dm_search('test')

# delete conversation
account.dm_delete('12345-67890')
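# note (editor's summary of the new account.py code in this commit): dm_history
# returns one list of message dicts per conversation id, fetched concurrently;
# dm_search returns {'query': ..., 'data': [...]} where `data` holds the raw
# paginated GraphQL responses (50 results per page)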

# example configuration
account.update_settings({
"address_book_live_sync_enabled": False,
@@ -177,8 +193,19 @@ account.update_search_settings({
```python
from twitter.scraper import Scraper

## sign-in with credentials
email, username, password = ..., ..., ...
-scraper = Scraper(email, username, password, debug=1, save=True)
+scraper = Scraper(email, username, password)

## or, resume session using cookies
# scraper = Scraper(cookies={"ct0": ..., "auth_token": ...})

## or, resume session using cookies (JSON file)
# scraper = Scraper(cookies='twitter.cookies')

## or, initialize guest session (limited endpoints)
# from twitter.util import init_session
# scraper = Scraper(session=init_session())

# user data
users = scraper.users(['foo', 'bar', 'hello', 'world'])
@@ -218,7 +245,7 @@ scraper.trends()
from twitter.scraper import Scraper

email, username, password = ..., ..., ...
-scraper = Scraper(email, username, password, debug=1, save=True)
+scraper = Scraper(email, username, password)

user_id = 44196397
cursor = '1767341853908517597|1663601806447476672' # example cursor
@@ -238,7 +265,7 @@ from twitter.search import Search

email, username, password = ..., ..., ...
# default output directory is `data/raw` if save=True
-search = Search(email, username, password, debug=1, save=True)
+search = Search(email, username, password)

latest_results = search.run(
'brasil portugal -argentina',
@@ -281,7 +308,7 @@ from twitter.scraper import Scraper
from twitter.util import init_session

session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)

rooms = [...]
scraper.spaces_live(rooms=rooms) # capture live audio from list of rooms
@@ -298,7 +325,7 @@ from twitter.scraper import Scraper
from twitter.util import init_session

session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)

# room must be live, i.e. in "Running" state
scraper.space_live_transcript('1zqKVPlQNApJB', frequency=2) # word-level live transcript. (dirty, on-the-fly transcription before post-processing)
@@ -315,7 +342,7 @@ from twitter.scraper import Scraper
from twitter.util import init_session

session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)

# room must be live, i.e. in "Running" state
scraper.space_live_transcript('1zqKVPlQNApJB', frequency=1) # finalized live transcript. (clean)
@@ -328,7 +355,7 @@ from twitter.util import init_session
from twitter.constants import SpaceCategory

session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)

# download audio and chat-log from space
spaces = scraper.spaces(rooms=['1eaJbrAPnBVJX', '1eaJbrAlZjjJX'], audio=True, chat=True)
5 changes: 4 additions & 1 deletion scripts/update.py
@@ -5,6 +5,7 @@
import subprocess
from pathlib import Path

import aiofiles
import orjson
from httpx import AsyncClient, Client, Response

@@ -79,6 +80,8 @@ async def get(session: AsyncClient, url: str, **kwargs) -> tuple[str, str]:
try:
logger.debug(f"GET {url}")
r = await session.get(url)
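        # cache each downloaded JS bundle to disk (JS_FILES is a path defined elsewhere in this script)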
async with aiofiles.open(JS_FILES / url.split('/')[-1], 'wb') as f:
await f.write(r.content)
return url, r.text
except Exception as e:
logger.error(f"[{RED}failed{RESET}] Failed to get {url}\n{e}")
@@ -115,7 +118,7 @@ def main():
if not re.search(r'participantreaction|\.countries-|emojipicker|i18n|icons/', k, flags=re.I)
# if 'endpoint' in k
)
-    asyncio.run(process(session, get, urls))
+    # asyncio.run(process(session, get, urls))
get_strings()
get_features()

48 changes: 38 additions & 10 deletions setup.py
@@ -14,7 +14,7 @@

setup(
name="twitter-api-client",
version="0.9.5",
version="0.9.6",
python_requires=">=3.10.10",
description="Twitter API",
long_description=dedent('''
@@ -48,8 +48,16 @@
```python
from twitter.account import Account
## sign-in with credentials
email, username, password = ..., ..., ...
-account = Account(email, username, password, debug=2, save=True)
+account = Account(email, username, password)
## or, resume session using cookies
# account = Account(cookies={"ct0": ..., "auth_token": ...})
## or, resume session using cookies (JSON file)
# account = Account(cookies='twitter.cookies')
account.tweet('test 123')
account.untweet(123456)
@@ -130,6 +138,15 @@
# get bookmarks
bookmarks = account.bookmarks()
# get all dms
dms = account.dm_history(['12345-67890'])
# search dms
dms = account.dm_search('test')
# delete conversation
account.dm_delete('12345-67890')
# example configuration
account.update_settings({
"address_book_live_sync_enabled": False,
@@ -193,8 +210,19 @@
```python
from twitter.scraper import Scraper
## sign-in with credentials
email, username, password = ..., ..., ...
-scraper = Scraper(email, username, password, debug=1, save=True)
+scraper = Scraper(email, username, password)
## or, resume session using cookies
# scraper = Scraper(cookies={"ct0": ..., "auth_token": ...})
## or, resume session using cookies (JSON file)
# scraper = Scraper(cookies='twitter.cookies')
## or, initialize guest session (limited endpoints)
# from twitter.util import init_session
# scraper = Scraper(session=init_session())
# user data
users = scraper.users(['foo', 'bar', 'hello', 'world'])
@@ -234,7 +262,7 @@
from twitter.scraper import Scraper
email, username, password = ..., ..., ...
-scraper = Scraper(email, username, password, debug=1, save=True)
+scraper = Scraper(email, username, password)
user_id = 44196397
cursor = '1767341853908517597|1663601806447476672' # example cursor
@@ -251,7 +279,7 @@
email, username, password = ..., ..., ...
# default output directory is `data/raw` if save=True
-search = Search(email, username, password, debug=1, save=True)
+search = Search(email, username, password)
latest_results = search.run(
'brasil portugal -argentina',
@@ -292,7 +320,7 @@
from twitter.util import init_session
session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)
rooms = [...]
scraper.spaces_live(rooms=rooms) # capture live audio from list of rooms
@@ -307,7 +335,7 @@
from twitter.util import init_session
session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)
# room must be live, i.e. in "Running" state
scraper.space_live_transcript('1zqKVPlQNApJB', frequency=2) # word-level live transcript. (dirty, on-the-fly transcription before post-processing)
@@ -320,7 +348,7 @@
from twitter.util import init_session
session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)
# room must be live, i.e. in "Running" state
scraper.space_live_transcript('1zqKVPlQNApJB', frequency=1) # finalized live transcript. (clean)
@@ -333,7 +361,7 @@
from twitter.constants import SpaceCategory
session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)
# download audio and chat-log from space
spaces = scraper.spaces(rooms=['1eaJbrAPnBVJX', '1eaJbrAlZjjJX'], audio=True, chat=True)
@@ -372,7 +400,7 @@
email, username, password = ..., ..., ...
proton_email, proton_password = ..., ...
-account = Scraper(email, username, password, debug=1, save=True, protonmail={'email':proton_email, 'password':proton_password})
+account = Scraper(email, username, password, protonmail={'email':proton_email, 'password':proton_password})
```
'''),
88 changes: 84 additions & 4 deletions twitter/account.py
@@ -1,3 +1,4 @@
import asyncio
import hashlib
import logging.config
import math
@@ -9,7 +10,9 @@
from string import ascii_letters
from uuid import uuid1, getnode

from httpx import AsyncClient, Limits
from tqdm import tqdm
from tqdm.asyncio import tqdm_asyncio

from .constants import *
from .login import login
@@ -576,11 +579,88 @@ def _init_logger(cfg: dict) -> Logger:
@staticmethod
def _validate_session(*args, **kwargs):
email, username, password, session = args

# validate credentials
if all((email, username, password)):
return login(email, username, password, **kwargs)

# invalid credentials, try validating session
if session and all(session.cookies.get(c) for c in {'ct0', 'auth_token'}):
-            # authenticated session provided
return session
-        if not session:
-            # no session provided, login to authenticate
-            return login(email, username, password, **kwargs)

# invalid credentials and session
cookies = kwargs.get('cookies')

# try validating cookies dict
if isinstance(cookies, dict) and all(cookies.get(c) for c in {'ct0', 'auth_token'}):
_session = Client(cookies=cookies, follow_redirects=True)
_session.headers.update(get_headers(_session))
return _session

# try validating cookies from file
if isinstance(cookies, str):
_session = Client(cookies=orjson.loads(Path(cookies).read_bytes()), follow_redirects=True)
_session.headers.update(get_headers(_session))
return _session

raise Exception('Session not authenticated. '
'Please use an authenticated session or remove the `session` argument and try again.')
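
    # (editor's note) resolution order above: live credentials -> already-
    # authenticated session -> cookies dict -> cookies JSON file -> raise.
    # Sketch of the two cookie paths, assuming Account/Scraper forward `cookies`
    # through kwargs as the readme examples in this commit show:
    #   Account(cookies={'ct0': ..., 'auth_token': ...})   # dict -> fresh httpx Client
    #   Account(cookies='twitter.cookies')                 # JSON file -> fresh httpx Client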

def dm_history(self, conversation_ids: list[str]) -> list[dict]:
async def get(session: AsyncClient, conversation_id: str):
params = deepcopy(dm_history_params)
r = await session.get(
f'{self.v1_api}/dm/conversation/{conversation_id}.json',
params=params,
)
res = r.json().get('conversation_timeline', {})
data = [x['message'] for x in res.get('entries', [])]
entry_id = res.get('min_entry_id')
while entry_id:
params['max_id'] = entry_id
r = await session.get(
f'{self.v1_api}/dm/conversation/{conversation_id}.json',
params=params,
)
res = r.json().get('conversation_timeline', {})
data.extend(x['message'] for x in res.get('entries', []))
entry_id = res.get('min_entry_id')
return data

async def process():
limits = Limits(max_connections=100)
headers, cookies = get_headers(self.session), self.session.cookies
async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c:
return await tqdm_asyncio.gather(*(get(c, _id) for _id in conversation_ids), desc="Getting DMs")

return asyncio.run(process())
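
    # note on dm_history (editor's comment): each conversation pages backwards by
    # setting `max_id` to the previous response's min_entry_id until no entries
    # remain; conversations are fetched concurrently (up to 100 connections, 20s timeout)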

def dm_delete(self, conversation_id: str):
return self.session.post(
f'{self.v1_api}/dm/conversation/{conversation_id}/delete.json',
headers=get_headers(self.session),
)

def dm_search(self, query: str):
def get(cursor=None):
if cursor:
params['variables']['cursor'] = cursor.pop()
_id, op = Operation.DmAllSearchSlice
r = self.session.get(
f'https://twitter.com/i/api/graphql/{_id}/{op}',
params=build_params(params)
)
res = r.json()
cursor = find_key(res, 'next_cursor')
return res, cursor

variables = deepcopy(Operation.default_variables)
variables['count'] = 50 # strict limit, errors thrown if exceeded
variables['query'] = query
params = {'variables': variables, 'features': Operation.default_features}
res, cursor = get()
data = [res]
while cursor:
res, cursor = get(cursor)
data.append(res)
return {'query': query, 'data': data}
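
    # usage sketch (editor's addition; result shape taken from the code above):
    #   res = account.dm_search('test')
    #   pages = res['data']   # one raw GraphQL response per page (50 results each)
    #   # drill into each page with find_key(page, ...) as this method itself does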