added dm-search utils and cookie support

trevorhobenshield committed Jun 15, 2023
1 parent d0ad703 commit b6e6d66
Showing 8 changed files with 248 additions and 33 deletions.
43 changes: 35 additions & 8 deletions readme.md
@@ -30,8 +30,15 @@ pip install twitter-api-client
```python
from twitter.account import Account

## sign-in with credentials
email, username, password = ..., ..., ...
-account = Account(email, username, password, debug=2, save=True)
+account = Account(email, username, password)

## or, resume session using cookies
# account = Account(cookies={"ct0": ..., "auth_token": ...})

## or, resume session using cookies (JSON file)
# account = Account(cookies='twitter.cookies')
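## editor's sketch (assumption, not part of this commit): 'twitter.cookies' is
## a JSON file holding the two cookies the client checks ("ct0", "auth_token").
## One hypothetical way to save them from an authenticated session:
# from pathlib import Path
# import json
# Path('twitter.cookies').write_text(json.dumps(
#     {k: v for k, v in account.session.cookies.items() if k in {'ct0', 'auth_token'}}))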

account.tweet('test 123')
account.untweet(123456)
@@ -112,6 +119,15 @@ latest_timeline = account.home_latest_timeline(limit=500)
# get bookmarks
bookmarks = account.bookmarks()

# get all dms
dms = account.dm_history(['12345-67890'])

# search dms
dms = account.dm_search('test')

# delete conversation
account.dm_delete('12345-67890')
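# note (editor's summary of the new account.py code in this commit): dm_history
# returns one list of message dicts per conversation id, fetched concurrently;
# dm_search returns {'query': ..., 'data': [...]} where `data` holds the raw
# paginated GraphQL responses (50 results per page)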

# example configuration
account.update_settings({
"address_book_live_sync_enabled": False,
@@ -177,8 +193,19 @@ account.update_search_settings({
```python
from twitter.scraper import Scraper

## sign-in with credentials
email, username, password = ..., ..., ...
-scraper = Scraper(email, username, password, debug=1, save=True)
+scraper = Scraper(email, username, password)

## or, resume session using cookies
# scraper = Scraper(cookies={"ct0": ..., "auth_token": ...})

## or, resume session using cookies (JSON file)
# scraper = Scraper(cookies='twitter.cookies')

## or, initialize guest session (limited endpoints)
# from twitter.util import init_session
# scraper = Scraper(session=init_session())

# user data
users = scraper.users(['foo', 'bar', 'hello', 'world'])
@@ -218,7 +245,7 @@ scraper.trends()
from twitter.scraper import Scraper

email, username, password = ..., ..., ...
-scraper = Scraper(email, username, password, debug=1, save=True)
+scraper = Scraper(email, username, password)

user_id = 44196397
cursor = '1767341853908517597|1663601806447476672' # example cursor
@@ -238,7 +265,7 @@ from twitter.search import Search

email, username, password = ..., ..., ...
# default output directory is `data/raw` if save=True
-search = Search(email, username, password, debug=1, save=True)
+search = Search(email, username, password)

latest_results = search.run(
'brasil portugal -argentina',
@@ -281,7 +308,7 @@ from twitter.scraper import Scraper
from twitter.util import init_session

session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)

rooms = [...]
scraper.spaces_live(rooms=rooms) # capture live audio from list of rooms
@@ -298,7 +325,7 @@ from twitter.scraper import Scraper
from twitter.util import init_session

session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)

# room must be live, i.e. in "Running" state
scraper.space_live_transcript('1zqKVPlQNApJB', frequency=2) # word-level live transcript. (dirty, on-the-fly transcription before post-processing)
@@ -315,7 +342,7 @@ from twitter.scraper import Scraper
from twitter.util import init_session

session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)

# room must be live, i.e. in "Running" state
scraper.space_live_transcript('1zqKVPlQNApJB', frequency=1) # finalized live transcript. (clean)
@@ -328,7 +355,7 @@ from twitter.util import init_session
from twitter.constants import SpaceCategory

session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)

# download audio and chat-log from space
spaces = scraper.spaces(rooms=['1eaJbrAPnBVJX', '1eaJbrAlZjjJX'], audio=True, chat=True)
5 changes: 4 additions & 1 deletion scripts/update.py
@@ -5,6 +5,7 @@
import subprocess
from pathlib import Path

import aiofiles
import orjson
from httpx import AsyncClient, Client, Response

@@ -79,6 +80,8 @@ async def get(session: AsyncClient, url: str, **kwargs) -> tuple[str, str]:
try:
logger.debug(f"GET {url}")
r = await session.get(url)
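        # cache each downloaded JS bundle to disk (JS_FILES is a path defined elsewhere in this script)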
async with aiofiles.open(JS_FILES / url.split('/')[-1], 'wb') as f:
await f.write(r.content)
return url, r.text
except Exception as e:
logger.error(f"[{RED}failed{RESET}] Failed to get {url}\n{e}")
@@ -115,7 +118,7 @@ def main():
if not re.search(r'participantreaction|\.countries-|emojipicker|i18n|icons/', k, flags=re.I)
# if 'endpoint' in k
)
-    asyncio.run(process(session, get, urls))
+    # asyncio.run(process(session, get, urls))
get_strings()
get_features()

48 changes: 38 additions & 10 deletions setup.py
@@ -14,7 +14,7 @@

setup(
name="twitter-api-client",
version="0.9.5",
version="0.9.6",
python_requires=">=3.10.10",
description="Twitter API",
long_description=dedent('''
@@ -48,8 +48,16 @@
```python
from twitter.account import Account
## sign-in with credentials
email, username, password = ..., ..., ...
-account = Account(email, username, password, debug=2, save=True)
+account = Account(email, username, password)
## or, resume session using cookies
# account = Account(cookies={"ct0": ..., "auth_token": ...})
## or, resume session using cookies (JSON file)
# account = Account(cookies='twitter.cookies')
account.tweet('test 123')
account.untweet(123456)
@@ -130,6 +138,15 @@
# get bookmarks
bookmarks = account.bookmarks()
# get all dms
dms = account.dm_history(['12345-67890'])
# search dms
dms = account.dm_search('test')
# delete conversation
account.dm_delete('12345-67890')
# example configuration
account.update_settings({
"address_book_live_sync_enabled": False,
@@ -193,8 +210,19 @@
```python
from twitter.scraper import Scraper
## sign-in with credentials
email, username, password = ..., ..., ...
-scraper = Scraper(email, username, password, debug=1, save=True)
+scraper = Scraper(email, username, password)
## or, resume session using cookies
# scraper = Scraper(cookies={"ct0": ..., "auth_token": ...})
## or, resume session using cookies (JSON file)
# scraper = Scraper(cookies='twitter.cookies')
## or, initialize guest session (limited endpoints)
# from twitter.util import init_session
# scraper = Scraper(session=init_session())
# user data
users = scraper.users(['foo', 'bar', 'hello', 'world'])
@@ -234,7 +262,7 @@
from twitter.scraper import Scraper
email, username, password = ..., ..., ...
-scraper = Scraper(email, username, password, debug=1, save=True)
+scraper = Scraper(email, username, password)
user_id = 44196397
cursor = '1767341853908517597|1663601806447476672' # example cursor
@@ -251,7 +279,7 @@
email, username, password = ..., ..., ...
# default output directory is `data/raw` if save=True
-search = Search(email, username, password, debug=1, save=True)
+search = Search(email, username, password)
latest_results = search.run(
'brasil portugal -argentina',
@@ -292,7 +320,7 @@
from twitter.util import init_session
session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)
rooms = [...]
scraper.spaces_live(rooms=rooms) # capture live audio from list of rooms
@@ -307,7 +335,7 @@
from twitter.util import init_session
session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)
# room must be live, i.e. in "Running" state
scraper.space_live_transcript('1zqKVPlQNApJB', frequency=2) # word-level live transcript. (dirty, on-the-fly transcription before post-processing)
@@ -320,7 +348,7 @@
from twitter.util import init_session
session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)
# room must be live, i.e. in "Running" state
scraper.space_live_transcript('1zqKVPlQNApJB', frequency=1) # finalized live transcript. (clean)
@@ -333,7 +361,7 @@
from twitter.constants import SpaceCategory
session = init_session() # initialize guest session, no login required
-scraper = Scraper(session=session, debug=1, save=True)
+scraper = Scraper(session=session)
# download audio and chat-log from space
spaces = scraper.spaces(rooms=['1eaJbrAPnBVJX', '1eaJbrAlZjjJX'], audio=True, chat=True)
@@ -372,7 +400,7 @@
email, username, password = ..., ..., ...
proton_email, proton_password = ..., ...
-account = Scraper(email, username, password, debug=1, save=True, protonmail={'email':proton_email, 'password':proton_password})
+account = Scraper(email, username, password, protonmail={'email':proton_email, 'password':proton_password})
```
'''),
88 changes: 84 additions & 4 deletions twitter/account.py
@@ -1,3 +1,4 @@
import asyncio
import hashlib
import logging.config
import math
@@ -9,7 +10,9 @@
from string import ascii_letters
from uuid import uuid1, getnode

from httpx import AsyncClient, Limits
from tqdm import tqdm
from tqdm.asyncio import tqdm_asyncio

from .constants import *
from .login import login
@@ -576,11 +579,88 @@ def _init_logger(cfg: dict) -> Logger:
@staticmethod
def _validate_session(*args, **kwargs):
email, username, password, session = args

# validate credentials
if all((email, username, password)):
return login(email, username, password, **kwargs)

# invalid credentials, try validating session
if session and all(session.cookies.get(c) for c in {'ct0', 'auth_token'}):
-            # authenticated session provided
return session
-        if not session:
-            # no session provided, login to authenticate
-            return login(email, username, password, **kwargs)

# invalid credentials and session
cookies = kwargs.get('cookies')

# try validating cookies dict
if isinstance(cookies, dict) and all(cookies.get(c) for c in {'ct0', 'auth_token'}):
_session = Client(cookies=cookies, follow_redirects=True)
_session.headers.update(get_headers(_session))
return _session

# try validating cookies from file
if isinstance(cookies, str):
_session = Client(cookies=orjson.loads(Path(cookies).read_bytes()), follow_redirects=True)
_session.headers.update(get_headers(_session))
return _session

raise Exception('Session not authenticated. '
'Please use an authenticated session or remove the `session` argument and try again.')
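
    # (editor's note) resolution order above: live credentials -> already-
    # authenticated session -> cookies dict -> cookies JSON file -> raise.
    # Sketch of the two cookie paths, assuming Account/Scraper forward `cookies`
    # through kwargs as the readme examples in this commit show:
    #   Account(cookies={'ct0': ..., 'auth_token': ...})   # dict -> fresh httpx Client
    #   Account(cookies='twitter.cookies')                 # JSON file -> fresh httpx Client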

def dm_history(self, conversation_ids: list[str]) -> list[dict]:
async def get(session: AsyncClient, conversation_id: str):
params = deepcopy(dm_history_params)
r = await session.get(
f'{self.v1_api}/dm/conversation/{conversation_id}.json',
params=params,
)
res = r.json().get('conversation_timeline', {})
data = [x['message'] for x in res.get('entries', [])]
entry_id = res.get('min_entry_id')
while entry_id:
params['max_id'] = entry_id
r = await session.get(
f'{self.v1_api}/dm/conversation/{conversation_id}.json',
params=params,
)
res = r.json().get('conversation_timeline', {})
data.extend(x['message'] for x in res.get('entries', []))
entry_id = res.get('min_entry_id')
return data

async def process():
limits = Limits(max_connections=100)
headers, cookies = get_headers(self.session), self.session.cookies
async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c:
return await tqdm_asyncio.gather(*(get(c, _id) for _id in conversation_ids), desc="Getting DMs")

return asyncio.run(process())
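
    # note on dm_history (editor's comment): each conversation pages backwards by
    # setting `max_id` to the previous response's min_entry_id until no entries
    # remain; conversations are fetched concurrently (up to 100 connections, 20s timeout)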

def dm_delete(self, conversation_id: str):
return self.session.post(
f'{self.v1_api}/dm/conversation/{conversation_id}/delete.json',
headers=get_headers(self.session),
)

def dm_search(self, query: str):
def get(cursor=None):
if cursor:
params['variables']['cursor'] = cursor.pop()
_id, op = Operation.DmAllSearchSlice
r = self.session.get(
f'https://twitter.com/i/api/graphql/{_id}/{op}',
params=build_params(params)
)
res = r.json()
cursor = find_key(res, 'next_cursor')
return res, cursor

variables = deepcopy(Operation.default_variables)
variables['count'] = 50 # strict limit, errors thrown if exceeded
variables['query'] = query
params = {'variables': variables, 'features': Operation.default_features}
res, cursor = get()
data = [res]
while cursor:
res, cursor = get(cursor)
data.append(res)
return {'query': query, 'data': data}
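
    # usage sketch (editor's addition; result shape taken from the code above):
    #   res = account.dm_search('test')
    #   pages = res['data']   # one raw GraphQL response per page (50 results each)
    #   # drill into each page with find_key(page, ...) as this method itself does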