зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-29 21:16:25 +02:00
add setuptools; add more fields to models; accounts_pool refactoring
Этот коммит содержится в:
родитель
0c1377d3c6
Коммит
9509378441
2
.gitignore
поставляемый
2
.gitignore
поставляемый
@ -2,7 +2,7 @@
|
||||
.ruff_cache/
|
||||
accounts/
|
||||
results-raw/
|
||||
results/
|
||||
results-parsed/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
||||
3
Makefile
3
Makefile
@ -7,6 +7,9 @@ lint:
|
||||
lint-fix:
|
||||
ruff check --fix .
|
||||
|
||||
pylint:
|
||||
pylint --errors-only twapi
|
||||
|
||||
test:
|
||||
pytest --cov=twapi tests/
|
||||
|
||||
|
||||
@ -1,3 +1,47 @@
|
||||
[build-system]
|
||||
requires = ['setuptools>=61', 'setuptools_scm>=6.2']
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "tw-api"
|
||||
version = "0.1.0"
|
||||
authors = [{name = "vladkens"}]
|
||||
description = "Twitter GraphQL and Search API implementation with SNScrape data models"
|
||||
readme = "readme.md"
|
||||
requires-python = ">=3.10"
|
||||
keywords = ["twitter", "api", "scrape", "snscrape", "tw-api", "twapi"]
|
||||
license = {text = "MIT"}
|
||||
classifiers = [
|
||||
'Development Status :: 4 - Beta',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 3.10',
|
||||
'Programming Language :: Python :: 3.11',
|
||||
]
|
||||
dependencies = [
|
||||
"fake-useragent==1.1.3",
|
||||
"httpx==0.24.0",
|
||||
"loguru==0.7.0"
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pylint==2.17.3",
|
||||
"pytest-asyncio==0.21.0",
|
||||
"pytest-cov==4.0.0",
|
||||
"pytest==7.3.1",
|
||||
"ruff==0.0.263",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
repository = "https://github.com/vladkens/tw-api"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = ['twapi']
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
pythonpath = ["."]
|
||||
asyncio_mode = "auto"
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
|
||||
@ -6,7 +50,3 @@ line-length = 99
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 99
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
pythonpath = ["."]
|
||||
asyncio_mode = "auto"
|
||||
|
||||
15
readme.md
15
readme.md
@ -1,6 +1,12 @@
|
||||
Twitter GraphQL and Search API implementation with [SNScrape](https://github.com/JustAnotherArchivist/snscrape) data models.
|
||||
|
||||
### Usage
|
||||
## Install
|
||||
|
||||
```bash
|
||||
pip install git+https://github.com/vladkens/tw-api.git
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
@ -48,6 +54,13 @@ async def main():
|
||||
# change log level, default info
|
||||
set_log_level("DEBUG")
|
||||
|
||||
# Tweet & User models can be converted to a regular dict or a JSON string, e.g.:
|
||||
doc = await api.user_by_id(user_id) # User
|
||||
doc.dict() # -> python dict
|
||||
doc.json() # -> json string
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
@ -1,4 +0,0 @@
|
||||
ruff==0.0.263
|
||||
pytest==7.3.1
|
||||
pytest-asyncio==0.21.0
|
||||
pytest-cov==4.0.0
|
||||
@ -1,3 +0,0 @@
|
||||
httpx==0.24.0
|
||||
fake-useragent==1.1.3
|
||||
loguru==0.7.0
|
||||
@ -58,52 +58,70 @@ async def test_search():
|
||||
items = await gather(api.search("elon musk lang:en", limit=20))
|
||||
assert len(items) > 0
|
||||
|
||||
for x in items:
|
||||
assert x.id is not None
|
||||
assert x.user is not None
|
||||
for doc in items:
|
||||
assert doc.id is not None
|
||||
assert doc.user is not None
|
||||
|
||||
tw_dict = x.json()
|
||||
assert x.id == tw_dict["id"]
|
||||
assert x.user.id == tw_dict["user"]["id"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.user.id == obj["user"]["id"]
|
||||
assert "url" in obj
|
||||
assert "url" in obj["user"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def test_user_by_id():
|
||||
api = API(AccountsPool())
|
||||
mock_rep(api, "user_by_id_raw")
|
||||
|
||||
rep = await api.user_by_id(2244994945)
|
||||
assert rep.id == 2244994945
|
||||
assert rep.username == "TwitterDev"
|
||||
doc = await api.user_by_id(2244994945)
|
||||
assert doc.id == 2244994945
|
||||
assert doc.username == "TwitterDev"
|
||||
|
||||
obj = rep.json()
|
||||
assert rep.id == obj["id"]
|
||||
assert rep.username == obj["username"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.username == obj["username"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def test_user_by_login():
|
||||
api = API(AccountsPool())
|
||||
mock_rep(api, "user_by_login_raw")
|
||||
|
||||
rep = await api.user_by_login("twitterdev")
|
||||
assert rep.id == 2244994945
|
||||
assert rep.username == "TwitterDev"
|
||||
doc = await api.user_by_login("twitterdev")
|
||||
assert doc.id == 2244994945
|
||||
assert doc.username == "TwitterDev"
|
||||
|
||||
obj = rep.json()
|
||||
assert rep.id == obj["id"]
|
||||
assert rep.username == obj["username"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.username == obj["username"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def test_tweet_details():
|
||||
api = API(AccountsPool())
|
||||
mock_rep(api, "tweet_details_raw")
|
||||
|
||||
rep = await api.tweet_details(1649191520250245121)
|
||||
assert rep.id == 1649191520250245121
|
||||
assert rep.user is not None
|
||||
doc = await api.tweet_details(1649191520250245121)
|
||||
assert doc.id == 1649191520250245121
|
||||
assert doc.user is not None
|
||||
|
||||
obj = rep.json()
|
||||
assert rep.id == obj["id"]
|
||||
assert rep.user.id == obj["user"]["id"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.user.id == obj["user"]["id"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def test_followers():
|
||||
@ -113,13 +131,17 @@ async def test_followers():
|
||||
users = await gather(api.followers(2244994945))
|
||||
assert len(users) > 0
|
||||
|
||||
for user in users:
|
||||
assert user.id is not None
|
||||
assert user.username is not None
|
||||
for doc in users:
|
||||
assert doc.id is not None
|
||||
assert doc.username is not None
|
||||
|
||||
obj = user.json()
|
||||
assert user.id == obj["id"]
|
||||
assert user.username == obj["username"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.username == obj["username"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def test_following():
|
||||
@ -129,13 +151,17 @@ async def test_following():
|
||||
users = await gather(api.following(2244994945))
|
||||
assert len(users) > 0
|
||||
|
||||
for user in users:
|
||||
assert user.id is not None
|
||||
assert user.username is not None
|
||||
for doc in users:
|
||||
assert doc.id is not None
|
||||
assert doc.username is not None
|
||||
|
||||
obj = user.json()
|
||||
assert user.id == obj["id"]
|
||||
assert user.username == obj["username"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.username == obj["username"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def test_retweters():
|
||||
@ -145,13 +171,17 @@ async def test_retweters():
|
||||
users = await gather(api.retweeters(1649191520250245121))
|
||||
assert len(users) > 0
|
||||
|
||||
for user in users:
|
||||
assert user.id is not None
|
||||
assert user.username is not None
|
||||
for doc in users:
|
||||
assert doc.id is not None
|
||||
assert doc.username is not None
|
||||
|
||||
obj = user.json()
|
||||
assert user.id == obj["id"]
|
||||
assert user.username == obj["username"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.username == obj["username"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def test_favoriters():
|
||||
@ -161,13 +191,17 @@ async def test_favoriters():
|
||||
users = await gather(api.favoriters(1649191520250245121))
|
||||
assert len(users) > 0
|
||||
|
||||
for user in users:
|
||||
assert user.id is not None
|
||||
assert user.username is not None
|
||||
for doc in users:
|
||||
assert doc.id is not None
|
||||
assert doc.username is not None
|
||||
|
||||
obj = user.json()
|
||||
assert user.id == obj["id"]
|
||||
assert user.username == obj["username"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.username == obj["username"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def test_user_tweets():
|
||||
@ -177,13 +211,17 @@ async def test_user_tweets():
|
||||
tweets = await gather(api.user_tweets(2244994945))
|
||||
assert len(tweets) > 0
|
||||
|
||||
for tweet in tweets:
|
||||
assert tweet.id is not None
|
||||
assert tweet.user is not None
|
||||
for doc in tweets:
|
||||
assert doc.id is not None
|
||||
assert doc.user is not None
|
||||
|
||||
obj = tweet.json()
|
||||
assert tweet.id == obj["id"]
|
||||
assert tweet.user.id == obj["user"]["id"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.user.id == obj["user"]["id"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def test_user_tweets_and_replies():
|
||||
@ -193,13 +231,17 @@ async def test_user_tweets_and_replies():
|
||||
tweets = await gather(api.user_tweets_and_replies(2244994945))
|
||||
assert len(tweets) > 0
|
||||
|
||||
for tweet in tweets:
|
||||
assert tweet.id is not None
|
||||
assert tweet.user is not None
|
||||
for doc in tweets:
|
||||
assert doc.id is not None
|
||||
assert doc.user is not None
|
||||
|
||||
obj = tweet.json()
|
||||
assert tweet.id == obj["id"]
|
||||
assert tweet.user.id == obj["user"]["id"]
|
||||
obj = doc.dict()
|
||||
assert doc.id == obj["id"]
|
||||
assert doc.user.id == obj["user"]["id"]
|
||||
|
||||
txt = doc.json()
|
||||
assert isinstance(txt, str)
|
||||
assert str(doc.id) in txt
|
||||
|
||||
|
||||
async def main():
|
||||
@ -207,7 +249,7 @@ async def main():
|
||||
# you need to have some account to perform this
|
||||
|
||||
pool = AccountsPool()
|
||||
pool.load_from_dir()
|
||||
pool.restore()
|
||||
|
||||
api = API(pool)
|
||||
|
||||
|
||||
@ -2,4 +2,5 @@
|
||||
from .account import Account
|
||||
from .accounts_pool import AccountsPool
|
||||
from .api import API
|
||||
from .models import *
|
||||
from .utils import gather
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum
|
||||
|
||||
@ -19,10 +18,8 @@ class Status(str, Enum):
|
||||
|
||||
|
||||
class Account:
|
||||
BASE_DIR = "accounts"
|
||||
|
||||
@classmethod
|
||||
def load(cls, filepath: str):
|
||||
def load_from_file(cls, filepath: str):
|
||||
try:
|
||||
with open(filepath) as f:
|
||||
data = json.load(f)
|
||||
@ -81,15 +78,8 @@ class Account:
|
||||
"status": self.status,
|
||||
}
|
||||
|
||||
def save(self):
|
||||
os.makedirs(self.BASE_DIR, exist_ok=True)
|
||||
data = self.dump()
|
||||
with open(f"{self.BASE_DIR}/{self.username}.json", "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
def update_limit(self, queue: str, reset_ts: int):
|
||||
self.limits[queue] = datetime.fromtimestamp(reset_ts, tz=timezone.utc)
|
||||
self.save()
|
||||
|
||||
def can_use(self, queue: str):
|
||||
if self.locked.get(queue, False) or self.status != Status.ACTIVE:
|
||||
@ -126,7 +116,6 @@ class Account:
|
||||
if e.response.status_code == 403:
|
||||
logger.error(f"403 error {log_id}")
|
||||
self.status = Status.LOGIN_ERROR
|
||||
self.save()
|
||||
return
|
||||
|
||||
self.client.headers["x-csrf-token"] = self.client.cookies["ct0"]
|
||||
@ -134,7 +123,6 @@ class Account:
|
||||
|
||||
logger.info(f"logged in success {log_id}")
|
||||
self.status = Status.ACTIVE
|
||||
self.save()
|
||||
|
||||
async def get_guest_token(self):
|
||||
rep = await self.client.post("https://api.twitter.com/1.1/guest/activate.json")
|
||||
|
||||
@ -1,27 +1,34 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
from .account import Account, Status
|
||||
from .logger import logger
|
||||
from .utils import shuffle
|
||||
|
||||
|
||||
class AccountsPool:
|
||||
BASE_DIR = "accounts"
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, base_dir: str | None = None):
|
||||
self.accounts: list[Account] = []
|
||||
self.base_dir = base_dir or "accounts"
|
||||
|
||||
def load_from_dir(self, folder: str | None = None):
|
||||
folder = folder or self.BASE_DIR
|
||||
|
||||
files = os.listdir(folder)
|
||||
def restore(self):
|
||||
files = [os.path.join(self.base_dir, x) for x in os.listdir(self.base_dir)]
|
||||
files = [x for x in files if x.endswith(".json")]
|
||||
files = [os.path.join(folder, x) for x in files]
|
||||
|
||||
for file in files:
|
||||
account = Account.load(file)
|
||||
self._load_account_from_file(file)
|
||||
|
||||
def _load_account_from_file(self, filepath: str):
    """Load an account from ``filepath`` and register it in this pool.

    Returns whatever ``Account.load_from_file`` returned. A falsy result is
    handed back unchanged and nothing is added to ``self.accounts``.

    Raises:
        ValueError: if the pool already holds an account with the same username.
    """
    account = Account.load_from_file(filepath)
    if not account:
        return account

    # Usernames must stay unique within the pool.
    if any(known.username == account.username for known in self.accounts):
        raise ValueError(f"Duplicate username {account.username}")

    self.accounts.append(account)
    return account
|
||||
|
||||
def _get_filename(self, username: str):
|
||||
return f"{self.base_dir}/{username}.json"
|
||||
|
||||
def add_account(
|
||||
self,
|
||||
@ -32,14 +39,20 @@ class AccountsPool:
|
||||
proxy: str | None = None,
|
||||
user_agent: str | None = None,
|
||||
):
|
||||
filepath = os.path.join(self.BASE_DIR, f"{login}.json")
|
||||
account = Account.load(filepath)
|
||||
account = self._load_account_from_file(self._get_filename(login))
|
||||
if account:
|
||||
self.accounts.append(account)
|
||||
return
|
||||
|
||||
account = Account(login, password, email, email_password, user_agent, proxy)
|
||||
self.accounts.append(account)
|
||||
account = Account(
|
||||
login,
|
||||
password,
|
||||
email,
|
||||
email_password,
|
||||
proxy=proxy,
|
||||
user_agent=user_agent,
|
||||
)
|
||||
self.save_account(account)
|
||||
self._load_account_from_file(self._get_filename(login))
|
||||
|
||||
async def login(self):
|
||||
for x in self.accounts:
|
||||
@ -48,7 +61,8 @@ class AccountsPool:
|
||||
await x.login()
|
||||
except Exception as e:
|
||||
logger.error(f"Error logging in to {x.username}: {e}")
|
||||
pass
|
||||
finally:
|
||||
self.save_account(x)
|
||||
|
||||
def get_username_by_token(self, auth_token: str) -> str:
|
||||
for x in self.accounts:
|
||||
@ -57,7 +71,8 @@ class AccountsPool:
|
||||
return "UNKNOWN"
|
||||
|
||||
def get_account(self, queue: str) -> Account | None:
|
||||
for x in self.accounts:
|
||||
accounts = shuffle(self.accounts) # make random order each time
|
||||
for x in accounts:
|
||||
if x.can_use(queue):
|
||||
return x
|
||||
return None
|
||||
@ -72,3 +87,15 @@ class AccountsPool:
|
||||
else:
|
||||
logger.debug(f"No accounts available for queue '{queue}' (sleeping for 5 sec)")
|
||||
await asyncio.sleep(5)
|
||||
|
||||
def save_account(self, account: Account):
    """Write ``account.dump()`` as indented JSON to the account's file.

    The target path comes from ``self._get_filename(account.username)``; its
    parent directory is created first if it does not exist yet.
    """
    target = self._get_filename(account.username)
    payload = account.dump()

    parent_dir = os.path.dirname(target)
    os.makedirs(parent_dir, exist_ok=True)

    with open(target, "w") as fp:
        json.dump(payload, fp, indent=2)
|
||||
|
||||
def update_limit(self, account: Account, queue: str, reset_ts: int):
    """Set a new limit reset timestamp on ``account`` for ``queue``, then save the account.

    The limit bookkeeping is delegated to ``account.update_limit``; writing the
    account to its file is done by ``self.save_account``.
    """
    account.update_limit(queue=queue, reset_ts=reset_ts)
    self.save_account(account=account)
|
||||
|
||||
23
twapi/api.py
23
twapi/api.py
@ -7,7 +7,7 @@ from .accounts_pool import AccountsPool
|
||||
from .constants import GQL_FEATURES, GQL_URL, SEARCH_PARAMS, SEARCH_URL
|
||||
from .logger import logger
|
||||
from .models import Tweet, User
|
||||
from .utils import encode_params, find_item, to_old_obj, to_search_like
|
||||
from .utils import encode_params, get_by_path, to_old_obj, to_search_like
|
||||
|
||||
|
||||
class API:
|
||||
@ -52,13 +52,13 @@ class API:
|
||||
if e.response.status_code == 429:
|
||||
logger.debug(f"Rate limit for account={account.username} on queue={queue}")
|
||||
reset_ts = int(e.response.headers.get("x-rate-limit-reset", 0))
|
||||
account.update_limit(queue, reset_ts)
|
||||
self.pool.update_limit(account, queue, reset_ts)
|
||||
continue
|
||||
|
||||
if e.response.status_code == 403:
|
||||
logger.debug(f"Account={account.username} is banned on queue={queue}")
|
||||
reset_ts = int(time.time() + 60 * 60) # 1 hour
|
||||
account.update_limit(queue, reset_ts)
|
||||
self.pool.update_limit(account, queue, reset_ts)
|
||||
continue
|
||||
|
||||
logger.error(f"[{e.response.status_code}] {e.request.url}\n{e.response.text}")
|
||||
@ -80,13 +80,13 @@ class API:
|
||||
logger.debug(e)
|
||||
return None
|
||||
|
||||
def get_ql_entries(self, obj: dict) -> list[dict]:
|
||||
entries = find_item(obj, "entries")
|
||||
def _get_ql_entries(self, obj: dict) -> list[dict]:
|
||||
entries = get_by_path(obj, "entries")
|
||||
return entries or []
|
||||
|
||||
def _get_ql_cursor(self, obj: dict) -> str | None:
|
||||
try:
|
||||
for entry in self.get_ql_entries(obj):
|
||||
for entry in self._get_ql_entries(obj):
|
||||
if entry["entryId"].startswith("cursor-bottom-"):
|
||||
return entry["content"]["value"]
|
||||
return None
|
||||
@ -104,7 +104,7 @@ class API:
|
||||
obj = rep.json()
|
||||
|
||||
# cursor-top / cursor-bottom always present
|
||||
entries = self.get_ql_entries(obj)
|
||||
entries = self._get_ql_entries(obj)
|
||||
entries = [x for x in entries if not x["entryId"].startswith("cursor-")]
|
||||
cursor = self._get_ql_cursor(obj)
|
||||
|
||||
@ -141,11 +141,18 @@ class API:
|
||||
params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch"
|
||||
return await client.get(SEARCH_URL, params=params)
|
||||
|
||||
retries = 0
|
||||
async for rep in self._inf_req(queue, _get):
|
||||
data = rep.json()
|
||||
|
||||
cursor = self._get_search_cursor(data)
|
||||
tweets = data.get("globalObjects", {}).get("tweets", [])
|
||||
if not tweets and retries < 3:
|
||||
retries += 1
|
||||
continue
|
||||
else:
|
||||
retries = 0
|
||||
|
||||
cursor = self._get_search_cursor(data)
|
||||
|
||||
check = self._is_end(rep, q, tweets, cursor, count, limit)
|
||||
count, end_before, end_after = check
|
||||
|
||||
@ -1,16 +1,24 @@
|
||||
import email.utils
|
||||
from dataclasses import asdict, dataclass
|
||||
import json
|
||||
import re
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from .utils import get_or, int_or_none
|
||||
from snscrape.modules import twitter
|
||||
|
||||
from .logger import logger
|
||||
from .utils import find_item, get_or, int_or_none
|
||||
|
||||
|
||||
@dataclass
|
||||
class JSONTrait:
|
||||
def json(self):
|
||||
def dict(self):
|
||||
return asdict(self)
|
||||
|
||||
def json(self):
|
||||
return json.dumps(self.dict(), default=str)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Coordinates(JSONTrait):
|
||||
@ -80,6 +88,7 @@ class UserRef(JSONTrait):
|
||||
@dataclass
|
||||
class User(JSONTrait):
|
||||
id: int
|
||||
url: str
|
||||
username: str
|
||||
displayname: str
|
||||
rawDescription: str
|
||||
@ -100,14 +109,11 @@ class User(JSONTrait):
|
||||
# link: typing.Optional[TextLink] = None
|
||||
# label: typing.Optional["UserLabel"] = None
|
||||
|
||||
@property
|
||||
def url(self) -> str:
|
||||
return f"https://twitter.com/{self.username}"
|
||||
|
||||
@staticmethod
|
||||
def parse(obj: dict):
|
||||
return User(
|
||||
id=int(obj["id_str"]),
|
||||
url=f'https://twitter.com/{obj["screen_name"]}',
|
||||
username=obj["screen_name"],
|
||||
displayname=obj["name"],
|
||||
rawDescription=obj["description"],
|
||||
@ -129,6 +135,7 @@ class User(JSONTrait):
|
||||
@dataclass
|
||||
class Tweet(JSONTrait):
|
||||
id: int
|
||||
url: str
|
||||
date: datetime
|
||||
user: User
|
||||
lang: str
|
||||
@ -147,30 +154,28 @@ class Tweet(JSONTrait):
|
||||
quotedTweet: Optional["Tweet"] = None
|
||||
place: Optional[Place] = None
|
||||
coordinates: Optional[Coordinates] = None
|
||||
inReplyToTweetId: int | None = None
|
||||
inReplyToUser: UserRef | None = None
|
||||
source: str | None = None
|
||||
sourceUrl: str | None = None
|
||||
sourceLabel: str | None = None
|
||||
|
||||
# renderedContent: str
|
||||
# source: str | None = None
|
||||
# sourceUrl: str | None = None
|
||||
# sourceLabel: str | None = None
|
||||
# media: typing.Optional[typing.List["Medium"]] = None
|
||||
# inReplyToTweetId: typing.Optional[int] = None
|
||||
# inReplyToUser: typing.Optional["User"] = None
|
||||
# card: typing.Optional["Card"] = None
|
||||
# vibe: typing.Optional["Vibe"] = None
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
return f"https://twitter.com/{self.user.username}/status/{self.id}"
|
||||
|
||||
@staticmethod
|
||||
def parse(obj: dict, res: dict):
|
||||
tw_usr = User.parse(res["users"][obj["user_id_str"]])
|
||||
rt_obj = get_or(res, f"tweets.{obj.get('retweeted_status_id_str')}")
|
||||
qt_obj = get_or(res, f"tweets.{obj.get('quoted_status_id_str')}")
|
||||
|
||||
return Tweet(
|
||||
id=int(obj["id_str"]),
|
||||
url=f'https://twitter.com/{tw_usr.username}/status/{obj["id_str"]}',
|
||||
date=email.utils.parsedate_to_datetime(obj["created_at"]),
|
||||
user=User.parse(res["users"][obj["user_id_str"]]),
|
||||
user=tw_usr,
|
||||
lang=obj["lang"],
|
||||
rawContent=obj["full_text"],
|
||||
replyCount=obj["reply_count"],
|
||||
@ -187,4 +192,40 @@ class Tweet(JSONTrait):
|
||||
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
|
||||
place=Place.parse(obj["place"]) if obj.get("place") else None,
|
||||
coordinates=Coordinates.parse(obj),
|
||||
inReplyToTweetId=int_or_none(obj, "in_reply_to_status_id_str"),
|
||||
inReplyToUser=_get_reply_user(obj, res),
|
||||
source=obj.get("source", None),
|
||||
sourceUrl=_get_source_url(obj),
|
||||
sourceLabel=_get_source_label(obj),
|
||||
)
|
||||
|
||||
|
||||
def _get_reply_user(tw_obj: dict, res: dict):
    """Resolve the user a tweet replies to as a ``UserRef``.

    Lookup order:
      1. ``res["users"]`` keyed by the tweet's ``in_reply_to_user_id_str``;
      2. the tweet's ``entities.user_mentions`` entry with a matching ``id_str``.

    Returns None when the tweet has no ``in_reply_to_user_id_str`` or when the
    user cannot be found in either place (the miss is logged at debug level).
    """
    user_id = tw_obj.get("in_reply_to_user_id_str", None)
    if user_id is None:
        return None

    known_users = res["users"]
    if user_id in known_users:
        return UserRef.parse(known_users[user_id])

    # Fall back to the mentions embedded in the tweet itself.
    for mention in get_or(tw_obj, "entities.user_mentions", []):
        if mention["id_str"] == user_id and mention:
            return UserRef.parse(mention)

    logger.debug(f'{tw_obj["in_reply_to_user_id_str"]}\n{json.dumps(res)}')
    return None
|
||||
|
||||
|
||||
def _get_source_url(tw_obj: dict):
|
||||
source = tw_obj.get("source", None)
|
||||
if source and (match := re.search(r'href=[\'"]?([^\'" >]+)', source)):
|
||||
return str(match.group(1))
|
||||
return None
|
||||
|
||||
|
||||
def _get_source_label(tw_obj: dict):
|
||||
source = tw_obj.get("source", None)
|
||||
if source and (match := re.search(r">([^<]*)<", source)):
|
||||
return str(match.group(1))
|
||||
return None
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import json
|
||||
import random
|
||||
from collections import defaultdict
|
||||
from typing import Any, AsyncGenerator, TypeVar
|
||||
from typing import Any, AsyncGenerator, Callable, TypeVar
|
||||
|
||||
from httpx import HTTPStatusError, Response
|
||||
|
||||
@ -53,7 +54,7 @@ def int_or_none(obj: dict, key: str):
|
||||
|
||||
|
||||
# https://stackoverflow.com/a/43184871
|
||||
def find_item(obj: dict, key: str, default=None):
|
||||
def get_by_path(obj: dict, key: str, default=None):
|
||||
stack = [iter(obj.items())]
|
||||
while stack:
|
||||
for k, v in stack[-1]:
|
||||
@ -70,6 +71,13 @@ def find_item(obj: dict, key: str, default=None):
|
||||
return default
|
||||
|
||||
|
||||
def find_item(lst: list[T], fn: Callable[[T], bool]) -> T | None:
    """Return the first element of ``lst`` for which ``fn(element)`` is truthy.

    Elements are tested in order and the search stops at the first hit.
    Returns None when no element matches (including when ``lst`` is empty).
    """
    return next((item for item in lst if fn(item)), None)
|
||||
|
||||
|
||||
def get_typed_object(obj: dict, res: defaultdict[str, list]):
|
||||
obj_type = obj.get("__typename", None)
|
||||
if obj_type is not None:
|
||||
@ -100,3 +108,9 @@ def to_search_like(obj: dict):
|
||||
users = {str(x["rest_id"]): to_old_obj(x) for x in users}
|
||||
|
||||
return {"tweets": tweets, "users": users}
|
||||
|
||||
|
||||
def shuffle(lst: list):
    """Return a randomly reordered copy of ``lst``.

    The list passed in is left untouched; only the copy is shuffled.
    """
    shuffled = lst.copy()
    random.shuffle(shuffled)
    return shuffled
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user